Compare commits

...

33 Commits

Author SHA1 Message Date
Icedream c68a24193c
Fix accidental removal of import path and invalid indirect. 2017-02-22 18:52:33 +01:00
Icedream a111a35e66
Merge remote-tracking branch 'origin/develop' 2017-02-22 18:47:30 +01:00
Icedream 518dbeaf22
Allow disabling auto-join on invite. 2017-02-22 18:46:58 +01:00
Icedream bd63c975b8
parsers/web: Remove unused import "log". 2016-08-09 00:15:51 +02:00
Icedream 3faff8264f
Add filename to image information.
This fixes #6.
2016-08-08 14:05:55 +02:00
Icedream 1e3af32706
parsers/web: Use unique user agent to get around HTTP error 429 on Reddit.
This fixes #12.
2016-08-08 13:53:44 +02:00
Icedream b37a15ac1f
Introduce "--images" flag to enable image link parsing; image link parsing is disabled by default. 2016-07-26 20:36:36 +02:00
Icedream 6d70e02641
Remove defect test routines. 2016-07-22 21:42:20 +02:00
Icedream 19708251b9
Do not register SoundCloud/YouTube parsers if no auth details given for them. 2016-07-22 20:48:57 +02:00
Icedream fb85ad8554 manager: Ignore users for 30 seconds after they join.
This should be enough to at least prevent spammers from abusing the bot immediately.
2016-07-06 18:01:39 +02:00
Icedream 2508971be1 parsers/youtube: Fix #11 by using a full reset in the template. 2016-07-06 04:16:26 +02:00
Icedream 2608df9727 parsers/youtube: Add NSFW marker for age-restricted videos. 2016-07-05 16:18:14 +02:00
Icedream b61927108b parsers/youtube: Fix information for channels. 2016-07-05 16:13:03 +02:00
Icedream 0eb16f9975 Remove getYouTubeId from util.go.
This bit of code has long been moved to the YouTube parser package.
2016-07-05 15:07:50 +02:00
Icedream f97c872b2e parsers/wikipedia: Fix HTTP(S) URL filter logic. 2016-07-03 19:03:32 +02:00
Icedream 6de3faa8e0 parsers/wikipedia: Only accept HTTP(S) links. 2016-07-03 18:59:13 +02:00
Icedream 7a131adfb8 parsers/wikipedia: Fix handling of www.wikipedia.org and wikipedia.org links. 2016-07-03 18:57:04 +02:00
Icedream 769e0e90a7 manager: Fix how URLs are passed to parsers.
This should prevent parsers from being able to leak changes on the original URL object to other parsers.

Fixes issue #1.
2016-07-03 18:40:00 +02:00
Icedream d6a32315f6 parsers/web: Remove extra logging. 2016-06-20 02:45:29 +02:00
Icedream 5c5f5ef478 parsers/web: Compare URLs by their string representations instead.
The Path can differ in that the original URL may be missing the leading "/" while the resulting URL contains one; in the resulting string representation this makes no difference.
2016-06-20 02:44:51 +02:00
Icedream dc5597c054 parsers/web: Remove hash reference when parsing URL.
Fixes #8.
2016-06-20 02:43:30 +02:00
Icedream 280da493fb parsers/web: Add test functions. 2016-06-20 02:30:27 +02:00
Icedream 2163bfc99f parsers/web: Remove extra logging lines. 2016-06-20 02:30:10 +02:00
Icedream b99af84dc5 travis: No testing against old Go versions.
Go 1.5.4 does not support the whitespace trim syntax for templates. Since we make extensive use of it, we cannot support that version of Go.
2016-06-20 00:41:59 +02:00
Icedream 35156593dc travis: Correct Go versions. 2016-06-20 00:29:53 +02:00
Icedream ae1dce4bce New-line fix caused extra spaces between each character. 2016-06-19 23:34:57 +02:00
Icedream 8696313f8e Move "(no title)" text to its own constant noTitleStr.
Main purpose is for making integration testing easier later.
2016-06-19 23:32:27 +02:00
Icedream ec899f0ddf Replace new-line characters in HTML title with space.
Targets #2.
2016-06-19 23:31:57 +02:00
Icedream 6775fe5100 parsers/web: Limit HTML parsing to first 8 kB and use Content-Length header.
Targets #2.
2016-06-19 23:09:22 +02:00
Icedream be2edc845a Add Travis CI.
Fixes #4.
2016-06-19 22:36:55 +02:00
Icedream b234f732df Use actual cross char (×) for image dimensions.
Fixes #7.
2016-06-19 22:27:16 +02:00
Icedream 8e55200ff4 Readme: Fix headings. 2016-06-17 07:37:54 +02:00
Icedream 454d3ccada Fix typo. 2016-06-12 01:32:13 +02:00
13 changed files with 415 additions and 169 deletions

.travis.yml (new file, +5)

@ -0,0 +1,5 @@
language: go
go:
- 1.6.2
- tip


@ -1,5 +1,7 @@
# MediaLink IRC Bot
[![Build Status](https://travis-ci.org/icedream/irc-medialink.svg?branch=master)](https://travis-ci.org/icedream/irc-medialink)
This IRC bot automatically parses links posted in chat rooms and prints information about them.
Currently explicit support has been built in for:
@ -11,7 +13,7 @@ Currently explicit support has been built in for:
Generally, for websites that are not directly supported the bot will print the page title.
# How to run the bot
## How to run the bot
In order to properly run the bot, you need to [register a SoundCloud application](http://soundcloud.com/you/apps/new) and [get a YouTube Data API key](https://console.developers.google.com/apis/api/youtube/overview) for it and then feed the API data to the bot through the command line arguments.
@ -25,7 +27,7 @@ Then you can find out which options you can pass to the bot directly by running
You need to at least pass the `--server`, `--youtube-key`, `--soundcloud-id` and `--soundcloud-secret` parameters.
# ...with Docker
### ...with Docker
You can use the `icedream/irc-medialink` image in order to run this bot in Docker. You can pull it using this command:
@ -54,12 +56,12 @@ services:
restart: always
```
# Support
## Support
This bot is officially tested and running on the LibraIRC IRC network (irc.librairc.net), though it can also run on other IRC networks.
For support on LibraIRC please use the channel #MediaLink there to get in contact with Icedream.
# License
## License
This project is licensed under the **GNU General Public License Version 2 or later**. For more information check the [LICENSE](LICENSE) file.

main.go (124 changed lines)

@ -32,7 +32,10 @@ func main() {
var soundcloudClientId string
var soundcloudClientSecret string
var webEnableImages bool
var debug bool
var noInvite bool
var useTLS bool
var server string
var password string
@ -48,6 +51,7 @@ func main() {
kingpin.Flag("nick", "The nickname.").Short('n').StringVar(&nickname)
kingpin.Flag("ident", "The ident.").Short('i').StringVar(&ident)
kingpin.Flag("debug", "Enables debug mode.").Short('d').BoolVar(&debug)
kingpin.Flag("no-invite", "Disables auto-join on invite.").BoolVar(&noInvite)
kingpin.Flag("tls", "Use TLS.").BoolVar(&useTLS)
kingpin.Flag("server", "The server to connect to.").Short('s').StringVar(&server)
kingpin.Flag("password", "The password to use for logging into the IRC server.").Short('p').StringVar(&password)
@ -58,9 +62,14 @@ func main() {
// Youtube config
kingpin.Flag("youtube-key", "The API key to use to access the YouTube API.").StringVar(&youtubeApiKey)
// SoundCloud config
kingpin.Flag("soundcloud-id", "The SoundCloud ID.").StringVar(&soundcloudClientId)
kingpin.Flag("soundcloud-secret", "The SoundCloud secret.").StringVar(&soundcloudClientSecret)
// Web parser config
kingpin.Flag("images", "Enables parsing links of images. Disabled by default for legal reasons.").BoolVar(&webEnableImages)
kingpin.Parse()
if len(nickname) == 0 {
@ -74,25 +83,36 @@ func main() {
m := manager.NewManager()
// Load youtube parser
youtubeParser := &youtube.Parser{
Config: &youtube.Config{ApiKey: youtubeApiKey},
if len(youtubeApiKey) > 0 {
youtubeParser := &youtube.Parser{
Config: &youtube.Config{ApiKey: youtubeApiKey},
}
must(m.RegisterParser(youtubeParser))
} else {
log.Println("No YouTube API key provided, YouTube parsing via API is disabled.")
}
must(m.RegisterParser(youtubeParser))
// Load soundcloud parser
soundcloudParser := &soundcloud.Parser{
Config: &soundcloud.Config{
ClientId: soundcloudClientId,
ClientSecret: soundcloudClientSecret,
},
if len(soundcloudClientId) > 0 && len(soundcloudClientSecret) > 0 {
soundcloudParser := &soundcloud.Parser{
Config: &soundcloud.Config{
ClientId: soundcloudClientId,
ClientSecret: soundcloudClientSecret,
},
}
must(m.RegisterParser(soundcloudParser))
} else {
log.Println("No SoundCloud client ID or secret provided, SoundCloud parsing via API is disabled.")
}
must(m.RegisterParser(soundcloudParser))
// Load wikipedia parser
must(m.RegisterParser(new(wikipedia.Parser)))
// Load web parser
must(m.RegisterParser(new(web.Parser)))
webParser := &web.Parser{
EnableImages: webEnableImages,
}
must(m.RegisterParser(webParser))
// IRC
conn := m.AntifloodIrcConn(irc.IRC(nickname, ident))
@ -129,6 +149,8 @@ func main() {
conn.AddCallback("JOIN", func(e *irc.Event) {
// Is this JOIN not about us?
if !strings.EqualFold(e.Nick, conn.GetNick()) {
// Save this user's details for a temporary ignore
m.NotifyUserJoined(e.Arguments[0], e.Source)
return
}
@ -138,44 +160,46 @@ func main() {
default:
}
})
conn.AddCallback("INVITE", func(e *irc.Event) {
// Is this INVITE not for us?
if !strings.EqualFold(e.Arguments[0], conn.GetNick()) {
return
}
// Asynchronous notification
select {
case inviteChan <- e.Arguments[1]:
default:
}
// We have been invited, autojoin!
go func(sourceNick string, targetChannel string) {
joinWaitLoop:
for {
select {
case channel := <-joinChan:
if strings.EqualFold(channel, targetChannel) {
// TODO - Thanks message
time.Sleep(1 * time.Second)
conn.Privmsgf(targetChannel, "Thanks for inviting me, %s! I am %s, the friendly bot that shows information about links posted in this channel. I hope I can be of great help for everyone here in %s! :)", sourceNick, conn.GetNick(), targetChannel)
time.Sleep(2 * time.Second)
conn.Privmsg(targetChannel, "If you ever run into trouble with me (or find any bugs), please us the channel #MediaLink for contact on this IRC.")
break joinWaitLoop
}
case channel := <-inviteChan:
if strings.EqualFold(channel, targetChannel) {
break joinWaitLoop
}
case <-time.After(time.Minute):
log.Printf("WARNING: Timed out waiting for us to join %s as we got invited", targetChannel)
break joinWaitLoop
}
if !noInvite {
conn.AddCallback("INVITE", func(e *irc.Event) {
// Is this INVITE not for us?
if !strings.EqualFold(e.Arguments[0], conn.GetNick()) {
return
}
}(e.Nick, e.Arguments[1])
conn.Join(e.Arguments[1])
})
// Asynchronous notification
select {
case inviteChan <- e.Arguments[1]:
default:
}
// We have been invited, autojoin!
go func(sourceNick string, targetChannel string) {
joinWaitLoop:
for {
select {
case channel := <-joinChan:
if strings.EqualFold(channel, targetChannel) {
// TODO - Thanks message
time.Sleep(1 * time.Second)
conn.Privmsgf(targetChannel, "Thanks for inviting me, %s! I am %s, the friendly bot that shows information about links posted in this channel. I hope I can be of great help for everyone here in %s! :)", sourceNick, conn.GetNick(), targetChannel)
time.Sleep(2 * time.Second)
conn.Privmsg(targetChannel, "If you ever run into trouble with me (or find any bugs), please use the channel #MediaLink for contact on this IRC.")
break joinWaitLoop
}
case channel := <-inviteChan:
if strings.EqualFold(channel, targetChannel) {
break joinWaitLoop
}
case <-time.After(time.Minute):
log.Printf("WARNING: Timed out waiting for us to join %s as we got invited", targetChannel)
break joinWaitLoop
}
}
}(e.Nick, e.Arguments[1])
conn.Join(e.Arguments[1])
})
}
conn.AddCallback("PRIVMSG", func(e *irc.Event) {
go func(event *irc.Event) {
//sender := event.Nick
@ -196,6 +220,12 @@ func main() {
log.Printf("<%s @ %s> %s", event.Nick, target, msg)
// Ignore user if they just joined
if shouldIgnore := m.TrackUser(target, event.Source); shouldIgnore {
log.Print("This message will be ignored since the user just joined.")
return
}
urlStr := xurls.Relaxed.FindString(msg)
switch {


@ -21,10 +21,17 @@
{{- else -}}
Link info
{{- end -}}
{{- bold }}
{{- reset }}
»
{{- if index . "AgeRestriction" }}
{{ color 4 -}}
{{ bold -}}
[{{- index . "AgeRestriction" }}]
{{- reset }}
{{- end }}
{{ if index . "IsProfile" }}
{{- if index . "Title" }}
{{ bold -}}
@ -60,19 +67,22 @@
({{ . }})
{{ end }}
{{ else }}
{{ if index . "Description" }}
{{ excerpt 384 (index . "Description") }}
{{ else }}
{{ with index . "ImageType" }}
{{ . }} image,
{{ with index . "Description" }}
{{ excerpt 384 . }}
{{ end }}
{{ end }}
{{ if index . "ImageType" }}
{{ if index . "Title" }}
·
{{ end }}
{{ .ImageType }} image,
{{ if (index . "ImageSize") (index . "Size") }}
{{ with index . "ImageSize" }}
{{ .X }}×{{ .Y }}
{{ end }}
{{ if (index . "ImageSize") (index . "Size") }}
{{ with index . "ImageSize" }}
{{ .X }}x{{ .Y }}
{{ end }}
{{ with index . "Size" }}
({{ size . }})
{{ end }}
{{ with index . "Size" }}
({{ size . }})
{{ end }}
{{ end }}
{{ end }}


@ -17,6 +17,28 @@ func (m *Manager) initAntiflood() {
m.cache = cache.New(1*time.Minute, 5*time.Second)
}
func (m *Manager) TrackUser(target string, source string) (shouldIgnore bool) {
key := normalizeUserAntiflood(target, source)
if _, ok := m.cache.Get(key); ok {
// User just joined here recently, ignore them
shouldIgnore = true
}
return
}
func (m *Manager) NotifyUserJoined(target string, source string) {
key := normalizeUserAntiflood(target, source)
// When a user joins, he will be ignored for the first 30 seconds,
// enough to prevent parsing links from people who only join to spam their
// links immediately
if _, exists := m.cache.Get(key); !exists {
m.cache.Add(key, nil, 30*time.Second)
}
}
func (m *Manager) TrackUrl(target string, u *url.URL) (shouldIgnore bool) {
key := normalizeUrlAntiflood(target, u)
@ -70,6 +92,17 @@ func normalizeTextAntiflood(target, text string) string {
return fmt.Sprintf("TEXT/%s/%X", strings.ToUpper(target), s.Sum([]byte{}))
}
func normalizeUserAntiflood(target, source string) string {
sourceSplitHost := strings.SplitN(source, "@", 2)
sourceSplitHostname := strings.Split(sourceSplitHost[1], ".")
if len(sourceSplitHostname) > 1 &&
strings.EqualFold(sourceSplitHostname[len(sourceSplitHostname)-1], "IP") {
sourceSplitHostname[0] = "*"
}
source = fmt.Sprintf("%s!%s@%s", "*", "*", strings.Join(sourceSplitHostname, "."))
return fmt.Sprintf("USER/%s/%s", strings.ToUpper(target), source)
}
// Proxies several methods of the IRC connection in order to drop repeated messages
type ircConnectionProxy struct {
*irc.Connection


@ -52,33 +52,40 @@ func (m *Manager) RegisterParser(parser Parser) error {
return nil
}
func (m *Manager) Parse(u *url.URL) (string, parsers.ParseResult) {
var oldU *url.URL
func (m *Manager) Parse(currentUrl *url.URL) (string, parsers.ParseResult) {
var referer *url.URL
attempt := 0
followLoop:
for u != nil {
log.Printf("Parsing %s (referer %s)...", u, oldU)
for currentUrl != nil {
attempt++
if attempt > 15 {
log.Printf("WARNING: Potential infinite loop for url %s, abort parsing", u)
log.Printf("WARNING: Potential infinite loop for url %s, abort parsing", currentUrl)
break
}
for _, p := range m.GetParsers() {
r := p.Parse(u, oldU)
var refererCopy *url.URL
if referer != nil {
refererCopy = &url.URL{}
*refererCopy = *referer
}
currentUrlCopy := &url.URL{}
*currentUrlCopy = *currentUrl
r := p.Parse(currentUrlCopy, refererCopy)
if r.Ignored {
continue
}
if r.FollowUrl != nil {
if *u == *r.FollowUrl {
if *currentUrl == *r.FollowUrl {
log.Printf("WARNING: Ignoring request to follow to same URL, ignoring.")
break followLoop
}
oldU, u = u, r.FollowUrl
referer = currentUrl
currentUrl = r.FollowUrl
continue followLoop
}
return p.Name(), r
}
u = nil
currentUrl = nil
}
// No parser matches, link ignored


@ -2,9 +2,9 @@ package web
import (
"errors"
"log"
"net/http"
"net/url"
"regexp"
"strings"
"golang.org/x/net/html"
@ -16,14 +16,25 @@ import (
_ "image/png"
"github.com/icedream/irc-medialink/parsers"
"github.com/icedream/irc-medialink/util/limitedio"
"github.com/yhat/scrape"
)
var (
ErrCorruptedImage = errors.New("Corrupted image.")
rxNewlines = regexp.MustCompile(`(?:\r?\n)+`)
)
type Parser struct{}
const (
runeHash = '#'
noTitleStr = "(no title)"
maxHtmlSize = 8 * 1024
)
type Parser struct {
EnableImages bool
}
func (p *Parser) Init() error {
return nil
@ -40,6 +51,12 @@ func (p *Parser) Parse(u *url.URL, referer *url.URL) (result parsers.ParseResult
return
}
// Remove hash reference from URL since that's not meant to be in the request
if strings.Contains(u.Path, string(runeHash)) {
u = &(*u) // avoid modifying original URL object
u.Path = u.Path[0:strings.IndexRune(u.Path, runeHash)]
}
// Make request
req, err := http.NewRequest("GET", u.String(), nil)
if err != nil {
@ -49,12 +66,11 @@ func (p *Parser) Parse(u *url.URL, referer *url.URL) (result parsers.ParseResult
if referer != nil {
req.Header.Set("Referer", referer.String())
}
req.Header.Set("User-Agent", "MediaLink IRC Bot")
if resp, err := http.DefaultTransport.RoundTrip(req); err != nil {
log.Print("HTTP Get failed")
result.Error = err
return
} else {
log.Printf("Web parser result: %+v", resp)
defer resp.Body.Close()
if 300 <= resp.StatusCode && resp.StatusCode < 400 {
if u2, err := resp.Location(); err == nil && u2 != nil && *u2 != *u {
@ -75,47 +91,60 @@ func (p *Parser) Parse(u *url.URL, referer *url.URL) (result parsers.ParseResult
if sep < 0 {
sep = len(contentType)
}
log.Print(contentType[0:sep])
switch strings.ToLower(contentType[0:sep]) {
case "text/html":
// Parse the page
root, err := html.Parse(resp.Body)
var contentLength int
if resp.ContentLength < 0 || resp.ContentLength > maxHtmlSize {
contentLength = maxHtmlSize
} else {
contentLength = int(resp.ContentLength)
}
limitedBody := limitedio.NewLimitedReader(resp.Body, contentLength)
root, err := html.Parse(limitedBody)
if err != nil {
result.Error = err
return
}
// Search for the title
result.Information = []map[string]interface{}{
map[string]interface{}{
"IsUpload": false,
},
}
title, ok := scrape.Find(root, scrape.ByTag(atom.Title))
if ok {
// Got it!
result.Information = []map[string]interface{}{
map[string]interface{}{
"IsUpload": false,
"Title": scrape.Text(title),
},
}
result.Information[0]["Title"] = rxNewlines.ReplaceAllString(scrape.Text(title), " ")
} else {
result.Ignored = true
// No title found
result.Information[0]["Title"] = noTitleStr
}
case "image/png", "image/jpeg", "image/gif":
log.Print("Parsing image...")
if m, imgType, err := image.DecodeConfig(resp.Body); err != nil {
result.UserError = ErrCorruptedImage
} else {
info := map[string]interface{}{
"IsUpload": true,
"ImageSize": image.Point{X: m.Width, Y: m.Height},
"ImageType": strings.ToUpper(imgType),
if p.EnableImages {
// No need to limit the reader to a specific size here as
// image.DecodeConfig only reads as much as needed anyways.
if m, imgType, err := image.DecodeConfig(resp.Body); err != nil {
result.UserError = ErrCorruptedImage
} else {
info := map[string]interface{}{
"IsUpload": true,
"ImageSize": image.Point{X: m.Width, Y: m.Height},
"ImageType": strings.ToUpper(imgType),
"Title": u.Path[strings.LastIndex(u.Path, "/")+1:],
}
if resp.ContentLength > 0 {
info["Size"] = uint64(resp.ContentLength)
}
result.Information = []map[string]interface{}{info}
}
if resp.ContentLength > 0 {
info["Size"] = uint64(resp.ContentLength)
}
result.Information = []map[string]interface{}{info}
log.Printf("Got through: %+v!", info)
break
}
fallthrough
default:
// TODO - Implement generic head info?
log.Printf("web parser: Ignoring content of type %s", resp.Header.Get("content-type"))
result.Ignored = true
}
}

parsers/web/parser_test.go (new file, +133)

@ -0,0 +1,133 @@
package web
import (
"net/url"
"testing"
"time"
"github.com/icedream/irc-medialink/parsers"
"github.com/stretchr/testify/assert"
)
func mustNewParser(t *testing.T) *Parser {
p := new(Parser)
if !assert.Nil(t, p.Init(), "Parser.Init must throw no errors") {
panic("Can't run test without a proper parser")
}
return p
}
func parseWithTimeout(p *Parser, t *testing.T, timeout time.Duration, u *url.URL, ref *url.URL) (retval parsers.ParseResult) {
resultChan := make(chan parsers.ParseResult)
go func(resultChan chan<- parsers.ParseResult, p *Parser, u *url.URL, ref *url.URL) {
resultChan <- p.Parse(u, ref)
}(resultChan, p, u, ref)
select {
case r := <-resultChan:
retval = r
return
case <-time.After(timeout):
t.Fatal("Didn't succeed parsing URL in time")
return
}
}
func Test_Parser_Parse_IRCBotScience_NoTitle(t *testing.T) {
p := mustNewParser(t)
result := p.Parse(&url.URL{
Scheme: "https",
Host: "irc-bot-science.clsr.net",
Path: "notitle",
}, nil)
t.Logf("Result: %+v", result)
assert.False(t, result.Ignored)
assert.Nil(t, result.Error)
assert.Nil(t, result.UserError)
assert.Len(t, result.Information, 1)
assert.Equal(t, noTitleStr, result.Information[0]["Title"])
}
func Test_Parser_Parse_IRCBotScience_LongHeaders(t *testing.T) {
p := mustNewParser(t)
result := parseWithTimeout(p, t, 5*time.Second, &url.URL{
Scheme: "https",
Host: "irc-bot-science.clsr.net",
Path: "longheaders",
}, nil)
for result.FollowUrl != nil {
result = parseWithTimeout(p, t, 5*time.Second, result.FollowUrl, nil)
}
t.Logf("Result: %+v", result)
assert.True(t, result.Ignored)
}
func Test_Parser_Parse_IRCBotScience_BigHeader(t *testing.T) {
p := mustNewParser(t)
result := parseWithTimeout(p, t, 5*time.Second, &url.URL{
Scheme: "https",
Host: "irc-bot-science.clsr.net",
Path: "bigheader",
}, nil)
for result.FollowUrl != nil {
result = parseWithTimeout(p, t, 5*time.Second, result.FollowUrl, nil)
}
t.Logf("Result: %+v", result)
assert.True(t, result.Ignored)
}
func Test_Parser_Parse_IRCBotScience_Large(t *testing.T) {
p := mustNewParser(t)
result := parseWithTimeout(p, t, 5*time.Second, &url.URL{
Scheme: "https",
Host: "irc-bot-science.clsr.net",
Path: "large",
}, nil)
for result.FollowUrl != nil {
result = parseWithTimeout(p, t, 5*time.Second, result.FollowUrl, nil)
}
t.Logf("Result: %+v", result)
assert.False(t, result.Ignored)
assert.Nil(t, result.Error)
assert.Nil(t, result.UserError)
assert.Len(t, result.Information, 1)
assert.Equal(t, "If this title is printed, it works correctly.", result.Information[0]["Title"])
}
func Test_Parser_Parse_IRCBotScience_Redirect(t *testing.T) {
p := mustNewParser(t)
originalUrl := &url.URL{
Scheme: "https",
Host: "irc-bot-science.clsr.net",
Path: "redirect",
}
result := p.Parse(originalUrl, nil)
t.Logf("Result: %+v", result)
assert.False(t, result.Ignored)
assert.Nil(t, result.Error)
assert.Nil(t, result.UserError)
assert.NotNil(t, result.FollowUrl)
assert.Equal(t, originalUrl.String(), result.FollowUrl.String())
}
func Test_Parser_Parse_Hash(t *testing.T) {
p := mustNewParser(t)
originalUrl := &url.URL{
Scheme: "https",
Host: "www.google.com",
Path: "/#invalid",
}
result := p.Parse(originalUrl, nil)
t.Logf("Result: %+v", result)
assert.False(t, result.Ignored)
assert.Nil(t, result.Error)
assert.Nil(t, result.UserError)
}


@ -23,7 +23,10 @@ func (p *Parser) Init() error {
}
func (p *Parser) Parse(u *url.URL, referer *url.URL) (result parsers.ParseResult) {
if !strings.HasSuffix(strings.ToLower(u.Host), ".wikipedia.org") {
if !(strings.EqualFold(u.Scheme, "http") ||
strings.EqualFold(u.Scheme, "https")) ||
(!strings.HasSuffix(strings.ToLower(u.Host), ".wikipedia.org") &&
!strings.EqualFold(u.Host, "wikipedia.org")) {
result.Ignored = true
return
}
@ -37,6 +40,11 @@ func (p *Parser) Parse(u *url.URL, referer *url.URL) (result parsers.ParseResult
}
// We're using the original host for link localization
// or en.wikipedia.org for (www.)wikipedia.org
if strings.EqualFold(u.Host, "wikipedia.org") ||
strings.EqualFold(u.Host, "www.wikipedia.org") {
u.Host = "en.wikipedia.org"
}
r, err := http.Get("https://" + u.Host + "/api/rest_v1/page/summary/" + titleEscaped)
if err != nil {
result.Error = err


@ -22,9 +22,7 @@ const (
youtubeIdType_ChannelId
youtubeIdType_Playlist
header = "\x031,0You\x030,4Tube\x03" +
"99,99" + /* Fix for KiwiIRC not flushing background color on empty color tag */
"\x03" /* Fix for Mibbit interpreting 99 as green instead of transparent */
header = "\x0301,00You\x0300,04Tube"
)
var (
@ -168,6 +166,12 @@ func (p *Parser) Parse(u *url.URL, referer *url.URL) (result parsers.ParseResult
} else {
log.Print(err)
}
if item.ContentDetails.ContentRating != nil {
if item.ContentDetails.ContentRating.YtRating == "ytAgeRestricted" {
r["AgeRestriction"] = "NSFW"
}
}
}
if item.Statistics != nil {
r["Views"] = item.Statistics.ViewCount
@ -208,8 +212,7 @@ func (p *Parser) Parse(u *url.URL, referer *url.URL) (result parsers.ParseResult
r := map[string]interface{}{
"Header": header,
"IsProfile": true,
"Title": "Channel",
"Author": item.Snippet.Title,
"Name": item.Snippet.Title,
"CountryCode": item.Snippet.Country,
"Description": item.Snippet.Description,
"ShortUrl": item.Snippet.CustomUrl,

util.go (32 changed lines)

@ -1,7 +1,6 @@
package main
import (
"net/url"
"regexp"
"strings"
)
@ -28,34 +27,3 @@ func stripIrcFormatting(text string) string {
text = rxIrcColor.ReplaceAllLiteralString(text, "")
return text
}
func getYouTubeId(uri *url.URL) string {
u := &(*uri)
u.Scheme = strings.ToLower(u.Scheme)
u.Host = strings.ToLower(u.Host)
// Must be an HTTP URL
if u.Scheme != "http" && u.Scheme != "https" {
return ""
}
// Remove www. prefix from hostname
if strings.HasPrefix(u.Host, "www.") {
u.Host = u.Host[4:]
}
switch strings.ToLower(u.Host) {
case "youtu.be":
// http://youtu.be/{id}
if s, err := url.QueryUnescape(strings.TrimLeft(u.Path, "/")); err == nil {
return s
} else {
return ""
}
case "youtube.com":
// http://youtube.com/watch?v={id}
return u.Query().Get("v")
}
return ""
}


@ -0,0 +1,48 @@
package limitedio
import "io"
type limitedReader struct {
io.Reader
rest int
}
func NewLimitedReader(r io.Reader, limit int) io.Reader {
return &limitedReader{r, limit}
}
func (r *limitedReader) Read(data []byte) (n int, err error) {
if r.rest <= 0 {
err = io.EOF
return
}
var dataSize int
if len(data) < r.rest {
dataSize = len(data)
} else {
dataSize = r.rest
}
actualData := make([]byte, dataSize)
n, err = r.Reader.Read(actualData)
if n > 0 {
copy(data, actualData)
}
r.rest -= (n)
return
}
type limitedReadCloser struct {
*limitedReader
closeMethod func() error
}
func NewLimitedReadCloser(r io.ReadCloser, limit int) io.ReadCloser {
return &limitedReadCloser{&limitedReader{r, limit}, r.Close}
}
func (rc *limitedReadCloser) Close() error {
return rc.closeMethod()
}


@ -1,33 +1,3 @@
package main
import (
"net/url"
"testing"
"github.com/stretchr/testify/assert"
)
func mustParseUrl(u string) *url.URL {
if uri, err := url.Parse(u); err == nil {
return uri
} else {
panic(err)
}
}
func Test_GetYouTubeId(t *testing.T) {
assert.Equal(t, "aYz-9jUlav-", getYouTubeId(mustParseUrl("http://youtube.com/watch?v=aYz-9jUlav-")))
assert.Equal(t, "aYz-9jUlav-", getYouTubeId(mustParseUrl("https://youtube.com/watch?v=aYz-9jUlav-")))
assert.Equal(t, "aYz-9jUlav-", getYouTubeId(mustParseUrl("http://www.youtube.com/watch?v=aYz-9jUlav-")))
assert.Equal(t, "aYz-9jUlav-", getYouTubeId(mustParseUrl("https://www.youtube.com/watch?v=aYz-9jUlav-")))
assert.Equal(t, "aYz-9jUlav-", getYouTubeId(mustParseUrl("http://youtu.be/aYz-9jUlav-")))
assert.Equal(t, "aYz-9jUlav-", getYouTubeId(mustParseUrl("https://youtu.be/aYz-9jUlav-")))
assert.Equal(t, "aYz-9jUlav-", getYouTubeId(mustParseUrl("http://www.youtu.be/aYz-9jUlav-")))
assert.Equal(t, "aYz-9jUlav-", getYouTubeId(mustParseUrl("https://www.youtu.be/aYz-9jUlav-")))
}
func Benchmark_GetYouTubeId(b *testing.B) {
for n := 0; n < b.N; n++ {
getYouTubeId(mustParseUrl("http://youtube.com/watch?v=aYz-9jUlav-"))
}
}
// TODO - unit test stripIrcFormatting