Compare commits
33 Commits
Author | SHA1 | Date |
---|---|---|
|
c68a24193c | |
|
a111a35e66 | |
|
518dbeaf22 | |
|
bd63c975b8 | |
|
3faff8264f | |
|
1e3af32706 | |
|
b37a15ac1f | |
|
6d70e02641 | |
|
19708251b9 | |
|
fb85ad8554 | |
|
2508971be1 | |
|
2608df9727 | |
|
b61927108b | |
|
0eb16f9975 | |
|
f97c872b2e | |
|
6de3faa8e0 | |
|
7a131adfb8 | |
|
769e0e90a7 | |
|
d6a32315f6 | |
|
5c5f5ef478 | |
|
dc5597c054 | |
|
280da493fb | |
|
2163bfc99f | |
|
b99af84dc5 | |
|
35156593dc | |
|
ae1dce4bce | |
|
8696313f8e | |
|
ec899f0ddf | |
|
6775fe5100 | |
|
be2edc845a | |
|
b234f732df | |
|
8e55200ff4 | |
|
454d3ccada |
|
@ -0,0 +1,5 @@
|
|||
language: go
|
||||
|
||||
go:
|
||||
- 1.6.2
|
||||
- tip
|
10
README.md
10
README.md
|
@ -1,5 +1,7 @@
|
|||
# MediaLink IRC Bot
|
||||
|
||||
[![Build Status](https://travis-ci.org/icedream/irc-medialink.svg)](https://travis-ci.org/icedream/irc-medialink)
|
||||
|
||||
This IRC bot automatically parses links posted in chat rooms and prints information about them.
|
||||
|
||||
Currently explicit support has been built in for:
|
||||
|
@ -11,7 +13,7 @@ Currently explicit support has been built in for:
|
|||
|
||||
Generally, for websites that are not directly supported the bot will print the page title.
|
||||
|
||||
# How to run the bot
|
||||
## How to run the bot
|
||||
|
||||
In order to properly run the bot, you need to [register a SoundCloud application](http://soundcloud.com/you/apps/new) and [get a YouTube Data API key](https://console.developers.google.com/apis/api/youtube/overview) for it and then feed the API data to the bot through the command line arguments.
|
||||
|
||||
|
@ -25,7 +27,7 @@ Then you can find out which options you can pass to the bot directly by running
|
|||
|
||||
You need to at least pass the `--server`, `--youtube-key`, `--soundcloud-id` and `--soundcloud-secret` parameters.
|
||||
|
||||
# ...with Docker
|
||||
### ...with Docker
|
||||
|
||||
You can use the `icedream/irc-medialink` image in order to run this bot in Docker. You can pull it using this command:
|
||||
|
||||
|
@ -54,12 +56,12 @@ services:
|
|||
restart: always
|
||||
```
|
||||
|
||||
# Support
|
||||
## Support
|
||||
|
||||
This bot is officially tested and running on the LibraIRC IRC network (irc.librairc.net), though it can also run on other IRC networks.
|
||||
|
||||
For support on LibraIRC please use the channel #MediaLink there to get in contact with Icedream.
|
||||
|
||||
# License
|
||||
## License
|
||||
|
||||
This project is licensed under the **GNU General Public License Version 2 or later**. For more information check the [LICENSE](LICENSE) file.
|
||||
|
|
34
main.go
34
main.go
|
@ -32,7 +32,10 @@ func main() {
|
|||
var soundcloudClientId string
|
||||
var soundcloudClientSecret string
|
||||
|
||||
var webEnableImages bool
|
||||
|
||||
var debug bool
|
||||
var noInvite bool
|
||||
var useTLS bool
|
||||
var server string
|
||||
var password string
|
||||
|
@ -48,6 +51,7 @@ func main() {
|
|||
kingpin.Flag("nick", "The nickname.").Short('n').StringVar(&nickname)
|
||||
kingpin.Flag("ident", "The ident.").Short('i').StringVar(&ident)
|
||||
kingpin.Flag("debug", "Enables debug mode.").Short('d').BoolVar(&debug)
|
||||
kingpin.Flag("no-invite", "Disables auto-join on invite.").BoolVar(&noInvite)
|
||||
kingpin.Flag("tls", "Use TLS.").BoolVar(&useTLS)
|
||||
kingpin.Flag("server", "The server to connect to.").Short('s').StringVar(&server)
|
||||
kingpin.Flag("password", "The password to use for logging into the IRC server.").Short('p').StringVar(&password)
|
||||
|
@ -58,9 +62,14 @@ func main() {
|
|||
|
||||
// Youtube config
|
||||
kingpin.Flag("youtube-key", "The API key to use to access the YouTube API.").StringVar(&youtubeApiKey)
|
||||
|
||||
// SoundCloud config
|
||||
kingpin.Flag("soundcloud-id", "The SoundCloud ID.").StringVar(&soundcloudClientId)
|
||||
kingpin.Flag("soundcloud-secret", "The SoundCloud secret.").StringVar(&soundcloudClientSecret)
|
||||
|
||||
// Web parser config
|
||||
kingpin.Flag("images", "Enables parsing links of images. Disabled by default for legal reasons.").BoolVar(&webEnableImages)
|
||||
|
||||
kingpin.Parse()
|
||||
|
||||
if len(nickname) == 0 {
|
||||
|
@ -74,12 +83,17 @@ func main() {
|
|||
m := manager.NewManager()
|
||||
|
||||
// Load youtube parser
|
||||
if len(youtubeApiKey) > 0 {
|
||||
youtubeParser := &youtube.Parser{
|
||||
Config: &youtube.Config{ApiKey: youtubeApiKey},
|
||||
}
|
||||
must(m.RegisterParser(youtubeParser))
|
||||
} else {
|
||||
log.Println("No YouTube API key provided, YouTube parsing via API is disabled.")
|
||||
}
|
||||
|
||||
// Load soundcloud parser
|
||||
if len(soundcloudClientId) > 0 && len(soundcloudClientSecret) > 0 {
|
||||
soundcloudParser := &soundcloud.Parser{
|
||||
Config: &soundcloud.Config{
|
||||
ClientId: soundcloudClientId,
|
||||
|
@ -87,12 +101,18 @@ func main() {
|
|||
},
|
||||
}
|
||||
must(m.RegisterParser(soundcloudParser))
|
||||
} else {
|
||||
log.Println("No SoundCloud client ID or secret provided, SoundCloud parsing via API is disabled.")
|
||||
}
|
||||
|
||||
// Load wikipedia parser
|
||||
must(m.RegisterParser(new(wikipedia.Parser)))
|
||||
|
||||
// Load web parser
|
||||
must(m.RegisterParser(new(web.Parser)))
|
||||
webParser := &web.Parser{
|
||||
EnableImages: webEnableImages,
|
||||
}
|
||||
must(m.RegisterParser(webParser))
|
||||
|
||||
// IRC
|
||||
conn := m.AntifloodIrcConn(irc.IRC(nickname, ident))
|
||||
|
@ -129,6 +149,8 @@ func main() {
|
|||
conn.AddCallback("JOIN", func(e *irc.Event) {
|
||||
// Is this JOIN not about us?
|
||||
if !strings.EqualFold(e.Nick, conn.GetNick()) {
|
||||
// Save this user's details for a temporary ignore
|
||||
m.NotifyUserJoined(e.Arguments[0], e.Source)
|
||||
return
|
||||
}
|
||||
|
||||
|
@ -138,6 +160,7 @@ func main() {
|
|||
default:
|
||||
}
|
||||
})
|
||||
if !noInvite {
|
||||
conn.AddCallback("INVITE", func(e *irc.Event) {
|
||||
// Is this INVITE not for us?
|
||||
if !strings.EqualFold(e.Arguments[0], conn.GetNick()) {
|
||||
|
@ -161,7 +184,7 @@ func main() {
|
|||
time.Sleep(1 * time.Second)
|
||||
conn.Privmsgf(targetChannel, "Thanks for inviting me, %s! I am %s, the friendly bot that shows information about links posted in this channel. I hope I can be of great help for everyone here in %s! :)", sourceNick, conn.GetNick(), targetChannel)
|
||||
time.Sleep(2 * time.Second)
|
||||
conn.Privmsg(targetChannel, "If you ever run into trouble with me (or find any bugs), please us the channel #MediaLink for contact on this IRC.")
|
||||
conn.Privmsg(targetChannel, "If you ever run into trouble with me (or find any bugs), please use the channel #MediaLink for contact on this IRC.")
|
||||
break joinWaitLoop
|
||||
}
|
||||
case channel := <-inviteChan:
|
||||
|
@ -176,6 +199,7 @@ func main() {
|
|||
}(e.Nick, e.Arguments[1])
|
||||
conn.Join(e.Arguments[1])
|
||||
})
|
||||
}
|
||||
conn.AddCallback("PRIVMSG", func(e *irc.Event) {
|
||||
go func(event *irc.Event) {
|
||||
//sender := event.Nick
|
||||
|
@ -196,6 +220,12 @@ func main() {
|
|||
|
||||
log.Printf("<%s @ %s> %s", event.Nick, target, msg)
|
||||
|
||||
// Ignore user if they just joined
|
||||
if shouldIgnore := m.TrackUser(target, event.Source); shouldIgnore {
|
||||
log.Print("This message will be ignored since the user just joined.")
|
||||
return
|
||||
}
|
||||
|
||||
urlStr := xurls.Relaxed.FindString(msg)
|
||||
|
||||
switch {
|
||||
|
|
26
main.tpl
26
main.tpl
|
@ -21,10 +21,17 @@
|
|||
{{- else -}}
|
||||
Link info
|
||||
{{- end -}}
|
||||
{{- bold }}
|
||||
{{- reset }}
|
||||
|
||||
»
|
||||
|
||||
{{- if index . "AgeRestriction" }}
|
||||
{{ color 4 -}}
|
||||
{{ bold -}}
|
||||
[{{- index . "AgeRestriction" }}]
|
||||
{{- reset }}
|
||||
{{- end }}
|
||||
|
||||
{{ if index . "IsProfile" }}
|
||||
{{- if index . "Title" }}
|
||||
{{ bold -}}
|
||||
|
@ -60,15 +67,19 @@
|
|||
({{ . }})
|
||||
{{ end }}
|
||||
{{ else }}
|
||||
{{ if index . "Description" }}
|
||||
{{ excerpt 384 (index . "Description") }}
|
||||
{{ else }}
|
||||
{{ with index . "ImageType" }}
|
||||
{{ . }} image,
|
||||
{{ with index . "Description" }}
|
||||
{{ excerpt 384 . }}
|
||||
{{ end }}
|
||||
{{ end }}
|
||||
|
||||
{{ if index . "ImageType" }}
|
||||
{{ if index . "Title" }}
|
||||
·
|
||||
{{ end }}
|
||||
{{ .ImageType }} image,
|
||||
{{ if (index . "ImageSize") (index . "Size") }}
|
||||
{{ with index . "ImageSize" }}
|
||||
{{ .X }}x{{ .Y }}
|
||||
{{ .X }}×{{ .Y }}
|
||||
{{ end }}
|
||||
{{ with index . "Size" }}
|
||||
({{ size . }})
|
||||
|
@ -76,7 +87,6 @@
|
|||
{{ end }}
|
||||
{{ end }}
|
||||
{{ end }}
|
||||
{{ end }}
|
||||
|
||||
{{ if or (index . "Author") }}
|
||||
{{ if index . "Author" }}
|
||||
|
|
|
@ -17,6 +17,28 @@ func (m *Manager) initAntiflood() {
|
|||
m.cache = cache.New(1*time.Minute, 5*time.Second)
|
||||
}
|
||||
|
||||
func (m *Manager) TrackUser(target string, source string) (shouldIgnore bool) {
|
||||
key := normalizeUserAntiflood(target, source)
|
||||
|
||||
if _, ok := m.cache.Get(key); ok {
|
||||
// User just joined here recently, ignore them
|
||||
shouldIgnore = true
|
||||
}
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
func (m *Manager) NotifyUserJoined(target string, source string) {
|
||||
key := normalizeUserAntiflood(target, source)
|
||||
|
||||
// When a user joins, he will be ignored for the first 30 seconds,
|
||||
// enough to prevent parsing links from people who only join to spam their
|
||||
// links immediately
|
||||
if _, exists := m.cache.Get(key); !exists {
|
||||
m.cache.Add(key, nil, 30*time.Second)
|
||||
}
|
||||
}
|
||||
|
||||
func (m *Manager) TrackUrl(target string, u *url.URL) (shouldIgnore bool) {
|
||||
key := normalizeUrlAntiflood(target, u)
|
||||
|
||||
|
@ -70,6 +92,17 @@ func normalizeTextAntiflood(target, text string) string {
|
|||
return fmt.Sprintf("TEXT/%s/%X", strings.ToUpper(target), s.Sum([]byte{}))
|
||||
}
|
||||
|
||||
// normalizeUserAntiflood builds the cache key used to track a user in a
// target (channel). Nick and ident are always wildcarded, so the key depends
// only on the upper-cased target and the host part of source.
//
// The host is everything after the first "@" in source. When source contains
// no "@" at all, the whole string is treated as the host instead of indexing
// past the end of the split result (which used to panic).
//
// Hosts whose last dot-separated label equals "IP" (case-insensitive) get
// their first label replaced by "*".
// NOTE(review): presumably these are cloaked addresses whose first label
// varies per connection — confirm against the target IRC network.
func normalizeUserAntiflood(target, source string) string {
	host := source
	if at := strings.Index(source, "@"); at >= 0 {
		host = source[at+1:]
	}

	labels := strings.Split(host, ".")
	if len(labels) > 1 && strings.EqualFold(labels[len(labels)-1], "IP") {
		labels[0] = "*"
	}

	mask := "*!*@" + strings.Join(labels, ".")
	return fmt.Sprintf("USER/%s/%s", strings.ToUpper(target), mask)
}
|
||||
|
||||
// Proxies several methods of the IRC connection in order to drop repeated messages
|
||||
type ircConnectionProxy struct {
|
||||
*irc.Connection
|
||||
|
|
|
@ -52,33 +52,40 @@ func (m *Manager) RegisterParser(parser Parser) error {
|
|||
return nil
|
||||
}
|
||||
|
||||
func (m *Manager) Parse(u *url.URL) (string, parsers.ParseResult) {
|
||||
var oldU *url.URL
|
||||
func (m *Manager) Parse(currentUrl *url.URL) (string, parsers.ParseResult) {
|
||||
var referer *url.URL
|
||||
attempt := 0
|
||||
followLoop:
|
||||
for u != nil {
|
||||
log.Printf("Parsing %s (referer %s)...", u, oldU)
|
||||
for currentUrl != nil {
|
||||
attempt++
|
||||
if attempt > 15 {
|
||||
log.Printf("WARNING: Potential infinite loop for url %s, abort parsing", u)
|
||||
log.Printf("WARNING: Potential infinite loop for url %s, abort parsing", currentUrl)
|
||||
break
|
||||
}
|
||||
for _, p := range m.GetParsers() {
|
||||
r := p.Parse(u, oldU)
|
||||
var refererCopy *url.URL
|
||||
if referer != nil {
|
||||
refererCopy = &url.URL{}
|
||||
*refererCopy = *referer
|
||||
}
|
||||
currentUrlCopy := &url.URL{}
|
||||
*currentUrlCopy = *currentUrl
|
||||
r := p.Parse(currentUrlCopy, refererCopy)
|
||||
if r.Ignored {
|
||||
continue
|
||||
}
|
||||
if r.FollowUrl != nil {
|
||||
if *u == *r.FollowUrl {
|
||||
if *currentUrl == *r.FollowUrl {
|
||||
log.Printf("WARNING: Ignoring request to follow to same URL, ignoring.")
|
||||
break followLoop
|
||||
}
|
||||
oldU, u = u, r.FollowUrl
|
||||
referer = currentUrl
|
||||
currentUrl = r.FollowUrl
|
||||
continue followLoop
|
||||
}
|
||||
return p.Name(), r
|
||||
}
|
||||
u = nil
|
||||
currentUrl = nil
|
||||
}
|
||||
|
||||
// No parser matches, link ignored
|
||||
|
|
|
@ -2,9 +2,9 @@ package web
|
|||
|
||||
import (
|
||||
"errors"
|
||||
"log"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"regexp"
|
||||
"strings"
|
||||
|
||||
"golang.org/x/net/html"
|
||||
|
@ -16,14 +16,25 @@ import (
|
|||
_ "image/png"
|
||||
|
||||
"github.com/icedream/irc-medialink/parsers"
|
||||
"github.com/icedream/irc-medialink/util/limitedio"
|
||||
"github.com/yhat/scrape"
|
||||
)
|
||||
|
||||
var (
|
||||
ErrCorruptedImage = errors.New("Corrupted image.")
|
||||
|
||||
rxNewlines = regexp.MustCompile(`(?:\r?\n)+`)
|
||||
)
|
||||
|
||||
type Parser struct{}
|
||||
const (
|
||||
runeHash = '#'
|
||||
noTitleStr = "(no title)"
|
||||
maxHtmlSize = 8 * 1024
|
||||
)
|
||||
|
||||
type Parser struct {
|
||||
EnableImages bool
|
||||
}
|
||||
|
||||
func (p *Parser) Init() error {
|
||||
return nil
|
||||
|
@ -40,6 +51,12 @@ func (p *Parser) Parse(u *url.URL, referer *url.URL) (result parsers.ParseResult
|
|||
return
|
||||
}
|
||||
|
||||
// Remove hash reference from URL since that's not meant to be in the request
|
||||
if strings.Contains(u.Path, string(runeHash)) {
|
||||
u = &(*u) // avoid modifying original URL object
|
||||
u.Path = u.Path[0:strings.IndexRune(u.Path, runeHash)]
|
||||
}
|
||||
|
||||
// Make request
|
||||
req, err := http.NewRequest("GET", u.String(), nil)
|
||||
if err != nil {
|
||||
|
@ -49,12 +66,11 @@ func (p *Parser) Parse(u *url.URL, referer *url.URL) (result parsers.ParseResult
|
|||
if referer != nil {
|
||||
req.Header.Set("Referer", referer.String())
|
||||
}
|
||||
req.Header.Set("User-Agent", "MediaLink IRC Bot")
|
||||
if resp, err := http.DefaultTransport.RoundTrip(req); err != nil {
|
||||
log.Print("HTTP Get failed")
|
||||
result.Error = err
|
||||
return
|
||||
} else {
|
||||
log.Printf("Web parser result: %+v", resp)
|
||||
defer resp.Body.Close()
|
||||
if 300 <= resp.StatusCode && resp.StatusCode < 400 {
|
||||
if u2, err := resp.Location(); err == nil && u2 != nil && *u2 != *u {
|
||||
|
@ -75,30 +91,40 @@ func (p *Parser) Parse(u *url.URL, referer *url.URL) (result parsers.ParseResult
|
|||
if sep < 0 {
|
||||
sep = len(contentType)
|
||||
}
|
||||
log.Print(contentType[0:sep])
|
||||
switch strings.ToLower(contentType[0:sep]) {
|
||||
case "text/html":
|
||||
// Parse the page
|
||||
root, err := html.Parse(resp.Body)
|
||||
var contentLength int
|
||||
if resp.ContentLength < 0 || resp.ContentLength > maxHtmlSize {
|
||||
contentLength = maxHtmlSize
|
||||
} else {
|
||||
contentLength = int(resp.ContentLength)
|
||||
}
|
||||
limitedBody := limitedio.NewLimitedReader(resp.Body, contentLength)
|
||||
root, err := html.Parse(limitedBody)
|
||||
if err != nil {
|
||||
result.Error = err
|
||||
return
|
||||
}
|
||||
// Search for the title
|
||||
title, ok := scrape.Find(root, scrape.ByTag(atom.Title))
|
||||
if ok {
|
||||
// Got it!
|
||||
result.Information = []map[string]interface{}{
|
||||
map[string]interface{}{
|
||||
"IsUpload": false,
|
||||
"Title": scrape.Text(title),
|
||||
},
|
||||
}
|
||||
title, ok := scrape.Find(root, scrape.ByTag(atom.Title))
|
||||
if ok {
|
||||
// Got it!
|
||||
result.Information[0]["Title"] = rxNewlines.ReplaceAllString(scrape.Text(title), " ")
|
||||
} else {
|
||||
result.Ignored = true
|
||||
// No title found
|
||||
result.Information[0]["Title"] = noTitleStr
|
||||
}
|
||||
case "image/png", "image/jpeg", "image/gif":
|
||||
log.Print("Parsing image...")
|
||||
if p.EnableImages {
|
||||
|
||||
// No need to limit the reader to a specific size here as
|
||||
// image.DecodeConfig only reads as much as needed anyways.
|
||||
if m, imgType, err := image.DecodeConfig(resp.Body); err != nil {
|
||||
result.UserError = ErrCorruptedImage
|
||||
} else {
|
||||
|
@ -106,16 +132,19 @@ func (p *Parser) Parse(u *url.URL, referer *url.URL) (result parsers.ParseResult
|
|||
"IsUpload": true,
|
||||
"ImageSize": image.Point{X: m.Width, Y: m.Height},
|
||||
"ImageType": strings.ToUpper(imgType),
|
||||
"Title": u.Path[strings.LastIndex(u.Path, "/")+1:],
|
||||
}
|
||||
if resp.ContentLength > 0 {
|
||||
info["Size"] = uint64(resp.ContentLength)
|
||||
}
|
||||
result.Information = []map[string]interface{}{info}
|
||||
log.Printf("Got through: %+v!", info)
|
||||
}
|
||||
break
|
||||
}
|
||||
|
||||
fallthrough
|
||||
default:
|
||||
// TODO - Implement generic head info?
|
||||
log.Printf("web parser: Ignoring content of type %s", resp.Header.Get("content-type"))
|
||||
result.Ignored = true
|
||||
}
|
||||
}
|
||||
|
|
|
@ -0,0 +1,133 @@
|
|||
package web
|
||||
|
||||
import (
|
||||
"net/url"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/icedream/irc-medialink/parsers"
|
||||
"github.com/stretchr/testify/assert"
|
||||
)
|
||||
|
||||
func mustNewParser(t *testing.T) *Parser {
|
||||
p := new(Parser)
|
||||
if !assert.Nil(t, p.Init(), "Parser.Init must throw no errors") {
|
||||
panic("Can't run test without a proper parser")
|
||||
}
|
||||
return p
|
||||
}
|
||||
|
||||
func parseWithTimeout(p *Parser, t *testing.T, timeout time.Duration, u *url.URL, ref *url.URL) (retval parsers.ParseResult) {
|
||||
resultChan := make(chan parsers.ParseResult)
|
||||
go func(resultChan chan<- parsers.ParseResult, p *Parser, u *url.URL, ref *url.URL) {
|
||||
resultChan <- p.Parse(u, ref)
|
||||
}(resultChan, p, u, ref)
|
||||
|
||||
select {
|
||||
case r := <-resultChan:
|
||||
retval = r
|
||||
return
|
||||
case <-time.After(timeout):
|
||||
t.Fatal("Didn't succeed parsing URL in time")
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
func Test_Parser_Parse_IRCBotScience_NoTitle(t *testing.T) {
|
||||
p := mustNewParser(t)
|
||||
result := p.Parse(&url.URL{
|
||||
Scheme: "https",
|
||||
Host: "irc-bot-science.clsr.net",
|
||||
Path: "notitle",
|
||||
}, nil)
|
||||
|
||||
t.Logf("Result: %+v", result)
|
||||
assert.False(t, result.Ignored)
|
||||
assert.Nil(t, result.Error)
|
||||
assert.Nil(t, result.UserError)
|
||||
assert.Len(t, result.Information, 1)
|
||||
assert.Equal(t, noTitleStr, result.Information[0]["Title"])
|
||||
}
|
||||
|
||||
func Test_Parser_Parse_IRCBotScience_LongHeaders(t *testing.T) {
|
||||
p := mustNewParser(t)
|
||||
result := parseWithTimeout(p, t, 5*time.Second, &url.URL{
|
||||
Scheme: "https",
|
||||
Host: "irc-bot-science.clsr.net",
|
||||
Path: "longheaders",
|
||||
}, nil)
|
||||
for result.FollowUrl != nil {
|
||||
result = parseWithTimeout(p, t, 5*time.Second, result.FollowUrl, nil)
|
||||
}
|
||||
|
||||
t.Logf("Result: %+v", result)
|
||||
assert.True(t, result.Ignored)
|
||||
}
|
||||
|
||||
func Test_Parser_Parse_IRCBotScience_BigHeader(t *testing.T) {
|
||||
p := mustNewParser(t)
|
||||
result := parseWithTimeout(p, t, 5*time.Second, &url.URL{
|
||||
Scheme: "https",
|
||||
Host: "irc-bot-science.clsr.net",
|
||||
Path: "bigheader",
|
||||
}, nil)
|
||||
for result.FollowUrl != nil {
|
||||
result = parseWithTimeout(p, t, 5*time.Second, result.FollowUrl, nil)
|
||||
}
|
||||
|
||||
t.Logf("Result: %+v", result)
|
||||
assert.True(t, result.Ignored)
|
||||
}
|
||||
|
||||
func Test_Parser_Parse_IRCBotScience_Large(t *testing.T) {
|
||||
p := mustNewParser(t)
|
||||
|
||||
result := parseWithTimeout(p, t, 5*time.Second, &url.URL{
|
||||
Scheme: "https",
|
||||
Host: "irc-bot-science.clsr.net",
|
||||
Path: "large",
|
||||
}, nil)
|
||||
for result.FollowUrl != nil {
|
||||
result = parseWithTimeout(p, t, 5*time.Second, result.FollowUrl, nil)
|
||||
}
|
||||
|
||||
t.Logf("Result: %+v", result)
|
||||
assert.False(t, result.Ignored)
|
||||
assert.Nil(t, result.Error)
|
||||
assert.Nil(t, result.UserError)
|
||||
assert.Len(t, result.Information, 1)
|
||||
assert.Equal(t, "If this title is printed, it works correctly.", result.Information[0]["Title"])
|
||||
|
||||
}
|
||||
|
||||
func Test_Parser_Parse_IRCBotScience_Redirect(t *testing.T) {
|
||||
p := mustNewParser(t)
|
||||
originalUrl := &url.URL{
|
||||
Scheme: "https",
|
||||
Host: "irc-bot-science.clsr.net",
|
||||
Path: "redirect",
|
||||
}
|
||||
result := p.Parse(originalUrl, nil)
|
||||
|
||||
t.Logf("Result: %+v", result)
|
||||
assert.False(t, result.Ignored)
|
||||
assert.Nil(t, result.Error)
|
||||
assert.Nil(t, result.UserError)
|
||||
assert.NotNil(t, result.FollowUrl)
|
||||
assert.Equal(t, originalUrl.String(), result.FollowUrl.String())
|
||||
}
|
||||
|
||||
func Test_Parser_Parse_Hash(t *testing.T) {
|
||||
p := mustNewParser(t)
|
||||
originalUrl := &url.URL{
|
||||
Scheme: "https",
|
||||
Host: "www.google.com",
|
||||
Path: "/#invalid",
|
||||
}
|
||||
result := p.Parse(originalUrl, nil)
|
||||
|
||||
t.Logf("Result: %+v", result)
|
||||
assert.False(t, result.Ignored)
|
||||
assert.Nil(t, result.Error)
|
||||
assert.Nil(t, result.UserError)
|
||||
}
|
|
@ -23,7 +23,10 @@ func (p *Parser) Init() error {
|
|||
}
|
||||
|
||||
func (p *Parser) Parse(u *url.URL, referer *url.URL) (result parsers.ParseResult) {
|
||||
if !strings.HasSuffix(strings.ToLower(u.Host), ".wikipedia.org") {
|
||||
if !(strings.EqualFold(u.Scheme, "http") ||
|
||||
strings.EqualFold(u.Scheme, "https")) ||
|
||||
(!strings.HasSuffix(strings.ToLower(u.Host), ".wikipedia.org") &&
|
||||
!strings.EqualFold(u.Host, "wikipedia.org")) {
|
||||
result.Ignored = true
|
||||
return
|
||||
}
|
||||
|
@ -37,6 +40,11 @@ func (p *Parser) Parse(u *url.URL, referer *url.URL) (result parsers.ParseResult
|
|||
}
|
||||
|
||||
// We're using the original host for link localization
|
||||
// or en.wikipedia.org for (www.)wikipedia.org
|
||||
if strings.EqualFold(u.Host, "wikipedia.org") ||
|
||||
strings.EqualFold(u.Host, "www.wikipedia.org") {
|
||||
u.Host = "en.wikipedia.org"
|
||||
}
|
||||
r, err := http.Get("https://" + u.Host + "/api/rest_v1/page/summary/" + titleEscaped)
|
||||
if err != nil {
|
||||
result.Error = err
|
||||
|
|
|
@ -22,9 +22,7 @@ const (
|
|||
youtubeIdType_ChannelId
|
||||
youtubeIdType_Playlist
|
||||
|
||||
header = "\x031,0You\x030,4Tube\x03" +
|
||||
"99,99" + /* Fix for KiwiIRC not flushing background color on empty color tag */
|
||||
"\x03" /* Fix for Mibbit interpreting 99 as green instead of transparent */
|
||||
header = "\x0301,00You\x0300,04Tube"
|
||||
)
|
||||
|
||||
var (
|
||||
|
@ -168,6 +166,12 @@ func (p *Parser) Parse(u *url.URL, referer *url.URL) (result parsers.ParseResult
|
|||
} else {
|
||||
log.Print(err)
|
||||
}
|
||||
|
||||
if item.ContentDetails.ContentRating != nil {
|
||||
if item.ContentDetails.ContentRating.YtRating == "ytAgeRestricted" {
|
||||
r["AgeRestriction"] = "NSFW"
|
||||
}
|
||||
}
|
||||
}
|
||||
if item.Statistics != nil {
|
||||
r["Views"] = item.Statistics.ViewCount
|
||||
|
@ -208,8 +212,7 @@ func (p *Parser) Parse(u *url.URL, referer *url.URL) (result parsers.ParseResult
|
|||
r := map[string]interface{}{
|
||||
"Header": header,
|
||||
"IsProfile": true,
|
||||
"Title": "Channel",
|
||||
"Author": item.Snippet.Title,
|
||||
"Name": item.Snippet.Title,
|
||||
"CountryCode": item.Snippet.Country,
|
||||
"Description": item.Snippet.Description,
|
||||
"ShortUrl": item.Snippet.CustomUrl,
|
||||
|
|
32
util.go
32
util.go
|
@ -1,7 +1,6 @@
|
|||
package main
|
||||
|
||||
import (
|
||||
"net/url"
|
||||
"regexp"
|
||||
"strings"
|
||||
)
|
||||
|
@ -28,34 +27,3 @@ func stripIrcFormatting(text string) string {
|
|||
text = rxIrcColor.ReplaceAllLiteralString(text, "")
|
||||
return text
|
||||
}
|
||||
|
||||
// getYouTubeId extracts the video ID from a YouTube URL and returns "" when
// uri is nil, is not an http/https URL, or does not point at a recognized
// YouTube host.
//
// Scheme and host are compared case-insensitively and a leading "www." on
// the host is ignored. Two forms are recognized:
//
//	http://youtu.be/{id}
//	http://youtube.com/watch?v={id}
//
// The URL passed in is never modified. The previous implementation took
// `&(*uri)`, which is the same pointer rather than a copy, and therefore
// rewrote the caller's Scheme and Host in place.
func getYouTubeId(uri *url.URL) string {
	if uri == nil {
		return ""
	}

	// Must be an HTTP URL
	scheme := strings.ToLower(uri.Scheme)
	if scheme != "http" && scheme != "https" {
		return ""
	}

	// Remove www. prefix from hostname
	host := strings.TrimPrefix(strings.ToLower(uri.Host), "www.")

	switch host {
	case "youtu.be":
		// http://youtu.be/{id}
		id, err := url.QueryUnescape(strings.TrimLeft(uri.Path, "/"))
		if err != nil {
			return ""
		}
		return id
	case "youtube.com":
		// http://youtube.com/watch?v={id}
		return uri.Query().Get("v")
	}

	return ""
}
|
||||
|
|
|
@ -0,0 +1,48 @@
|
|||
package limitedio
|
||||
|
||||
import "io"
|
||||
|
||||
// limitedReader wraps an io.Reader and stops yielding data once a fixed
// number of bytes has been read through it.
type limitedReader struct {
	io.Reader
	rest int // number of bytes that may still be read
}

// NewLimitedReader returns a reader that passes through at most limit bytes
// from r and reports io.EOF afterwards. A limit of zero or less yields
// io.EOF on the first Read.
func NewLimitedReader(r io.Reader, limit int) io.Reader {
	return &limitedReader{r, limit}
}

// Read reads from the wrapped reader into data, never handing out more than
// the remaining byte budget. Once the budget is used up it returns
// (0, io.EOF) without touching the wrapped reader.
func (r *limitedReader) Read(data []byte) (n int, err error) {
	if r.rest <= 0 {
		return 0, io.EOF
	}

	// Shrink the destination to the remaining budget and read straight into
	// the caller's buffer. This avoids allocating and copying a scratch
	// buffer on every call.
	if len(data) > r.rest {
		data = data[:r.rest]
	}

	n, err = r.Reader.Read(data)
	r.rest -= n

	return n, err
}
|
||||
|
||||
type limitedReadCloser struct {
|
||||
*limitedReader
|
||||
closeMethod func() error
|
||||
}
|
||||
|
||||
func NewLimitedReadCloser(r io.ReadCloser, limit int) io.Reader {
|
||||
return &limitedReadCloser{&limitedReader{r, limit}, r.Close}
|
||||
}
|
||||
|
||||
func (rc *limitedReadCloser) Close() error {
|
||||
return rc.closeMethod()
|
||||
}
|
32
util_test.go
32
util_test.go
|
@ -1,33 +1,3 @@
|
|||
package main
|
||||
|
||||
import (
|
||||
"net/url"
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
)
|
||||
|
||||
// mustParseUrl parses u with url.Parse and returns the result. It panics
// with the parse error when u is not a valid URL, which keeps test call
// sites free of error handling.
func mustParseUrl(u string) *url.URL {
	parsed, err := url.Parse(u)
	if err != nil {
		panic(err)
	}
	return parsed
}
|
||||
|
||||
func Test_GetYouTubeId(t *testing.T) {
|
||||
assert.Equal(t, "aYz-9jUlav-", getYouTubeId(mustParseUrl("http://youtube.com/watch?v=aYz-9jUlav-")))
|
||||
assert.Equal(t, "aYz-9jUlav-", getYouTubeId(mustParseUrl("https://youtube.com/watch?v=aYz-9jUlav-")))
|
||||
assert.Equal(t, "aYz-9jUlav-", getYouTubeId(mustParseUrl("http://www.youtube.com/watch?v=aYz-9jUlav-")))
|
||||
assert.Equal(t, "aYz-9jUlav-", getYouTubeId(mustParseUrl("https://www.youtube.com/watch?v=aYz-9jUlav-")))
|
||||
assert.Equal(t, "aYz-9jUlav-", getYouTubeId(mustParseUrl("http://youtu.be/aYz-9jUlav-")))
|
||||
assert.Equal(t, "aYz-9jUlav-", getYouTubeId(mustParseUrl("https://youtu.be/aYz-9jUlav-")))
|
||||
assert.Equal(t, "aYz-9jUlav-", getYouTubeId(mustParseUrl("http://www.youtu.be/aYz-9jUlav-")))
|
||||
assert.Equal(t, "aYz-9jUlav-", getYouTubeId(mustParseUrl("https://www.youtu.be/aYz-9jUlav-")))
|
||||
}
|
||||
|
||||
func Benchmark_GetYouTubeId(b *testing.B) {
|
||||
for n := 0; n < b.N; n++ {
|
||||
getYouTubeId(mustParseUrl("http://youtube.com/watch?v=aYz-9jUlav-"))
|
||||
}
|
||||
}
|
||||
// TODO - unit test stripIrcFormatting
|
||||
|
|
Loading…
Reference in New Issue