Compare commits
No commits in common. "master" and "v1.0.0" have entirely different histories.
@@ -1,5 +0,0 @@
-language: go
-
-go:
-- 1.6.2
-- tip
README.md (10 changed lines)
@@ -1,7 +1,5 @@
 # MediaLink IRC Bot
 
-[](https://travis-ci.org/icedream/irc-medialink)
-
 This IRC bot automatically parses links posted in chat rooms and prints information about them.
 
 Currently explicit support has been built in for:
@@ -13,7 +11,7 @@ Currently explicit support has been built in for:
 
 Generally, for websites that are not directly supported the bot will print the page title.
 
-## How to run the bot
+# How to run the bot
 
 In order to properly run the bot, you need to [register a SoundCloud application](http://soundcloud.com/you/apps/new) and [get a YouTube Data API key](https://console.developers.google.com/apis/api/youtube/overview) for it and then feed the API data to the bot through the command line arguments.
 
@@ -27,7 +25,7 @@ Then you can find out which options you can pass to the bot directly by running
 
 You need to at least pass the `--server`, `--youtube-key`, `--soundcloud-id` and `--soundcloud-secret` parameters.
 
-### ...with Docker
+# ...with Docker
 
 You can use the `icedream/irc-medialink` image in order to run this bot in Docker. You can pull it using this command:
 
@@ -56,12 +54,12 @@ services:
 restart: always
 ```
 
-## Support
+# Support
 
 This bot is officially tested and running on the LibraIRC IRC network (irc.librairc.net) though also being able to run on other IRC networks.
 
 For support on LibraIRC please use the channel #MediaLink there to get in contact with Icedream.
 
-## License
+# License
 
 This project is licensed under the **GNU General Public License Version 2 or later**. For more information check the [LICENSE](LICENSE) file.
main.go (34 changed lines)
@@ -32,10 +32,7 @@ func main() {
 var soundcloudClientId string
 var soundcloudClientSecret string
 
-var webEnableImages bool
-
 var debug bool
-var noInvite bool
 var useTLS bool
 var server string
 var password string
@@ -51,7 +48,6 @@ func main() {
 kingpin.Flag("nick", "The nickname.").Short('n').StringVar(&nickname)
 kingpin.Flag("ident", "The ident.").Short('i').StringVar(&ident)
 kingpin.Flag("debug", "Enables debug mode.").Short('d').BoolVar(&debug)
-kingpin.Flag("no-invite", "Disables auto-join on invite.").BoolVar(&noInvite)
 kingpin.Flag("tls", "Use TLS.").BoolVar(&useTLS)
 kingpin.Flag("server", "The server to connect to.").Short('s').StringVar(&server)
 kingpin.Flag("password", "The password to use for logging into the IRC server.").Short('p').StringVar(&password)
@@ -62,14 +58,9 @@ func main() {
 
 // Youtube config
 kingpin.Flag("youtube-key", "The API key to use to access the YouTube API.").StringVar(&youtubeApiKey)
-
-// SoundCloud config
 kingpin.Flag("soundcloud-id", "The SoundCloud ID.").StringVar(&soundcloudClientId)
 kingpin.Flag("soundcloud-secret", "The SoundCloud secret.").StringVar(&soundcloudClientSecret)
 
-// Web parser config
-kingpin.Flag("images", "Enables parsing links of images. Disabled by default for legal reasons.").BoolVar(&webEnableImages)
-
 kingpin.Parse()
 
 if len(nickname) == 0 {
@@ -83,17 +74,12 @@ func main() {
 m := manager.NewManager()
 
 // Load youtube parser
-if len(youtubeApiKey) > 0 {
 youtubeParser := &youtube.Parser{
 Config: &youtube.Config{ApiKey: youtubeApiKey},
 }
 must(m.RegisterParser(youtubeParser))
-} else {
-log.Println("No YouTube API key provided, YouTube parsing via API is disabled.")
-}
 
 // Load soundcloud parser
-if len(soundcloudClientId) > 0 && len(soundcloudClientSecret) > 0 {
 soundcloudParser := &soundcloud.Parser{
 Config: &soundcloud.Config{
 ClientId: soundcloudClientId,
@@ -101,18 +87,12 @@ func main() {
 },
 }
 must(m.RegisterParser(soundcloudParser))
-} else {
-log.Println("No SoundCloud client ID or secret provided, SoundCloud parsing via API is disabled.")
-}
 
 // Load wikipedia parser
 must(m.RegisterParser(new(wikipedia.Parser)))
 
 // Load web parser
-webParser := &web.Parser{
-EnableImages: webEnableImages,
-}
-must(m.RegisterParser(webParser))
+must(m.RegisterParser(new(web.Parser)))
 
 // IRC
 conn := m.AntifloodIrcConn(irc.IRC(nickname, ident))
@@ -149,8 +129,6 @@ func main() {
 conn.AddCallback("JOIN", func(e *irc.Event) {
 // Is this JOIN not about us?
 if !strings.EqualFold(e.Nick, conn.GetNick()) {
-// Save this user's details for a temporary ignore
-m.NotifyUserJoined(e.Arguments[0], e.Source)
 return
 }
 
@@ -160,7 +138,6 @@ func main() {
 default:
 }
 })
-if !noInvite {
 conn.AddCallback("INVITE", func(e *irc.Event) {
 // Is this INVITE not for us?
 if !strings.EqualFold(e.Arguments[0], conn.GetNick()) {
@@ -184,7 +161,7 @@ func main() {
 time.Sleep(1 * time.Second)
 conn.Privmsgf(targetChannel, "Thanks for inviting me, %s! I am %s, the friendly bot that shows information about links posted in this channel. I hope I can be of great help for everyone here in %s! :)", sourceNick, conn.GetNick(), targetChannel)
 time.Sleep(2 * time.Second)
-conn.Privmsg(targetChannel, "If you ever run into trouble with me (or find any bugs), please use the channel #MediaLink for contact on this IRC.")
+conn.Privmsg(targetChannel, "If you ever run into trouble with me (or find any bugs), please us the channel #MediaLink for contact on this IRC.")
 break joinWaitLoop
 }
 case channel := <-inviteChan:
@@ -199,7 +176,6 @@ func main() {
 }(e.Nick, e.Arguments[1])
 conn.Join(e.Arguments[1])
 })
-}
 conn.AddCallback("PRIVMSG", func(e *irc.Event) {
 go func(event *irc.Event) {
 //sender := event.Nick
@@ -220,12 +196,6 @@ func main() {
 
 log.Printf("<%s @ %s> %s", event.Nick, target, msg)
 
-// Ignore user if they just joined
-if shouldIgnore := m.TrackUser(target, event.Source); shouldIgnore {
-log.Print("This message will be ignored since the user just joined.")
-return
-}
-
 urlStr := xurls.Relaxed.FindString(msg)
 
 switch {
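The main.go hunks above register every parser through `m.RegisterParser(...)`, and the `Manager.Parse` hunk further down drives the registered parsers through `p.Name()` and `p.Parse(u, referer)`. The parser interface itself is not included in this compare, so the following is only a sketch of the shape those call sites imply; every name is inferred from the diff rather than copied from the repository, and the real definitions may differ.

```go
// Inferred sketch only - not copied from the repository.
package parsers

import "net/url"

// ParseResult mirrors the fields the diff actually touches:
// Ignored, Error, UserError, FollowUrl and Information.
type ParseResult struct {
	Ignored     bool
	Error       error
	UserError   error
	FollowUrl   *url.URL
	Information []map[string]interface{}
}

// Parser is the shape implied by m.RegisterParser(p), p.Init(),
// p.Name() and p.Parse(u, referer) as they appear in this compare.
type Parser interface {
	Init() error
	Name() string
	Parse(u *url.URL, referer *url.URL) ParseResult
}
```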
main.tpl (26 changed lines)
@@ -21,17 +21,10 @@
 {{- else -}}
 Link info
 {{- end -}}
-{{- reset }}
+{{- bold }}
 
 »
 
-{{- if index . "AgeRestriction" }}
-{{ color 4 -}}
-{{ bold -}}
-[{{- index . "AgeRestriction" }}]
-{{- reset }}
-{{- end }}
-
 {{ if index . "IsProfile" }}
 {{- if index . "Title" }}
 {{ bold -}}
@@ -67,19 +60,15 @@
 ({{ . }})
 {{ end }}
 {{ else }}
-{{ with index . "Description" }}
-{{ excerpt 384 . }}
+{{ if index . "Description" }}
+{{ excerpt 384 (index . "Description") }}
+{{ else }}
+{{ with index . "ImageType" }}
+{{ . }} image,
 {{ end }}
-{{ end }}
-
-{{ if index . "ImageType" }}
-{{ if index . "Title" }}
-·
-{{ end }}
-{{ .ImageType }} image,
 {{ if (index . "ImageSize") (index . "Size") }}
 {{ with index . "ImageSize" }}
-{{ .X }}×{{ .Y }}
+{{ .X }}x{{ .Y }}
 {{ end }}
 {{ with index . "Size" }}
 ({{ size . }})
@@ -87,6 +76,7 @@
 {{ end }}
 {{ end }}
 {{ end }}
+{{ end }}
 
 {{ if or (index . "Author") }}
 {{ if index . "Author" }}
@@ -17,28 +17,6 @@ func (m *Manager) initAntiflood() {
 m.cache = cache.New(1*time.Minute, 5*time.Second)
 }
 
-func (m *Manager) TrackUser(target string, source string) (shouldIgnore bool) {
-key := normalizeUserAntiflood(target, source)
-
-if _, ok := m.cache.Get(key); ok {
-// User just joined here recently, ignore them
-shouldIgnore = true
-}
-
-return
-}
-
-func (m *Manager) NotifyUserJoined(target string, source string) {
-key := normalizeUserAntiflood(target, source)
-
-// When a user joins, he will be ignored for the first 30 seconds,
-// enough to prevent parsing links from people who only join to spam their
-// links immediately
-if _, exists := m.cache.Get(key); !exists {
-m.cache.Add(key, nil, 30*time.Second)
-}
-}
-
 func (m *Manager) TrackUrl(target string, u *url.URL) (shouldIgnore bool) {
 key := normalizeUrlAntiflood(target, u)
 
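This hunk is where master adds the temporary ignore for fresh joiners: `NotifyUserJoined` caches a normalized user key for 30 seconds, and `TrackUser` reports the user as ignorable while that key is still present. The `cache.New`/`Add`/`Get` calls match the API of github.com/patrickmn/go-cache, but the import is not visible in this excerpt, so treat the package identity as an assumption. In isolation the mechanism looks roughly like this:

```go
package main

import (
	"fmt"
	"time"

	cache "github.com/patrickmn/go-cache" // assumed to be the cache package used above
)

func main() {
	// Same construction as initAntiflood in the hunk above:
	// 1 minute default TTL, cleanup sweep every 5 seconds.
	c := cache.New(1*time.Minute, 5*time.Second)

	// Hypothetical normalized key; the real one comes from normalizeUserAntiflood.
	key := "USER/#CHANNEL/*!*@example.host"

	// NotifyUserJoined: remember the join for 30 seconds.
	// Add only inserts if the key is not cached yet, matching the !exists check above.
	_ = c.Add(key, nil, 30*time.Second)

	// TrackUser: while the key is still cached, the user's links are ignored.
	if _, ok := c.Get(key); ok {
		fmt.Println("user joined recently, ignoring their links for now")
	}
}
```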
@@ -92,17 +70,6 @@ func normalizeTextAntiflood(target, text string) string {
 return fmt.Sprintf("TEXT/%s/%X", strings.ToUpper(target), s.Sum([]byte{}))
 }
 
-func normalizeUserAntiflood(target, source string) string {
-sourceSplitHost := strings.SplitN(source, "@", 2)
-sourceSplitHostname := strings.Split(sourceSplitHost[1], ".")
-if len(sourceSplitHostname) > 1 &&
-strings.EqualFold(sourceSplitHostname[len(sourceSplitHostname)-1], "IP") {
-sourceSplitHostname[0] = "*"
-}
-source = fmt.Sprintf("%s!%s@%s", "*", "*", strings.Join(sourceSplitHostname, "."))
-return fmt.Sprintf("USER/%s/%s", strings.ToUpper(target), source)
-}
-
 // Proxies several methods of the IRC connection in order to drop repeated messages
 type ircConnectionProxy struct {
 *irc.Connection
@@ -52,40 +52,33 @@ func (m *Manager) RegisterParser(parser Parser) error {
 return nil
 }
 
-func (m *Manager) Parse(currentUrl *url.URL) (string, parsers.ParseResult) {
-var referer *url.URL
+func (m *Manager) Parse(u *url.URL) (string, parsers.ParseResult) {
+var oldU *url.URL
 attempt := 0
 followLoop:
-for currentUrl != nil {
+for u != nil {
+log.Printf("Parsing %s (referer %s)...", u, oldU)
 attempt++
 if attempt > 15 {
-log.Printf("WARNING: Potential infinite loop for url %s, abort parsing", currentUrl)
+log.Printf("WARNING: Potential infinite loop for url %s, abort parsing", u)
 break
 }
 for _, p := range m.GetParsers() {
-var refererCopy *url.URL
-if referer != nil {
-refererCopy = &url.URL{}
-*refererCopy = *referer
-}
-currentUrlCopy := &url.URL{}
-*currentUrlCopy = *currentUrl
-r := p.Parse(currentUrlCopy, refererCopy)
+r := p.Parse(u, oldU)
 if r.Ignored {
 continue
 }
 if r.FollowUrl != nil {
-if *currentUrl == *r.FollowUrl {
+if *u == *r.FollowUrl {
 log.Printf("WARNING: Ignoring request to follow to same URL, ignoring.")
 break followLoop
 }
-referer = currentUrl
-currentUrl = r.FollowUrl
+oldU, u = u, r.FollowUrl
 continue followLoop
 }
 return p.Name(), r
 }
-currentUrl = nil
+u = nil
 }
 
 // No parser matches, link ignored
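A behavioural note on the `Manager.Parse` hunk above: master hands every parser fresh copies of the current URL and the referer (`currentUrlCopy`, `refererCopy`), while v1.0.0 passed the shared `*url.URL` straight through, so a parser that rewrites its argument (the wikipedia parser in this compare sets `u.Host = "en.wikipedia.org"`) would leak that change into later parsers and follow-ups. A minimal, self-contained illustration of the copy-before-call pattern, with placeholder names:

```go
package main

import (
	"fmt"
	"net/url"
)

// mutate stands in for a parser that rewrites its argument,
// e.g. forcing the host to a language-specific mirror.
func mutate(u *url.URL) {
	u.Host = "en.wikipedia.org"
}

func main() {
	current, err := url.Parse("https://www.wikipedia.org/wiki/Go")
	if err != nil {
		panic(err)
	}

	// Shallow copy, as in master's `currentUrlCopy := &url.URL{}; *currentUrlCopy = *currentUrl`.
	copied := &url.URL{}
	*copied = *current

	mutate(copied)

	fmt.Println(current.Host) // still "www.wikipedia.org"
	fmt.Println(copied.Host)  // "en.wikipedia.org"
}
```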
@@ -2,9 +2,9 @@ package web
 
 import (
 "errors"
+"log"
 "net/http"
 "net/url"
-"regexp"
 "strings"
 
 "golang.org/x/net/html"
@@ -16,25 +16,14 @@ import (
 _ "image/png"
 
 "github.com/icedream/irc-medialink/parsers"
-"github.com/icedream/irc-medialink/util/limitedio"
 "github.com/yhat/scrape"
 )
 
 var (
 ErrCorruptedImage = errors.New("Corrupted image.")
-
-rxNewlines = regexp.MustCompile(`(?:\r?\n)+`)
 )
 
-const (
-runeHash = '#'
-noTitleStr = "(no title)"
-maxHtmlSize = 8 * 1024
-)
-
-type Parser struct {
-EnableImages bool
-}
+type Parser struct{}
 
 func (p *Parser) Init() error {
 return nil
@@ -51,12 +40,6 @@ func (p *Parser) Parse(u *url.URL, referer *url.URL) (result parsers.ParseResult
 return
 }
 
-// Remove hash reference from URL since that's not meant to be in the request
-if strings.Contains(u.Path, string(runeHash)) {
-u = &(*u) // avoid modifying original URL object
-u.Path = u.Path[0:strings.IndexRune(u.Path, runeHash)]
-}
-
 // Make request
 req, err := http.NewRequest("GET", u.String(), nil)
 if err != nil {
@@ -66,11 +49,12 @@ func (p *Parser) Parse(u *url.URL, referer *url.URL) (result parsers.ParseResult
 if referer != nil {
 req.Header.Set("Referer", referer.String())
 }
-req.Header.Set("User-Agent", "MediaLink IRC Bot")
 if resp, err := http.DefaultTransport.RoundTrip(req); err != nil {
+log.Print("HTTP Get failed")
 result.Error = err
 return
 } else {
+log.Printf("Web parser result: %+v", resp)
 defer resp.Body.Close()
 if 300 <= resp.StatusCode && resp.StatusCode < 400 {
 if u2, err := resp.Location(); err == nil && u2 != nil && *u2 != *u {
@@ -91,40 +75,30 @@ func (p *Parser) Parse(u *url.URL, referer *url.URL) (result parsers.ParseResult
 if sep < 0 {
 sep = len(contentType)
 }
+log.Print(contentType[0:sep])
 switch strings.ToLower(contentType[0:sep]) {
 case "text/html":
 // Parse the page
-var contentLength int
-if resp.ContentLength < 0 || resp.ContentLength > maxHtmlSize {
-contentLength = maxHtmlSize
-} else {
-contentLength = int(resp.ContentLength)
-}
-limitedBody := limitedio.NewLimitedReader(resp.Body, contentLength)
-root, err := html.Parse(limitedBody)
+root, err := html.Parse(resp.Body)
 if err != nil {
 result.Error = err
 return
 }
 // Search for the title
-result.Information = []map[string]interface{}{
-map[string]interface{}{
-"IsUpload": false,
-},
-}
 title, ok := scrape.Find(root, scrape.ByTag(atom.Title))
 if ok {
 // Got it!
-result.Information[0]["Title"] = rxNewlines.ReplaceAllString(scrape.Text(title), " ")
+result.Information = []map[string]interface{}{
+map[string]interface{}{
+"IsUpload": false,
+"Title": scrape.Text(title),
+},
+}
 } else {
-// No title found
-result.Information[0]["Title"] = noTitleStr
+result.Ignored = true
 }
 case "image/png", "image/jpeg", "image/gif":
-if p.EnableImages {
-
-// No need to limit the reader to a specific size here as
-// image.DecodeConfig only reads as much as needed anyways.
+log.Print("Parsing image...")
 if m, imgType, err := image.DecodeConfig(resp.Body); err != nil {
 result.UserError = ErrCorruptedImage
 } else {
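The hunk above is where master starts limiting how much of a response body reaches the HTML parser: it wraps `resp.Body` in `limitedio.NewLimitedReader` capped at `maxHtmlSize` (8 * 1024 bytes), where v1.0.0 parsed the whole body. The `util/limitedio` package providing this is listed near the end of this compare; the standard library's `io.LimitReader` gives essentially the same read-at-most-N behaviour, so a rough stand-in for the master-side code (illustrative only, not the repository's implementation) would be:

```go
package main

import (
	"io"
	"log"
	"net/http"

	"golang.org/x/net/html"
)

const maxHtmlSize = 8 * 1024 // same cap as in the hunk above

// parseLimited feeds at most maxHtmlSize bytes of the body to the HTML parser;
// io.LimitReader reports EOF once the cap is reached, which is the same effect
// the custom limitedReader in util/limitedio produces.
func parseLimited(resp *http.Response) (*html.Node, error) {
	return html.Parse(io.LimitReader(resp.Body, maxHtmlSize))
}

func main() {
	resp, err := http.Get("https://example.com/") // placeholder URL
	if err != nil {
		log.Fatal(err)
	}
	defer resp.Body.Close()

	root, err := parseLimited(resp)
	if err != nil {
		log.Fatal(err)
	}
	_ = root // the bot would go on to scrape the <title> node out of root
}
```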
@@ -132,19 +106,16 @@ func (p *Parser) Parse(u *url.URL, referer *url.URL) (result parsers.ParseResult
 "IsUpload": true,
 "ImageSize": image.Point{X: m.Width, Y: m.Height},
 "ImageType": strings.ToUpper(imgType),
-"Title": u.Path[strings.LastIndex(u.Path, "/")+1:],
 }
 if resp.ContentLength > 0 {
 info["Size"] = uint64(resp.ContentLength)
 }
 result.Information = []map[string]interface{}{info}
+log.Printf("Got through: %+v!", info)
 }
-break
-}
-
-fallthrough
 default:
 // TODO - Implement generic head info?
+log.Printf("web parser: Ignoring content of type %s", resp.Header.Get("content-type"))
 result.Ignored = true
 }
 }
@@ -1,133 +0,0 @@
-package web
-
-import (
-"net/url"
-"testing"
-"time"
-
-"github.com/icedream/irc-medialink/parsers"
-"github.com/stretchr/testify/assert"
-)
-
-func mustNewParser(t *testing.T) *Parser {
-p := new(Parser)
-if !assert.Nil(t, p.Init(), "Parser.Init must throw no errors") {
-panic("Can't run test without a proper parser")
-}
-return p
-}
-
-func parseWithTimeout(p *Parser, t *testing.T, timeout time.Duration, u *url.URL, ref *url.URL) (retval parsers.ParseResult) {
-resultChan := make(chan parsers.ParseResult)
-go func(resultChan chan<- parsers.ParseResult, p *Parser, u *url.URL, ref *url.URL) {
-resultChan <- p.Parse(u, ref)
-}(resultChan, p, u, ref)
-
-select {
-case r := <-resultChan:
-retval = r
-return
-case <-time.After(timeout):
-t.Fatal("Didn't succeed parsing URL in time")
-return
-}
-}
-
-func Test_Parser_Parse_IRCBotScience_NoTitle(t *testing.T) {
-p := mustNewParser(t)
-result := p.Parse(&url.URL{
-Scheme: "https",
-Host: "irc-bot-science.clsr.net",
-Path: "notitle",
-}, nil)
-
-t.Logf("Result: %+v", result)
-assert.False(t, result.Ignored)
-assert.Nil(t, result.Error)
-assert.Nil(t, result.UserError)
-assert.Len(t, result.Information, 1)
-assert.Equal(t, noTitleStr, result.Information[0]["Title"])
-}
-
-func Test_Parser_Parse_IRCBotScience_LongHeaders(t *testing.T) {
-p := mustNewParser(t)
-result := parseWithTimeout(p, t, 5*time.Second, &url.URL{
-Scheme: "https",
-Host: "irc-bot-science.clsr.net",
-Path: "longheaders",
-}, nil)
-for result.FollowUrl != nil {
-result = parseWithTimeout(p, t, 5*time.Second, result.FollowUrl, nil)
-}
-
-t.Logf("Result: %+v", result)
-assert.True(t, result.Ignored)
-}
-
-func Test_Parser_Parse_IRCBotScience_BigHeader(t *testing.T) {
-p := mustNewParser(t)
-result := parseWithTimeout(p, t, 5*time.Second, &url.URL{
-Scheme: "https",
-Host: "irc-bot-science.clsr.net",
-Path: "bigheader",
-}, nil)
-for result.FollowUrl != nil {
-result = parseWithTimeout(p, t, 5*time.Second, result.FollowUrl, nil)
-}
-
-t.Logf("Result: %+v", result)
-assert.True(t, result.Ignored)
-}
-
-func Test_Parser_Parse_IRCBotScience_Large(t *testing.T) {
-p := mustNewParser(t)
-
-result := parseWithTimeout(p, t, 5*time.Second, &url.URL{
-Scheme: "https",
-Host: "irc-bot-science.clsr.net",
-Path: "large",
-}, nil)
-for result.FollowUrl != nil {
-result = parseWithTimeout(p, t, 5*time.Second, result.FollowUrl, nil)
-}
-
-t.Logf("Result: %+v", result)
-assert.False(t, result.Ignored)
-assert.Nil(t, result.Error)
-assert.Nil(t, result.UserError)
-assert.Len(t, result.Information, 1)
-assert.Equal(t, "If this title is printed, it works correctly.", result.Information[0]["Title"])
-
-}
-
-func Test_Parser_Parse_IRCBotScience_Redirect(t *testing.T) {
-p := mustNewParser(t)
-originalUrl := &url.URL{
-Scheme: "https",
-Host: "irc-bot-science.clsr.net",
-Path: "redirect",
-}
-result := p.Parse(originalUrl, nil)
-
-t.Logf("Result: %+v", result)
-assert.False(t, result.Ignored)
-assert.Nil(t, result.Error)
-assert.Nil(t, result.UserError)
-assert.NotNil(t, result.FollowUrl)
-assert.Equal(t, originalUrl.String(), result.FollowUrl.String())
-}
-
-func Test_Parser_Parse_Hash(t *testing.T) {
-p := mustNewParser(t)
-originalUrl := &url.URL{
-Scheme: "https",
-Host: "www.google.com",
-Path: "/#invalid",
-}
-result := p.Parse(originalUrl, nil)
-
-t.Logf("Result: %+v", result)
-assert.False(t, result.Ignored)
-assert.Nil(t, result.Error)
-assert.Nil(t, result.UserError)
-}
@@ -23,10 +23,7 @@ func (p *Parser) Init() error {
 }
 
 func (p *Parser) Parse(u *url.URL, referer *url.URL) (result parsers.ParseResult) {
-if !(strings.EqualFold(u.Scheme, "http") ||
-strings.EqualFold(u.Scheme, "https")) ||
-(!strings.HasSuffix(strings.ToLower(u.Host), ".wikipedia.org") &&
-!strings.EqualFold(u.Host, "wikipedia.org")) {
+if !strings.HasSuffix(strings.ToLower(u.Host), ".wikipedia.org") {
 result.Ignored = true
 return
 }
@@ -40,11 +37,6 @@ func (p *Parser) Parse(u *url.URL, referer *url.URL) (result parsers.ParseResult
 }
 
 // We're using the original host for link localization
-// or en.wikipedia.org for (www.)wikipedia.org
-if strings.EqualFold(u.Host, "wikipedia.org") ||
-strings.EqualFold(u.Host, "www.wikipedia.org") {
-u.Host = "en.wikipedia.org"
-}
 r, err := http.Get("https://" + u.Host + "/api/rest_v1/page/summary/" + titleEscaped)
 if err != nil {
 result.Error = err
@@ -22,7 +22,9 @@ const (
 youtubeIdType_ChannelId
 youtubeIdType_Playlist
 
-header = "\x0301,00You\x0300,04Tube"
+header = "\x031,0You\x030,4Tube\x03" +
+"99,99" + /* Fix for KiwiIRC not flushing background color on empty color tag */
+"\x03" /* Fix for Mibbit interpreting 99 as green instead of transparent */
 )
 
 var (
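The header change above is about mIRC colour codes: `\x03` starts a colour, followed by a foreground and optional background number of one or two digits each. v1.0.0 built the header from single-digit codes plus the KiwiIRC/Mibbit workaround strings, while master zero-pads every code to two digits so that a digit following the code cannot be absorbed into it. A small sketch of the padded form (illustration only; the bot writes its header string out by hand):

```go
package main

import "fmt"

// colored wraps text in a mIRC colour code, always emitting two-digit
// colour numbers so that a leading digit in text is never parsed as
// part of the colour code itself.
func colored(fg, bg int, text string) string {
	return fmt.Sprintf("\x03%02d,%02d%s", fg, bg, text)
}

func main() {
	// Reproduces the master-side header "\x0301,00You\x0300,04Tube".
	header := colored(1, 0, "You") + colored(0, 4, "Tube")
	fmt.Printf("%q\n", header)
}
```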
@@ -166,12 +168,6 @@ func (p *Parser) Parse(u *url.URL, referer *url.URL) (result parsers.ParseResult
 } else {
 log.Print(err)
 }
-
-if item.ContentDetails.ContentRating != nil {
-if item.ContentDetails.ContentRating.YtRating == "ytAgeRestricted" {
-r["AgeRestriction"] = "NSFW"
-}
-}
 }
 if item.Statistics != nil {
 r["Views"] = item.Statistics.ViewCount
@@ -212,7 +208,8 @@ func (p *Parser) Parse(u *url.URL, referer *url.URL) (result parsers.ParseResult
 r := map[string]interface{}{
 "Header": header,
 "IsProfile": true,
-"Name": item.Snippet.Title,
+"Title": "Channel",
+"Author": item.Snippet.Title,
 "CountryCode": item.Snippet.Country,
 "Description": item.Snippet.Description,
 "ShortUrl": item.Snippet.CustomUrl,
util.go (32 changed lines)
@@ -1,6 +1,7 @@
 package main
 
 import (
+"net/url"
 "regexp"
 "strings"
 )
@@ -27,3 +28,34 @@ func stripIrcFormatting(text string) string {
 text = rxIrcColor.ReplaceAllLiteralString(text, "")
 return text
 }
+
+func getYouTubeId(uri *url.URL) string {
+u := &(*uri)
+u.Scheme = strings.ToLower(u.Scheme)
+u.Host = strings.ToLower(u.Host)
+
+// Must be an HTTP URL
+if u.Scheme != "http" && u.Scheme != "https" {
+return ""
+}
+
+// Remove www. prefix from hostname
+if strings.HasPrefix(u.Host, "www.") {
+u.Host = u.Host[4:]
+}
+
+switch strings.ToLower(u.Host) {
+case "youtu.be":
+// http://youtu.be/{id}
+if s, err := url.QueryUnescape(strings.TrimLeft(u.Path, "/")); err == nil {
+return s
+} else {
+return ""
+}
+case "youtube.com":
+// http://youtube.com/watch?v={id}
+return u.Query().Get("v")
+}
+
+return ""
+}
@@ -1,48 +0,0 @@
-package limitedio
-
-import "io"
-
-type limitedReader struct {
-io.Reader
-rest int
-}
-
-func NewLimitedReader(r io.Reader, limit int) io.Reader {
-return &limitedReader{r, limit}
-}
-
-func (r *limitedReader) Read(data []byte) (n int, err error) {
-if r.rest <= 0 {
-err = io.EOF
-return
-}
-
-var dataSize int
-if len(data) < r.rest {
-dataSize = len(data)
-} else {
-dataSize = r.rest
-}
-
-actualData := make([]byte, dataSize)
-n, err = r.Reader.Read(actualData)
-if n > 0 {
-copy(data, actualData)
-}
-r.rest -= (n)
-
-return
-}
-
-type limitedReadCloser struct {
-*limitedReader
-closeMethod func() error
-}
-
-func NewLimitedReadCloser(r io.ReadCloser, limit int) io.Reader {
-return &limitedReadCloser{&limitedReader{r, limit}, r.Close}
-}
-
-func (rc *limitedReadCloser) Close() error {
-return rc.closeMethod()
-}
util_test.go (32 changed lines)
@@ -1,3 +1,33 @@
 package main
 
-// TODO - unit test stripIrcFormatting
+import (
+"net/url"
+"testing"
+
+"github.com/stretchr/testify/assert"
+)
+
+func mustParseUrl(u string) *url.URL {
+if uri, err := url.Parse(u); err == nil {
+return uri
+} else {
+panic(err)
+}
+}
+
+func Test_GetYouTubeId(t *testing.T) {
+assert.Equal(t, "aYz-9jUlav-", getYouTubeId(mustParseUrl("http://youtube.com/watch?v=aYz-9jUlav-")))
+assert.Equal(t, "aYz-9jUlav-", getYouTubeId(mustParseUrl("https://youtube.com/watch?v=aYz-9jUlav-")))
+assert.Equal(t, "aYz-9jUlav-", getYouTubeId(mustParseUrl("http://www.youtube.com/watch?v=aYz-9jUlav-")))
+assert.Equal(t, "aYz-9jUlav-", getYouTubeId(mustParseUrl("https://www.youtube.com/watch?v=aYz-9jUlav-")))
+assert.Equal(t, "aYz-9jUlav-", getYouTubeId(mustParseUrl("http://youtu.be/aYz-9jUlav-")))
+assert.Equal(t, "aYz-9jUlav-", getYouTubeId(mustParseUrl("https://youtu.be/aYz-9jUlav-")))
+assert.Equal(t, "aYz-9jUlav-", getYouTubeId(mustParseUrl("http://www.youtu.be/aYz-9jUlav-")))
+assert.Equal(t, "aYz-9jUlav-", getYouTubeId(mustParseUrl("https://www.youtu.be/aYz-9jUlav-")))
+}
+
+func Benchmark_GetYouTubeId(b *testing.B) {
+for n := 0; n < b.N; n++ {
+getYouTubeId(mustParseUrl("http://youtube.com/watch?v=aYz-9jUlav-"))
+}
+}