manager: Fix how URLs are passed to parsers.
This should prevent parsers from leaking changes made to the original URL object to other parsers. Fixes issue #1.
develop
parent
d6a32315f6
commit
769e0e90a7
|
@ -52,33 +52,40 @@ func (m *Manager) RegisterParser(parser Parser) error {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (m *Manager) Parse(u *url.URL) (string, parsers.ParseResult) {
|
func (m *Manager) Parse(currentUrl *url.URL) (string, parsers.ParseResult) {
|
||||||
var oldU *url.URL
|
var referer *url.URL
|
||||||
attempt := 0
|
attempt := 0
|
||||||
followLoop:
|
followLoop:
|
||||||
for u != nil {
|
for currentUrl != nil {
|
||||||
log.Printf("Parsing %s (referer %s)...", u, oldU)
|
|
||||||
attempt++
|
attempt++
|
||||||
if attempt > 15 {
|
if attempt > 15 {
|
||||||
log.Printf("WARNING: Potential infinite loop for url %s, abort parsing", u)
|
log.Printf("WARNING: Potential infinite loop for url %s, abort parsing", currentUrl)
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
for _, p := range m.GetParsers() {
|
for _, p := range m.GetParsers() {
|
||||||
r := p.Parse(u, oldU)
|
var refererCopy *url.URL
|
||||||
|
if referer != nil {
|
||||||
|
refererCopy = &url.URL{}
|
||||||
|
*refererCopy = *referer
|
||||||
|
}
|
||||||
|
currentUrlCopy := &url.URL{}
|
||||||
|
*currentUrlCopy = *currentUrl
|
||||||
|
r := p.Parse(currentUrlCopy, refererCopy)
|
||||||
if r.Ignored {
|
if r.Ignored {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
if r.FollowUrl != nil {
|
if r.FollowUrl != nil {
|
||||||
if *u == *r.FollowUrl {
|
if *currentUrl == *r.FollowUrl {
|
||||||
log.Printf("WARNING: Ignoring request to follow to same URL, ignoring.")
|
log.Printf("WARNING: Ignoring request to follow to same URL, ignoring.")
|
||||||
break followLoop
|
break followLoop
|
||||||
}
|
}
|
||||||
oldU, u = u, r.FollowUrl
|
referer = currentUrl
|
||||||
|
currentUrl = r.FollowUrl
|
||||||
continue followLoop
|
continue followLoop
|
||||||
}
|
}
|
||||||
return p.Name(), r
|
return p.Name(), r
|
||||||
}
|
}
|
||||||
u = nil
|
currentUrl = nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// No parser matches, link ignored
|
// No parser matches, link ignored
|
||||||
|
|
Loading…
Reference in New Issue