From 769e0e90a71f78d4ff94e23bd13a757080de702a Mon Sep 17 00:00:00 2001 From: Carl Kittelberger Date: Sun, 3 Jul 2016 18:40:00 +0200 Subject: [PATCH] manager: Fix how URLs are passed to parsers. This should prevent parsers from being able to leak changes on the original URL object to other parsers. Fixes issue #1. --- manager/parser.go | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/manager/parser.go b/manager/parser.go index 05eb591..90c1896 100644 --- a/manager/parser.go +++ b/manager/parser.go @@ -52,33 +52,40 @@ func (m *Manager) RegisterParser(parser Parser) error { return nil } -func (m *Manager) Parse(u *url.URL) (string, parsers.ParseResult) { - var oldU *url.URL +func (m *Manager) Parse(currentUrl *url.URL) (string, parsers.ParseResult) { + var referer *url.URL attempt := 0 followLoop: - for u != nil { - log.Printf("Parsing %s (referer %s)...", u, oldU) + for currentUrl != nil { attempt++ if attempt > 15 { - log.Printf("WARNING: Potential infinite loop for url %s, abort parsing", u) + log.Printf("WARNING: Potential infinite loop for url %s, abort parsing", currentUrl) break } for _, p := range m.GetParsers() { - r := p.Parse(u, oldU) + var refererCopy *url.URL + if referer != nil { + refererCopy = &url.URL{} + *refererCopy = *referer + } + currentUrlCopy := &url.URL{} + *currentUrlCopy = *currentUrl + r := p.Parse(currentUrlCopy, refererCopy) if r.Ignored { continue } if r.FollowUrl != nil { - if *u == *r.FollowUrl { + if *currentUrl == *r.FollowUrl { log.Printf("WARNING: Ignoring request to follow to same URL, ignoring.") break followLoop } - oldU, u = u, r.FollowUrl + referer = currentUrl + currentUrl = r.FollowUrl continue followLoop } return p.Name(), r } - u = nil + currentUrl = nil } // No parser matches, link ignored