From ec899f0ddf4b229b9aef3627c9fc56c7db6dac5d Mon Sep 17 00:00:00 2001
From: Carl Kittelberger
Date: Sun, 19 Jun 2016 23:31:47 +0200
Subject: [PATCH] Replace new-line characters in HTML title with space. Targets #2.

---
 parsers/web/parser.go | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/parsers/web/parser.go b/parsers/web/parser.go
index e2ae317..9476b09 100644
--- a/parsers/web/parser.go
+++ b/parsers/web/parser.go
@@ -5,6 +5,7 @@ import (
 	"log"
 	"net/http"
 	"net/url"
+	"regexp"
 	"strings"

 	"golang.org/x/net/html"
@@ -22,6 +23,9 @@ import (

 var (
 	ErrCorruptedImage = errors.New("Corrupted image.")
+
+	// rxNewlines matches one or more consecutive line breaks (LF or CRLF). It must not match the empty string, otherwise ReplaceAllString would insert the replacement between every character.
+	rxNewlines = regexp.MustCompile(`(?:\r?\n)+`)
 )

 const (
@@ -105,7 +109,7 @@ func (p *Parser) Parse(u *url.URL, referer *url.URL) (result parsers.ParseResult
 	title, ok := scrape.Find(root, scrape.ByTag(atom.Title))
 	if ok {
 		// Got it!
-		result.Information[0]["Title"] = scrape.Text(title)
+		result.Information[0]["Title"] = rxNewlines.ReplaceAllString(scrape.Text(title), " ")
 	} else {
 		// No title found
 		result.Information[0]["Title"] = "(no title)"