From 52a57ee045e02844a8f650a9d3ae30e0092d86cd Mon Sep 17 00:00:00 2001 From: Cqoicebordel Date: Fri, 30 Jan 2015 21:00:49 +0100 Subject: [PATCH] Replace every run of whitespace with a single space in HTML text --- searx/utils.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/searx/utils.py b/searx/utils.py index 59d4b85b..ef221ef8 100644 --- a/searx/utils.py +++ b/searx/utils.py @@ -119,6 +119,8 @@ class HTMLTextExtractor(HTMLParser): def html_to_text(html): + html = html.replace('\n', ' ') + html = ' '.join(html.split()) s = HTMLTextExtractor() s.feed(html) return s.get_text()