diff --git a/searx/engines/google_news.py b/searx/engines/google_news.py index ca27a5b2..93571860 100644 --- a/searx/engines/google_news.py +++ b/searx/engines/google_news.py @@ -6,7 +6,7 @@ from json import loads categories = ['news'] url = 'https://ajax.googleapis.com/' -search_url = url + 'ajax/services/search/news?v=2.0&start={offset}&rsz=large&safe=off&filter=off&{query}&hl={language}' # noqa +search_url = url + 'ajax/services/search/news?v=2.0&start={offset}&rsz=large&safe=off&filter=off&{query}&hl={language}' # noqa paging = True language_support = True diff --git a/searx/engines/yahoo.py b/searx/engines/yahoo.py index f83b4b96..f070b8a7 100644 --- a/searx/engines/yahoo.py +++ b/searx/engines/yahoo.py @@ -35,7 +35,7 @@ def response(resp): for result in dom.xpath(results_xpath): url_string = extract_url(result.xpath(url_xpath), search_url) - start = url_string.find('/RU=')+4 + start = url_string.find('http', url_string.find('/RU=')+1) end = url_string.rfind('/RS') url = unquote(url_string[start:end]) title = extract_text(result.xpath(title_xpath)[0]) diff --git a/searx/engines/yahoo_news.py b/searx/engines/yahoo_news.py index 6ece496c..3c257866 100644 --- a/searx/engines/yahoo_news.py +++ b/searx/engines/yahoo_news.py @@ -35,7 +35,7 @@ def response(resp): for result in dom.xpath(results_xpath): url_string = extract_url(result.xpath(url_xpath), search_url) - start = url_string.find('/RU=')+4 + start = url_string.find('http', url_string.find('/RU=')+1) end = url_string.rfind('/RS') url = unquote(url_string[start:end]) title = extract_text(result.xpath(title_xpath)[0]) diff --git a/searx/utils.py b/searx/utils.py index af8ce952..b99a945d 100644 --- a/searx/utils.py +++ b/searx/utils.py @@ -4,12 +4,15 @@ import csv from codecs import getincrementalencoder import cStringIO import re +from random import choice +ua_versions = ('26.0', '27.0', '28.0') +ua_os = ('Windows NT 6.3; WOW64', 'X11; Linux x86_64; rv:26.0') +ua = "Mozilla/5.0 ({os}) Gecko/20100101 Firefox/{version}" def gen_useragent(): # TODO - ua = "Mozilla/5.0 (X11; Linux x86_64; rv:26.0) Gecko/20100101 Firefox/26.0" - return ua + return ua.format(os=choice(ua_os), version=choice(ua_versions)) def highlight_content(content, query):