From ee1ffbc87f0b2471e33b14561002c400cd8db861 Mon Sep 17 00:00:00 2001
From: Adam Tauber
Date: Tue, 4 Mar 2014 14:19:59 +0100
Subject: [PATCH] [fix] yahoo engine url extraction

---
 searx/engines/yahoo.py      | 2 +-
 searx/engines/yahoo_news.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/searx/engines/yahoo.py b/searx/engines/yahoo.py
index f83b4b96..f070b8a7 100644
--- a/searx/engines/yahoo.py
+++ b/searx/engines/yahoo.py
@@ -35,7 +35,7 @@ def response(resp):
 
     for result in dom.xpath(results_xpath):
         url_string = extract_url(result.xpath(url_xpath), search_url)
-        start = url_string.find('/RU=')+4
+        start = url_string.find('http', url_string.find('/RU=')+1)
         end = url_string.rfind('/RS')
         url = unquote(url_string[start:end])
         title = extract_text(result.xpath(title_xpath)[0])
diff --git a/searx/engines/yahoo_news.py b/searx/engines/yahoo_news.py
index 6ece496c..3c257866 100644
--- a/searx/engines/yahoo_news.py
+++ b/searx/engines/yahoo_news.py
@@ -35,7 +35,7 @@ def response(resp):
 
     for result in dom.xpath(results_xpath):
         url_string = extract_url(result.xpath(url_xpath), search_url)
-        start = url_string.find('/RU=')+4
+        start = url_string.find('http', url_string.find('/RU=')+1)
         end = url_string.rfind('/RS')
         url = unquote(url_string[start:end])
         title = extract_text(result.xpath(title_xpath)[0])