[fix] handle missing url scheme - fixes #428

This commit is contained in:
Adam Tauber 2015-09-11 17:57:09 +02:00
parent 23dab175b2
commit e3df22b140
1 changed file with 2 additions and 0 deletions

View File

@@ -53,6 +53,8 @@ def response(resp):
# parse results
for result in dom.xpath(results_xpath):
url = result.xpath(url_xpath)[0].text
if not url.startswith('http://') and not url.startswith('https://'):
url = 'http://' + url
title = result.xpath(title_xpath)[0].text
content = escape(result.xpath(content_xpath)[0].text)