Digg + Twitter corrections

Digg engines, with thumbnails
Add pubdate for twitter
Cqoicebordel 2014-12-28 22:57:59 +01:00
parent 011c43b485
commit e7e2981536
3 changed files with 86 additions and 6 deletions

searx/engines/digg.py (new file, 66 lines)

@@ -0,0 +1,66 @@
## Digg (News, Social media)
#
# @website     https://digg.com/
# @provide-api no
#
# @using-api   no
# @results     HTML (using search portal)
# @stable      no (HTML can change)
# @parse       url, title, content, publishedDate, thumbnail

from urllib import quote_plus
from json import loads
from lxml import html
from cgi import escape
from dateutil import parser

# engine dependent config
categories = ['news', 'social media']
paging = True

# search-url
base_url = 'https://digg.com/'
search_url = base_url+'api/search/{query}.json?position={position}&format=html'

# specific xpath variables
results_xpath = '//article'
link_xpath = './/small[@class="time"]//a'
title_xpath = './/h2//a//text()'
content_xpath = './/p//text()'
pubdate_xpath = './/time'


# do search-request
def request(query, params):
    offset = (params['pageno'] - 1) * 10
    params['url'] = search_url.format(position=offset,
                                      query=quote_plus(query))
    return params


# get response from search-request
def response(resp):
    results = []

    search_result = loads(resp.text)

    dom = html.fromstring(search_result['html'])

    # parse results
    for result in dom.xpath(results_xpath):
        url = result.attrib.get('data-contenturl')
        thumbnail = result.xpath('.//img')[0].attrib.get('src')
        title = ''.join(result.xpath(title_xpath))
        content = escape(''.join(result.xpath(content_xpath)))
        publishedDate = parser.parse(result.xpath(pubdate_xpath)[0].attrib.get('datetime'))

        # append result
        results.append({'url': url,
                        'title': title,
                        'content': content,
                        'template': 'videos.html',
                        'publishedDate': publishedDate,
                        'thumbnail': thumbnail})

    # return results
    return results
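
For reference, a rough way to exercise the new engine by hand. This is only a sketch and not part of the commit: the query string, the FakeResponse class and the sample_html markup are made up for illustration, and the module is assumed to be importable as searx.engines.digg on the Python 2 setup the imports above imply.

import json

from searx.engines import digg  # assumption; or save the file above as digg.py and `import digg`

# request() only fills in params['url']; searx normally supplies 'pageno'
params = digg.request('free software', {'pageno': 1})
print params['url']
# -> https://digg.com/api/search/free+software.json?position=0&format=html

# response() reads resp.text as JSON and parses the embedded HTML, so a
# stand-in object with a .text attribute is enough for a dry run
sample_html = (
    '<article data-contenturl="https://example.com/story">'
    '<img src="https://example.com/thumb.jpg">'
    '<h2><a href="#">Example story</a></h2>'
    '<p>Teaser text</p>'
    '<time datetime="2014-12-28T22:00:00+01:00"></time>'
    '</article>'
)


class FakeResponse(object):
    text = json.dumps({'html': sample_html})


for r in digg.response(FakeResponse()):
    print r['title'], r['thumbnail'], r['publishedDate']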

searx/engines/twitter.py

@@ -1,6 +1,6 @@
 ## Twitter (Social media)
 #
-# @website     https://www.bing.com/news
+# @website     https://twitter.com/
 # @provide-api yes (https://dev.twitter.com/docs/using-search)
 #
 # @using-api   no
@@ -14,6 +14,7 @@ from urlparse import urljoin
 from urllib import urlencode
 from lxml import html
 from cgi import escape
+from datetime import datetime

 # engine dependent config
 categories = ['social media']
@@ -28,6 +29,7 @@ results_xpath = '//li[@data-item-type="tweet"]'
 link_xpath = './/small[@class="time"]//a'
 title_xpath = './/span[@class="username js-action-profile-name"]//text()'
 content_xpath = './/p[@class="js-tweet-text tweet-text"]//text()'
+timestamp_xpath = './/span[contains(@class,"_timestamp")]'


 # do search-request
@@ -53,7 +55,15 @@ def response(resp):
         url = urljoin(base_url, link.attrib.get('href'))
         title = ''.join(tweet.xpath(title_xpath))
         content = escape(''.join(tweet.xpath(content_xpath)))
-
-        # append result
-        results.append({'url': url,
-                        'title': title,
+        pubdate = tweet.xpath(timestamp_xpath)
+        if len(pubdate) > 0:
+            publishedDate = datetime.fromtimestamp(float(pubdate[0].attrib.get('data-time')), None)
+            # append result
+            results.append({'url': url,
+                            'title': title,
+                            'content': content,
+                            'publishedDate': publishedDate})
+        else:
+            # append result
+            results.append({'url': url,
+                            'title': title,
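
The data-time attribute on the "_timestamp" span holds epoch seconds, which is why the new code goes through float() and datetime.fromtimestamp(). A standalone illustration of just that step, not part of the commit; the markup below is a hand-written stand-in, not Twitter's real tweet HTML:

from datetime import datetime
from lxml import html

tweet_html = ('<ol>'
              '<li data-item-type="tweet">'
              '<span class="_timestamp" data-time="1419804000"></span>'
              '</li></ol>')
dom = html.fromstring(tweet_html)
tweet = dom.xpath('//li[@data-item-type="tweet"]')[0]

pubdate = tweet.xpath('.//span[contains(@class,"_timestamp")]')
if len(pubdate) > 0:
    # same conversion as above: epoch seconds -> naive local datetime
    publishedDate = datetime.fromtimestamp(
        float(pubdate[0].attrib.get('data-time')), None)
    print publishedDate  # e.g. 2014-12-28 23:00:00 on a CET machine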

searx/settings.yml

@@ -45,6 +45,10 @@ engines:
     engine : duckduckgo_definitions
     shortcut : ddd

+  - name : digg
+    engine : digg
+    shortcut : dg
+
   - name : wikidata
     engine : wikidata
     shortcut : wd
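
Side note, not part of the commit: with this entry in place the engine is loaded from searx/engines/digg.py, its categories (news, social media) come from the engine module itself, and, assuming the usual searx shortcut handling, it can be queried directly from the search box with its bang, e.g. "!dg free software".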