Merge pull request #139 from dalf/master

pep8 : engines
Adam Tauber 2014-12-07 17:49:01 +01:00
commit 9517f7a6e7
24 changed files with 277 additions and 187 deletions
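The whole change is mechanical PEP 8 cleanup: long lines wrapped, spacing around operators and after colons fixed, `== None` comparisons replaced by `is None` / `is not None`, and a couple of unused imports dropped. For reference, the kind of check this satisfies can be reproduced with the pep8 package; a minimal sketch, assuming pep8 (today pycodestyle) is installed:

import pep8

# check the engine modules against the default 79-character limit
style = pep8.StyleGuide(max_line_length=79)
report = style.check_files(['searx/engines/'])
print('%d style issue(s) found' % report.total_errors)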

searx/engines/bing.py (View File)

@@ -1,8 +1,9 @@
 ## Bing (Web)
 #
 # @website https://www.bing.com
-# @provide-api yes (http://datamarket.azure.com/dataset/bing/search), max. 5000 query/month
+# @provide-api yes (http://datamarket.azure.com/dataset/bing/search),
+# max. 5000 query/month
 #
 # @using-api no (because of query limit)
 # @results HTML (using search portal)
 # @stable no (HTML can change)
@@ -58,8 +59,8 @@ def response(resp):
         content = escape(' '.join(result.xpath('.//p//text()')))

         # append result
         results.append({'url': url,
                         'title': title,
                         'content': content})

     # return results if something is found
@@ -74,8 +75,8 @@ def response(resp):
         content = escape(' '.join(result.xpath('.//p//text()')))

         # append result
         results.append({'url': url,
                         'title': title,
                         'content': content})

     # return results
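All of the files touched in this commit follow the same two-hook engine interface visible in the Bing hunks above: request() fills in params['url'] for the outgoing query, and response() turns the reply into a list of result dicts. A minimal sketch of that shape, with a made-up site and made-up XPath expressions, purely for orientation:

from urllib import urlencode
from lxml import html

# hypothetical target site, for illustration only
base_url = 'https://example.org/search?{query}'


def request(query, params):
    # fill in the outgoing request; searx performs it and passes
    # the reply to response()
    params['url'] = base_url.format(query=urlencode({'q': query}))
    return params


def response(resp):
    results = []
    dom = html.fromstring(resp.content)
    # the XPath expressions are invented, not taken from any real engine
    for result in dom.xpath('//div[@class="result"]'):
        results.append({'url': result.xpath('.//a/@href')[0],
                        'title': ' '.join(result.xpath('.//a//text()')),
                        'content': ' '.join(result.xpath('.//p//text()'))})
    return results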

searx/engines/bing_images.py (View File)

@@ -1,17 +1,19 @@
 ## Bing (Images)
 #
 # @website https://www.bing.com/images
-# @provide-api yes (http://datamarket.azure.com/dataset/bing/search), max. 5000 query/month
+# @provide-api yes (http://datamarket.azure.com/dataset/bing/search),
+# max. 5000 query/month
 #
 # @using-api no (because of query limit)
 # @results HTML (using search portal)
 # @stable no (HTML can change)
 # @parse url, title, img_src
 #
-# @todo currently there are up to 35 images receive per page, because bing does not parse count=10. limited response to 10 images
+# @todo currently there are up to 35 images receive per page,
+# because bing does not parse count=10.
+# limited response to 10 images

 from urllib import urlencode
-from cgi import escape
 from lxml import html
 from yaml import load
 import re
@@ -51,15 +53,15 @@ def response(resp):
     dom = html.fromstring(resp.content)

     # init regex for yaml-parsing
-    p = re.compile( '({|,)([a-z]+):(")')
+    p = re.compile('({|,)([a-z]+):(")')

     # parse results
     for result in dom.xpath('//div[@class="dg_u"]'):
         link = result.xpath('./a')[0]

         # parse yaml-data (it is required to add a space, to make it parsable)
-        yaml_data = load(p.sub( r'\1\2: \3', link.attrib.get('m')))
+        yaml_data = load(p.sub(r'\1\2: \3', link.attrib.get('m')))

         title = link.attrib.get('t1')
         #url = 'http://' + link.attrib.get('t3')
         url = yaml_data.get('surl')
@@ -69,7 +71,7 @@ def response(resp):
         results.append({'template': 'images.html',
                         'url': url,
                         'title': title,
                         'content': '',
                         'img_src': img_src})

         # TODO stop parsing if 10 images are found
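The two regex fixes above belong to the trick bing_images.py uses to read Bing's embedded metadata: the m attribute holds JSON-ish text with no space after the colons, and inserting one turns it into valid YAML flow syntax. A standalone sketch of the same idea, with an invented sample value:

import re
from yaml import load

# illustrative sample of the attribute value, not captured from Bing
sample = '{surl:"https://example.org/page",imgurl:"https://example.org/img.jpg"}'

p = re.compile('({|,)([a-z]+):(")')
yaml_data = load(p.sub(r'\1\2: \3', sample))

print(yaml_data.get('surl'))    # https://example.org/page
print(yaml_data.get('imgurl'))  # https://example.org/img.jpg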

searx/engines/bing_news.py (View File)

@@ -1,8 +1,9 @@
 ## Bing (News)
 #
 # @website https://www.bing.com/news
-# @provide-api yes (http://datamarket.azure.com/dataset/bing/search), max. 5000 query/month
+# @provide-api yes (http://datamarket.azure.com/dataset/bing/search),
+# max. 5000 query/month
 #
 # @using-api no (because of query limit)
 # @results HTML (using search portal)
 # @stable no (HTML can change)
@@ -57,12 +58,12 @@ def response(resp):
         url = link.attrib.get('href')
         title = ' '.join(link.xpath('.//text()'))
         contentXPath = result.xpath('.//div[@class="sn_txt"]/div//span[@class="sn_snip"]//text()')
-        if contentXPath != None:
+        if contentXPath is not None:
             content = escape(' '.join(contentXPath))

         # parse publishedDate
         publishedDateXPath = result.xpath('.//div[@class="sn_txt"]/div//span[contains(@class,"sn_ST")]//span[contains(@class,"sn_tm")]//text()')
-        if publishedDateXPath != None:
+        if publishedDateXPath is not None:
             publishedDate = escape(' '.join(publishedDateXPath))

         if re.match("^[0-9]+ minute(s|) ago$", publishedDate):
@@ -89,10 +90,10 @@ def response(resp):
             except TypeError:
                 # FIXME
                 publishedDate = datetime.now()

         # append result
         results.append({'url': url,
                         'title': title,
                         'publishedDate': publishedDate,
                         'content': content})
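bing_news.py receives publication times as relative strings ("5 minutes ago", "2 hours ago") and, as the last hunk shows, falls back to datetime.now() when parsing fails. A rough standalone sketch of that kind of conversion (the patterns here are illustrative, not copied from the engine):

import re
from datetime import datetime, timedelta


def parse_relative_date(text):
    # map strings such as "5 minutes ago" / "2 hours ago" to datetimes;
    # anything unrecognised falls back to "now", like the engine does
    match = re.match(r'^(\d+) minute(s|) ago$', text)
    if match:
        return datetime.now() - timedelta(minutes=int(match.group(1)))
    match = re.match(r'^(\d+) hour(s|) ago$', text)
    if match:
        return datetime.now() - timedelta(hours=int(match.group(1)))
    return datetime.now()


print(parse_relative_date('5 minutes ago'))
print(parse_relative_date('yesterday'))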

searx/engines/currency_convert.py (View File)

@@ -55,6 +55,6 @@ def response(resp):
         resp.search_params['to'].lower()
     )

-    results.append({'answer' : answer, 'url': url})
+    results.append({'answer': answer, 'url': url})

     return results

searx/engines/dailymotion.py (View File)

@@ -1,8 +1,8 @@
 ## Dailymotion (Videos)
 #
 # @website https://www.dailymotion.com
 # @provide-api yes (http://www.dailymotion.com/developer)
 #
 # @using-api yes
 # @results JSON
 # @stable yes
@@ -12,7 +12,6 @@
 from urllib import urlencode
 from json import loads
-from lxml import html

 # engine dependent config
 categories = ['videos']

searx/engines/deviantart.py (View File)

@@ -1,8 +1,8 @@
 ## Deviantart (Images)
 #
 # @website https://www.deviantart.com/
 # @provide-api yes (https://www.deviantart.com/developers/) (RSS)
 #
 # @using-api no (TODO, rewrite to api)
 # @results HTML
 # @stable no (HTML can change)

searx/engines/duckduckgo.py (View File)

@@ -1,15 +1,17 @@
 ## DuckDuckGo (Web)
 #
 # @website https://duckduckgo.com/
-# @provide-api yes (https://duckduckgo.com/api), but not all results from search-site
+# @provide-api yes (https://duckduckgo.com/api),
+# but not all results from search-site
 #
 # @using-api no
 # @results HTML (using search portal)
 # @stable no (HTML can change)
 # @parse url, title, content
 #
 # @todo rewrite to api
-# @todo language support (the current used site does not support language-change)
+# @todo language support
+# (the current used site does not support language-change)

 from urllib import urlencode
 from lxml.html import fromstring
@@ -37,7 +39,7 @@ def request(query, params):
     if params['language'] == 'all':
         locale = 'en-us'
     else:
-        locale = params['language'].replace('_','-').lower()
+        locale = params['language'].replace('_', '-').lower()

     params['url'] = url.format(
         query=urlencode({'q': query, 'kl': locale}),
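The locale line fixed above exists because searx stores languages as 'en_US' while DuckDuckGo's kl parameter expects 'en-us'. In isolation:

from urllib import urlencode

language = 'de_DE'                              # searx-style language code
locale = language.replace('_', '-').lower()     # -> 'de-de'
print(urlencode({'q': 'searx', 'kl': locale}))  # e.g. q=searx&kl=de-de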

searx/engines/duckduckgo_definitions.py (View File)

@@ -3,21 +3,25 @@ from urllib import urlencode
 from lxml import html
 from searx.engines.xpath import extract_text

-url = 'https://api.duckduckgo.com/?{query}&format=json&pretty=0&no_redirect=1&d=1'
+url = 'https://api.duckduckgo.com/'\
+    + '?{query}&format=json&pretty=0&no_redirect=1&d=1'


 def result_to_text(url, text, htmlResult):
     # TODO : remove result ending with "Meaning" or "Category"
     dom = html.fromstring(htmlResult)
     a = dom.xpath('//a')
-    if len(a)>=1:
+    if len(a) >= 1:
         return extract_text(a[0])
     else:
         return text


 def html_to_text(htmlFragment):
     dom = html.fromstring(htmlFragment)
     return extract_text(dom)


 def request(query, params):
     # TODO add kl={locale}
     params['url'] = url.format(query=urlencode({'q': query}))
@@ -38,16 +42,15 @@ def response(resp):
     # add answer if there is one
     answer = search_res.get('Answer', '')
     if answer != '':
-        results.append({ 'answer' : html_to_text(answer) })
+        results.append({'answer': html_to_text(answer)})

     # add infobox
     if 'Definition' in search_res:
         content = content + search_res.get('Definition', '')

     if 'Abstract' in search_res:
         content = content + search_res.get('Abstract', '')

     # image
     image = search_res.get('Image', '')
     image = None if image == '' else image
@@ -55,29 +58,35 @@ def response(resp):
     # attributes
     if 'Infobox' in search_res:
         infobox = search_res.get('Infobox', None)
         if 'content' in infobox:
             for info in infobox.get('content'):
-                attributes.append({'label': info.get('label'), 'value': info.get('value')})
+                attributes.append({'label': info.get('label'),
+                                   'value': info.get('value')})

     # urls
     for ddg_result in search_res.get('Results', []):
         if 'FirstURL' in ddg_result:
             firstURL = ddg_result.get('FirstURL', '')
             text = ddg_result.get('Text', '')
-            urls.append({'title':text, 'url':firstURL})
-            results.append({'title':heading, 'url': firstURL})
+            urls.append({'title': text, 'url': firstURL})
+            results.append({'title': heading, 'url': firstURL})

     # related topics
     for ddg_result in search_res.get('RelatedTopics', None):
         if 'FirstURL' in ddg_result:
-            suggestion = result_to_text(ddg_result.get('FirstURL', None), ddg_result.get('Text', None), ddg_result.get('Result', None))
+            suggestion = result_to_text(ddg_result.get('FirstURL', None),
+                                        ddg_result.get('Text', None),
+                                        ddg_result.get('Result', None))
             if suggestion != heading:
                 results.append({'suggestion': suggestion})
         elif 'Topics' in ddg_result:
             suggestions = []
-            relatedTopics.append({ 'name' : ddg_result.get('Name', ''), 'suggestions': suggestions })
+            relatedTopics.append({'name': ddg_result.get('Name', ''),
+                                  'suggestions': suggestions})
             for topic_result in ddg_result.get('Topics', []):
-                suggestion = result_to_text(topic_result.get('FirstURL', None), topic_result.get('Text', None), topic_result.get('Result', None))
+                suggestion = result_to_text(topic_result.get('FirstURL', None),
+                                            topic_result.get('Text', None),
+                                            topic_result.get('Result', None))
                 if suggestion != heading:
                     suggestions.append(suggestion)
@@ -86,21 +95,26 @@ def response(resp):
     if abstractURL != '':
         # add as result ? problem always in english
         infobox_id = abstractURL
-        urls.append({'title': search_res.get('AbstractSource'), 'url': abstractURL})
+        urls.append({'title': search_res.get('AbstractSource'),
+                     'url': abstractURL})

     # definition
     definitionURL = search_res.get('DefinitionURL', '')
     if definitionURL != '':
         # add as result ? as answer ? problem always in english
         infobox_id = definitionURL
-        urls.append({'title': search_res.get('DefinitionSource'), 'url': definitionURL})
+        urls.append({'title': search_res.get('DefinitionSource'),
+                     'url': definitionURL})

     # entity
     entity = search_res.get('Entity', None)
-    # TODO continent / country / department / location / waterfall / mountain range : link to map search, get weather, near by locations
+    # TODO continent / country / department / location / waterfall /
+    #      mountain range :
+    #      link to map search, get weather, near by locations
     # TODO musician : link to music search
     # TODO concert tour : ??
-    # TODO film / actor / television / media franchise : links to IMDB / rottentomatoes (or scrap result)
+    # TODO film / actor / television / media franchise :
+    #      links to IMDB / rottentomatoes (or scrap result)
     # TODO music : link tu musicbrainz / last.fm
     # TODO book : ??
     # TODO artist / playwright : ??
@@ -114,24 +128,25 @@ def response(resp):
     # TODO programming language : ??
     # TODO file format : ??

-    if len(heading)>0:
+    if len(heading) > 0:
         # TODO get infobox.meta.value where .label='article_title'
-        if image==None and len(attributes)==0 and len(urls)==1 and len(relatedTopics)==0 and len(content)==0:
+        if image is None and len(attributes) == 0 and len(urls) == 1 and\
+                len(relatedTopics) == 0 and len(content) == 0:
             results.append({
                 'url': urls[0]['url'],
                 'title': heading,
                 'content': content
            })
         else:
             results.append({
                 'infobox': heading,
                 'id': infobox_id,
                 'entity': entity,
                 'content': content,
-                'img_src' : image,
+                'img_src': image,
                 'attributes': attributes,
                 'urls': urls,
                 'relatedTopics': relatedTopics
            })

     return results
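result_to_text() and html_to_text() above reduce the HTML fragments in DuckDuckGo's JSON answer to plain text, preferring the first link's text when one is present. A small sketch of the same idea using lxml directly (searx's own extract_text helper is approximated here by text_content()):

from lxml import html


def html_to_text(fragment):
    # searx's extract_text does roughly this: join all text nodes
    return html.fromstring(fragment).text_content()


fragment = '<a href="https://example.org">Example</a> - a placeholder entry'
print(html_to_text(fragment))
print(html.fromstring(fragment).xpath('//a')[0].get('href'))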

searx/engines/dummy.py (View File)

@@ -1,5 +1,5 @@
 ## Dummy
 #
 # @results empty array
 # @stable yes

searx/engines/faroo.py (View File)

@@ -1,8 +1,8 @@
 ## Faroo (Web, News)
 #
 # @website http://www.faroo.com
 # @provide-api yes (http://www.faroo.com/hp/api/api.html), require API-key
 #
 # @using-api yes
 # @results JSON
 # @stable yes
@@ -24,9 +24,10 @@ api_key = None
 url = 'http://www.faroo.com/'
 search_url = url + 'api?{query}&start={offset}&length={number_of_results}&l={language}&src={categorie}&i=false&f=json&key={api_key}'

 search_category = {'general': 'web',
                    'news': 'news'}


 # do search-request
 def request(query, params):
     offset = (params['pageno']-1) * number_of_results + 1
@@ -48,7 +49,7 @@ def request(query, params):
                                       query=urlencode({'q': query}),
                                       language=language,
                                       categorie=categorie,
-                                      api_key=api_key )
+                                      api_key=api_key)

     # using searx User-Agent
     params['headers']['User-Agent'] = searx_useragent()
@@ -101,7 +102,7 @@ def response(resp):
             results.append({'template': 'images.html',
                             'url': result['url'],
                             'title': result['title'],
                             'content': result['kwic'],
                             'img_src': result['iurl']})

     # return results
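The api_key argument fix above sits in the call that fills faroo's search_url template; the offset is the 1-based record index derived from searx's page number. Filled in with placeholder values (the key is obviously fake), the request URL is built like this:

from urllib import urlencode

number_of_results = 10
search_url = ('http://www.faroo.com/api?{query}&start={offset}'
              '&length={number_of_results}&l={language}'
              '&src={categorie}&i=false&f=json&key={api_key}')

pageno = 2
offset = (pageno - 1) * number_of_results + 1   # 1-based record index -> 11

# placeholder values; a real request uses the configured api_key
print(search_url.format(query=urlencode({'q': 'searx'}),
                        offset=offset,
                        number_of_results=number_of_results,
                        language='en',
                        categorie='web',
                        api_key='XXXX'))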

searx/engines/generalfile.py (View File)

@@ -1,8 +1,8 @@
 ## General Files (Files)
 #
 # @website http://www.general-files.org
 # @provide-api no (nothing found)
 #
 # @using-api no (because nothing found)
 # @results HTML (using search portal)
 # @stable no (HTML can change)

searx/engines/github.py (View File)

@@ -1,8 +1,8 @@
 ## Github (It)
 #
 # @website https://github.com/
 # @provide-api yes (https://developer.github.com/v3/)
 #
 # @using-api yes
 # @results JSON
 # @stable yes (using api)

searx/engines/google_images.py (View File)

@@ -1,8 +1,9 @@
 ## Google (Images)
 #
 # @website https://www.google.com
-# @provide-api yes (https://developers.google.com/web-search/docs/), deprecated!
+# @provide-api yes (https://developers.google.com/web-search/docs/),
+# deprecated!
 #
 # @using-api yes
 # @results JSON
 # @stable yes (but deprecated)

searx/engines/google_news.py (View File)

@@ -1,8 +1,9 @@
 ## Google (News)
 #
 # @website https://www.google.com
-# @provide-api yes (https://developers.google.com/web-search/docs/), deprecated!
+# @provide-api yes (https://developers.google.com/web-search/docs/),
+# deprecated!
 #
 # @using-api yes
 # @results JSON
 # @stable yes (but deprecated)

searx/engines/openstreetmap.py (View File)

@@ -39,16 +39,16 @@ def response(resp):
         url = result_base_url.format(osm_type=osm_type,
                                      osm_id=r['osm_id'])

-        osm = {'type':osm_type,
-               'id':r['osm_id']}
+        osm = {'type': osm_type,
+               'id': r['osm_id']}

         geojson = r.get('geojson')

         # if no geojson is found and osm_type is a node, add geojson Point
         if not geojson and\
           osm_type == 'node':
-            geojson = {u'type':u'Point',
-                       u'coordinates':[r['lon'],r['lat']]}
+            geojson = {u'type': u'Point',
+                       u'coordinates': [r['lon'], r['lat']]}

         address_raw = r.get('address')
         address = {}
@@ -59,20 +59,20 @@ def response(resp):
            r['class'] == 'tourism' or\
            r['class'] == 'leisure':
             if address_raw.get('address29'):
-                address = {'name':address_raw.get('address29')}
+                address = {'name': address_raw.get('address29')}
             else:
-                address = {'name':address_raw.get(r['type'])}
+                address = {'name': address_raw.get(r['type'])}

             # add rest of adressdata, if something is already found
             if address.get('name'):
-                address.update({'house_number':address_raw.get('house_number'),
-                                'road':address_raw.get('road'),
-                                'locality':address_raw.get('city',
+                address.update({'house_number': address_raw.get('house_number'),
+                                'road': address_raw.get('road'),
+                                'locality': address_raw.get('city',
                                                            address_raw.get('town',
                                                                            address_raw.get('village'))),
-                                'postcode':address_raw.get('postcode'),
-                                'country':address_raw.get('country'),
-                                'country_code':address_raw.get('country_code')})
+                                'postcode': address_raw.get('postcode'),
+                                'country': address_raw.get('country'),
+                                'country_code': address_raw.get('country_code')})
         else:
             address = None
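The first openstreetmap.py hunk synthesises a GeoJSON Point when Nominatim returns a plain node without geometry. The same few lines in isolation, applied to a made-up record:

# made-up Nominatim-style record, reduced to the fields used here
r = {'osm_id': 123456, 'lon': 13.38886, 'lat': 52.51704, 'geojson': None}

geojson = r.get('geojson')
if not geojson:
    # GeoJSON uses [longitude, latitude] ordering
    geojson = {u'type': u'Point',
               u'coordinates': [r['lon'], r['lat']]}

print(geojson)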

searx/engines/piratebay.py (View File)

@@ -1,8 +1,8 @@
 ## Piratebay (Videos, Music, Files)
 #
 # @website https://thepiratebay.se
 # @provide-api no (nothing found)
 #
 # @using-api no
 # @results HTML (using search portal)
 # @stable yes (HTML can change)
@@ -23,7 +23,7 @@ url = 'https://thepiratebay.se/'
 search_url = url + 'search/{search_term}/{pageno}/99/{search_type}'

 # piratebay specific type-definitions
 search_types = {'files': '0',
                 'music': '100',
                 'videos': '200'}

searx/engines/soundcloud.py (View File)

@@ -1,8 +1,8 @@
 ## Soundcloud (Music)
 #
 # @website https://soundcloud.com
 # @provide-api yes (https://developers.soundcloud.com/)
 #
 # @using-api yes
 # @results JSON
 # @stable yes

searx/engines/stackoverflow.py (View File)

@@ -1,8 +1,8 @@
 ## Stackoverflow (It)
 #
 # @website https://stackoverflow.com/
 # @provide-api not clear (https://api.stackexchange.com/docs/advanced-search)
 #
 # @using-api no
 # @results HTML
 # @stable no (HTML can change)
@@ -50,8 +50,8 @@ def response(resp):
         content = escape(' '.join(result.xpath(content_xpath)))

         # append result
         results.append({'url': href,
                         'title': title,
                         'content': content})

     # return results

searx/engines/twitter.py (View File)

@@ -1,8 +1,8 @@
 ## Twitter (Social media)
 #
 # @website https://www.bing.com/news
 # @provide-api yes (https://dev.twitter.com/docs/using-search)
 #
 # @using-api no
 # @results HTML (using search portal)
 # @stable no (HTML can change)

searx/engines/vimeo.py (View File)

@@ -1,8 +1,9 @@
 ## Vimeo (Videos)
 #
 # @website https://vimeo.com/
-# @provide-api yes (http://developer.vimeo.com/api), they have a maximum count of queries/hour
+# @provide-api yes (http://developer.vimeo.com/api),
+# they have a maximum count of queries/hour
 #
 # @using-api no (TODO, rewrite to api)
 # @results HTML (using search portal)
 # @stable no (HTML can change)
@@ -35,11 +36,12 @@ publishedDate_xpath = './/p[@class="meta"]//attribute::datetime'

 # do search-request
 def request(query, params):
-    params['url'] = search_url.format(pageno=params['pageno'] ,
+    params['url'] = search_url.format(pageno=params['pageno'],
                                       query=urlencode({'q': query}))

     # TODO required?
-    params['cookies']['__utma'] = '00000000.000#0000000.0000000000.0000000000.0000000000.0'
+    params['cookies']['__utma'] =\
+        '00000000.000#0000000.0000000000.0000000000.0000000000.0'

     return params

searx/engines/wikidata.py (View File)

@@ -2,13 +2,25 @@ import json
 from requests import get
 from urllib import urlencode

-resultCount=1
-urlSearch = 'https://www.wikidata.org/w/api.php?action=query&list=search&format=json&srnamespace=0&srprop=sectiontitle&{query}'
-urlDetail = 'https://www.wikidata.org/w/api.php?action=wbgetentities&format=json&props=labels%7Cinfo%7Csitelinks%7Csitelinks%2Furls%7Cdescriptions%7Cclaims&{query}'
-urlMap = 'https://www.openstreetmap.org/?lat={latitude}&lon={longitude}&zoom={zoom}&layers=M'
+result_count = 1
+wikidata_host = 'https://www.wikidata.org'
+wikidata_api = wikidata_host + '/w/api.php'
+url_search = wikidata_api \
+    + '?action=query&list=search&format=json'\
+    + '&srnamespace=0&srprop=sectiontitle&{query}'
+url_detail = wikidata_api\
+    + '?action=wbgetentities&format=json'\
+    + '&props=labels%7Cinfo%7Csitelinks'\
+    + '%7Csitelinks%2Furls%7Cdescriptions%7Cclaims'\
+    + '&{query}'
+url_map = 'https://www.openstreetmap.org/'\
+    + '?lat={latitude}&lon={longitude}&zoom={zoom}&layers=M'


 def request(query, params):
-    params['url'] = urlSearch.format(query=urlencode({'srsearch': query, 'srlimit': resultCount}))
+    params['url'] = url_search.format(
+        query=urlencode({'srsearch': query,
+                         'srlimit': result_count}))
     return params
@@ -23,7 +35,8 @@ def response(resp):
     language = resp.search_params['language'].split('_')[0]
     if language == 'all':
         language = 'en'
-    url = urlDetail.format(query=urlencode({'ids': '|'.join(wikidata_ids), 'languages': language + '|en'}))
+    url = url_detail.format(query=urlencode({'ids': '|'.join(wikidata_ids),
+                                             'languages': language + '|en'}))

     htmlresponse = get(url)
     jsonresponse = json.loads(htmlresponse.content)
@@ -32,6 +45,7 @@ def response(resp):
     return results


 def getDetail(jsonresponse, wikidata_id, language):
     results = []
     urls = []
@@ -40,60 +54,103 @@ def getDetail(jsonresponse, wikidata_id, language):
     result = jsonresponse.get('entities', {}).get(wikidata_id, {})

     title = result.get('labels', {}).get(language, {}).get('value', None)
-    if title == None:
+    if title is None:
         title = result.get('labels', {}).get('en', {}).get('value', None)
-    if title == None:
+    if title is None:
         return results

-    description = result.get('descriptions', {}).get(language, {}).get('value', None)
-    if description == None:
-        description = result.get('descriptions', {}).get('en', {}).get('value', '')
+    description = result\
+        .get('descriptions', {})\
+        .get(language, {})\
+        .get('value', None)
+    if description is None:
+        description = result\
+            .get('descriptions', {})\
+            .get('en', {})\
+            .get('value', '')

     claims = result.get('claims', {})
     official_website = get_string(claims, 'P856', None)
-    if official_website != None:
-        urls.append({ 'title' : 'Official site', 'url': official_website })
-        results.append({ 'title': title, 'url' : official_website })
+    if official_website is not None:
+        urls.append({'title': 'Official site', 'url': official_website})
+        results.append({'title': title, 'url': official_website})

     wikipedia_link_count = 0
     if language != 'en':
-        wikipedia_link_count += add_url(urls, 'Wikipedia (' + language + ')', get_wikilink(result, language + 'wiki'))
+        wikipedia_link_count += add_url(urls,
+                                        'Wikipedia (' + language + ')',
+                                        get_wikilink(result, language +
+                                                     'wiki'))
     wikipedia_en_link = get_wikilink(result, 'enwiki')
-    wikipedia_link_count += add_url(urls, 'Wikipedia (en)', wikipedia_en_link)
+    wikipedia_link_count += add_url(urls,
+                                    'Wikipedia (en)',
+                                    wikipedia_en_link)
     if wikipedia_link_count == 0:
         misc_language = get_wiki_firstlanguage(result, 'wiki')
-        if misc_language != None:
-            add_url(urls, 'Wikipedia (' + misc_language + ')', get_wikilink(result, misc_language + 'wiki'))
+        if misc_language is not None:
+            add_url(urls,
+                    'Wikipedia (' + misc_language + ')',
+                    get_wikilink(result, misc_language + 'wiki'))

     if language != 'en':
-        add_url(urls, 'Wiki voyage (' + language + ')', get_wikilink(result, language + 'wikivoyage'))
-    add_url(urls, 'Wiki voyage (en)', get_wikilink(result, 'enwikivoyage'))
+        add_url(urls,
+                'Wiki voyage (' + language + ')',
+                get_wikilink(result, language + 'wikivoyage'))
+    add_url(urls,
+            'Wiki voyage (en)',
+            get_wikilink(result, 'enwikivoyage'))

     if language != 'en':
-        add_url(urls, 'Wikiquote (' + language + ')', get_wikilink(result, language + 'wikiquote'))
-    add_url(urls, 'Wikiquote (en)', get_wikilink(result, 'enwikiquote'))
+        add_url(urls,
+                'Wikiquote (' + language + ')',
+                get_wikilink(result, language + 'wikiquote'))
+    add_url(urls,
+            'Wikiquote (en)',
+            get_wikilink(result, 'enwikiquote'))

-    add_url(urls, 'Commons wiki', get_wikilink(result, 'commonswiki'))
+    add_url(urls,
+            'Commons wiki',
+            get_wikilink(result, 'commonswiki'))

-    add_url(urls, 'Location', get_geolink(claims, 'P625', None))
+    add_url(urls,
+            'Location',
+            get_geolink(claims, 'P625', None))

-    add_url(urls, 'Wikidata', 'https://www.wikidata.org/wiki/' + wikidata_id + '?uselang='+ language)
+    add_url(urls,
+            'Wikidata',
+            'https://www.wikidata.org/wiki/'
+            + wikidata_id + '?uselang=' + language)

     musicbrainz_work_id = get_string(claims, 'P435')
-    if musicbrainz_work_id != None:
-        add_url(urls, 'MusicBrainz', 'http://musicbrainz.org/work/' + musicbrainz_work_id)
+    if musicbrainz_work_id is not None:
+        add_url(urls,
+                'MusicBrainz',
+                'http://musicbrainz.org/work/'
+                + musicbrainz_work_id)

     musicbrainz_artist_id = get_string(claims, 'P434')
-    if musicbrainz_artist_id != None:
-        add_url(urls, 'MusicBrainz', 'http://musicbrainz.org/artist/' + musicbrainz_artist_id)
+    if musicbrainz_artist_id is not None:
+        add_url(urls,
+                'MusicBrainz',
+                'http://musicbrainz.org/artist/'
+                + musicbrainz_artist_id)

     musicbrainz_release_group_id = get_string(claims, 'P436')
-    if musicbrainz_release_group_id != None:
-        add_url(urls, 'MusicBrainz', 'http://musicbrainz.org/release-group/' + musicbrainz_release_group_id)
+    if musicbrainz_release_group_id is not None:
+        add_url(urls,
+                'MusicBrainz',
+                'http://musicbrainz.org/release-group/'
+                + musicbrainz_release_group_id)

     musicbrainz_label_id = get_string(claims, 'P966')
-    if musicbrainz_label_id != None:
-        add_url(urls, 'MusicBrainz', 'http://musicbrainz.org/label/' + musicbrainz_label_id)
+    if musicbrainz_label_id is not None:
+        add_url(urls,
+                'MusicBrainz',
+                'http://musicbrainz.org/label/'
+                + musicbrainz_label_id)

     # musicbrainz_area_id = get_string(claims, 'P982')
     # P1407 MusicBrainz series ID
@@ -102,42 +159,43 @@ def getDetail(jsonresponse, wikidata_id, language):
     # P1407 MusicBrainz series ID

     postal_code = get_string(claims, 'P281', None)
-    if postal_code != None:
-        attributes.append({'label' : 'Postal code(s)', 'value' : postal_code})
+    if postal_code is not None:
+        attributes.append({'label': 'Postal code(s)', 'value': postal_code})

     date_of_birth = get_time(claims, 'P569', None)
-    if date_of_birth != None:
-        attributes.append({'label' : 'Date of birth', 'value' : date_of_birth})
+    if date_of_birth is not None:
+        attributes.append({'label': 'Date of birth', 'value': date_of_birth})

     date_of_death = get_time(claims, 'P570', None)
-    if date_of_death != None:
-        attributes.append({'label' : 'Date of death', 'value' : date_of_death})
+    if date_of_death is not None:
+        attributes.append({'label': 'Date of death', 'value': date_of_death})

-    if len(attributes)==0 and len(urls)==2 and len(description)==0:
+    if len(attributes) == 0 and len(urls) == 2 and len(description) == 0:
         results.append({
             'url': urls[0]['url'],
             'title': title,
             'content': description
         })
     else:
         results.append({
-            'infobox' : title,
-            'id' : wikipedia_en_link,
-            'content' : description,
-            'attributes' : attributes,
-            'urls' : urls
+            'infobox': title,
+            'id': wikipedia_en_link,
+            'content': description,
+            'attributes': attributes,
+            'urls': urls
         })

     return results


 def add_url(urls, title, url):
-    if url != None:
-        urls.append({'title' : title, 'url' : url})
+    if url is not None:
+        urls.append({'title': title, 'url': url})
         return 1
     else:
         return 0


 def get_mainsnak(claims, propertyName):
     propValue = claims.get(propertyName, {})
     if len(propValue) == 0:
@@ -157,7 +215,7 @@ def get_string(claims, propertyName, defaultValue=None):
         mainsnak = e.get('mainsnak', {})

         datavalue = mainsnak.get('datavalue', {})
-        if datavalue != None:
+        if datavalue is not None:
             result.append(datavalue.get('value', ''))

     if len(result) == 0:
@@ -177,7 +235,7 @@ def get_time(claims, propertyName, defaultValue=None):
         mainsnak = e.get('mainsnak', {})

         datavalue = mainsnak.get('datavalue', {})
-        if datavalue != None:
+        if datavalue is not None:
             value = datavalue.get('value', '')
             result.append(value.get('time', ''))
@@ -190,7 +248,7 @@ def get_time(claims, propertyName, defaultValue=None):

 def get_geolink(claims, propertyName, defaultValue=''):
     mainsnak = get_mainsnak(claims, propertyName)

-    if mainsnak == None:
+    if mainsnak is None:
         return defaultValue

     datatype = mainsnak.get('datatype', '')
@@ -209,21 +267,25 @@ def get_geolink(claims, propertyName, defaultValue=''):
     # 1 --> 6
     # 0.016666666666667 --> 9
     # 0.00027777777777778 --> 19
-    # wolframalpha : quadratic fit { {13, 5}, {1, 6}, {0.0166666, 9}, {0.0002777777,19}}
+    # wolframalpha :
+    # quadratic fit { {13, 5}, {1, 6}, {0.0166666, 9}, {0.0002777777,19}}
     # 14.1186-8.8322 x+0.625447 x^2
     if precision < 0.0003:
         zoom = 19
     else:
         zoom = int(15 - precision*8.8322 + precision*precision*0.625447)

-    url = urlMap.replace('{latitude}', str(value.get('latitude',0))).replace('{longitude}', str(value.get('longitude',0))).replace('{zoom}', str(zoom))
+    url = url_map\
+        .replace('{latitude}', str(value.get('latitude', 0)))\
+        .replace('{longitude}', str(value.get('longitude', 0)))\
+        .replace('{zoom}', str(zoom))

     return url


 def get_wikilink(result, wikiid):
     url = result.get('sitelinks', {}).get(wikiid, {}).get('url', None)
-    if url == None:
+    if url is None:
         return url
     elif url.startswith('http://'):
         url = url.replace('http://', 'https://')
@@ -231,8 +293,9 @@ def get_wikilink(result, wikiid):
         url = 'https:' + url

     return url


 def get_wiki_firstlanguage(result, wikipatternid):
     for k in result.get('sitelinks', {}).keys():
-        if k.endswith(wikipatternid) and len(k)==(2+len(wikipatternid)):
+        if k.endswith(wikipatternid) and len(k) == (2+len(wikipatternid)):
             return k[0:2]
     return None
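get_geolink() turns a coordinate claim into an OpenStreetMap link: precisions below 0.0003 are pinned to zoom 19, anything else goes through the quadratic expression from the hunk above, and the result is spliced into url_map. Isolated, with a made-up claim value:

url_map = 'https://www.openstreetmap.org/'\
    + '?lat={latitude}&lon={longitude}&zoom={zoom}&layers=M'


def precision_to_zoom(precision):
    # same arithmetic as the get_geolink() hunk above
    if precision < 0.0003:
        return 19
    return int(15 - precision*8.8322 + precision*precision*0.625447)


# made-up coordinate claim value
value = {'latitude': 48.8566, 'longitude': 2.3522, 'precision': 0.0001}
zoom = precision_to_zoom(value['precision'])
url = url_map\
    .replace('{latitude}', str(value.get('latitude', 0)))\
    .replace('{longitude}', str(value.get('longitude', 0)))\
    .replace('{zoom}', str(zoom))
print(url)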

searx/engines/yacy.py (View File)

@@ -1,8 +1,9 @@
 ## Yacy (Web, Images, Videos, Music, Files)
 #
 # @website http://yacy.net
-# @provide-api yes (http://www.yacy-websuche.de/wiki/index.php/Dev:APIyacysearch)
+# @provide-api yes
+# (http://www.yacy-websuche.de/wiki/index.php/Dev:APIyacysearch)
 #
 # @using-api yes
 # @results JSON
 # @stable yes
@@ -16,7 +17,7 @@ from urllib import urlencode
 from dateutil import parser

 # engine dependent config
-categories = ['general', 'images'] #TODO , 'music', 'videos', 'files'
+categories = ['general', 'images']  # TODO , 'music', 'videos', 'files'
 paging = True
 language_support = True
 number_of_results = 5
@@ -28,7 +29,7 @@ search_url = '/yacysearch.json?{query}&startRecord={offset}&maximumRecords={limi

 # yacy specific type-definitions
 search_types = {'general': 'text',
                 'images': 'image',
                 'files': 'app',
                 'music': 'audio',
                 'videos': 'video'}

searx/engines/yahoo.py (View File)

@@ -1,8 +1,9 @@
 ## Yahoo (Web)
 #
 # @website https://search.yahoo.com/web
-# @provide-api yes (https://developer.yahoo.com/boss/search/), $0.80/1000 queries
+# @provide-api yes (https://developer.yahoo.com/boss/search/),
+# $0.80/1000 queries
 #
 # @using-api no (because pricing)
 # @results HTML (using search portal)
 # @stable no (HTML can change)
@@ -40,8 +41,8 @@ def parse_url(url_string):
         if endpos > -1:
             endpositions.append(endpos)

-    if start==0 or len(endpositions) == 0:
+    if start == 0 or len(endpositions) == 0:
         return url_string
     else:
         end = min(endpositions)
         return unquote(url_string[start:end])
@@ -84,8 +85,8 @@ def response(resp):
         content = extract_text(result.xpath(content_xpath)[0])

         # append result
         results.append({'url': url,
                         'title': title,
                         'content': content})

     # if no suggestion found, return results
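parse_url() in yahoo.py, touched above, unwraps Yahoo's redirector links: it looks for the http that follows the '/RU=' marker, cuts at the earliest trailing marker, and percent-decodes the remainder. A rough sketch under the assumption that the trailing markers are '/RS' and '/RK' (the engine's actual list may differ):

from urllib import unquote


def parse_url(url_string, endings=('/RS', '/RK')):
    # assumption: Yahoo wraps targets as .../RU=<quoted url>/RK=...;
    # the engine's real list of trailing markers may differ
    start = url_string.find('http', url_string.find('/RU=') + 1)
    endpositions = [url_string.rfind(ending) for ending in endings
                    if url_string.rfind(ending) > -1]
    if start == 0 or len(endpositions) == 0:
        return url_string
    return unquote(url_string[start:min(endpositions)])


wrapped = ('https://r.search.yahoo.com/_ylt=X/RU='
           'https%3a%2f%2fexample.org%2fpage/RK=2/RS=abc-')
print(parse_url(wrapped))   # https://example.org/page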

searx/engines/youtube.py (View File)

@@ -1,8 +1,8 @@
 ## Youtube (Videos)
 #
 # @website https://www.youtube.com/
 # @provide-api yes (http://gdata-samples-youtube-search-py.appspot.com/)
 #
 # @using-api yes
 # @results JSON
 # @stable yes