[enh] py3 compatibility

This commit is contained in:
Adam Tauber 2016-11-30 18:43:03 +01:00
parent 46a2c63f8e
commit 52e615dede
115 changed files with 517 additions and 513 deletions

View File

@ -9,6 +9,7 @@ addons:
language: python language: python
python: python:
- "2.7" - "2.7"
- "3.6"
before_install: before_install:
- "export DISPLAY=:99.0" - "export DISPLAY=:99.0"
- "sh -e /etc/init.d/xvfb start" - "sh -e /etc/init.d/xvfb start"
@ -24,9 +25,9 @@ script:
- ./manage.sh styles - ./manage.sh styles
- ./manage.sh grunt_build - ./manage.sh grunt_build
- ./manage.sh tests - ./manage.sh tests
- ./manage.sh py_test_coverage
after_success: after_success:
coveralls - ./manage.sh py_test_coverage
- coveralls
notifications: notifications:
irc: irc:
channels: channels:

View File

@ -3,8 +3,7 @@ mock==2.0.0
nose2[coverage-plugin] nose2[coverage-plugin]
pep8==1.7.0 pep8==1.7.0
plone.testing==5.0.0 plone.testing==5.0.0
robotframework-selenium2library==1.8.0 splinter==0.7.5
robotsuite==1.7.0
transifex-client==0.12.2 transifex-client==0.12.2
unittest2==1.1.0 unittest2==1.1.0
zope.testrunner==4.5.1 zope.testrunner==4.5.1

View File

@ -1,8 +1,12 @@
from os import listdir from os import listdir
from os.path import realpath, dirname, join, isdir from os.path import realpath, dirname, join, isdir
from sys import version_info
from searx.utils import load_module from searx.utils import load_module
from collections import defaultdict from collections import defaultdict
if version_info[0] == 3:
unicode = str
answerers_dir = dirname(realpath(__file__)) answerers_dir = dirname(realpath(__file__))
@ -10,7 +14,7 @@ answerers_dir = dirname(realpath(__file__))
def load_answerers(): def load_answerers():
answerers = [] answerers = []
for filename in listdir(answerers_dir): for filename in listdir(answerers_dir):
if not isdir(join(answerers_dir, filename)): if not isdir(join(answerers_dir, filename)) or filename.startswith('_'):
continue continue
module = load_module('answerer.py', join(answerers_dir, filename)) module = load_module('answerer.py', join(answerers_dir, filename))
if not hasattr(module, 'keywords') or not isinstance(module.keywords, tuple) or not len(module.keywords): if not hasattr(module, 'keywords') or not isinstance(module.keywords, tuple) or not len(module.keywords):
@ -30,12 +34,12 @@ def get_answerers_by_keywords(answerers):
def ask(query): def ask(query):
results = [] results = []
query_parts = filter(None, query.query.split()) query_parts = list(filter(None, query.query.split()))
if query_parts[0] not in answerers_by_keywords: if query_parts[0].decode('utf-8') not in answerers_by_keywords:
return results return results
for answerer in answerers_by_keywords[query_parts[0]]: for answerer in answerers_by_keywords[query_parts[0].decode('utf-8')]:
result = answerer(query) result = answerer(query)
if result: if result:
results.append(result) results.append(result)

View File

@ -1,5 +1,6 @@
import random import random
import string import string
import sys
from flask_babel import gettext from flask_babel import gettext
# required answerer attribute # required answerer attribute
@ -8,7 +9,11 @@ keywords = ('random',)
random_int_max = 2**31 random_int_max = 2**31
if sys.version_info[0] == 2:
random_string_letters = string.lowercase + string.digits + string.uppercase random_string_letters = string.lowercase + string.digits + string.uppercase
else:
unicode = str
random_string_letters = string.ascii_lowercase + string.digits + string.ascii_uppercase
def random_string(): def random_string():
@ -24,9 +29,9 @@ def random_int():
return unicode(random.randint(-random_int_max, random_int_max)) return unicode(random.randint(-random_int_max, random_int_max))
random_types = {u'string': random_string, random_types = {b'string': random_string,
u'int': random_int, b'int': random_int,
u'float': random_float} b'float': random_float}
# required answerer function # required answerer function

View File

@ -1,8 +1,12 @@
from sys import version_info
from functools import reduce from functools import reduce
from operator import mul from operator import mul
from flask_babel import gettext from flask_babel import gettext
if version_info[0] == 3:
unicode = str
keywords = ('min', keywords = ('min',
'max', 'max',
'avg', 'avg',
@ -19,22 +23,22 @@ def answer(query):
return [] return []
try: try:
args = map(float, parts[1:]) args = list(map(float, parts[1:]))
except: except:
return [] return []
func = parts[0] func = parts[0]
answer = None answer = None
if func == 'min': if func == b'min':
answer = min(args) answer = min(args)
elif func == 'max': elif func == b'max':
answer = max(args) answer = max(args)
elif func == 'avg': elif func == b'avg':
answer = sum(args) / len(args) answer = sum(args) / len(args)
elif func == 'sum': elif func == b'sum':
answer = sum(args) answer = sum(args)
elif func == 'prod': elif func == b'prod':
answer = reduce(mul, args, 1) answer = reduce(mul, args, 1)
if answer is None: if answer is None:

View File

@ -18,7 +18,6 @@ along with searx. If not, see < http://www.gnu.org/licenses/ >.
from lxml import etree from lxml import etree
from json import loads from json import loads
from urllib import urlencode
from searx import settings from searx import settings
from searx.languages import language_codes from searx.languages import language_codes
from searx.engines import ( from searx.engines import (
@ -26,6 +25,11 @@ from searx.engines import (
) )
from searx.poolrequests import get as http_get from searx.poolrequests import get as http_get
try:
from urllib import urlencode
except:
from urllib.parse import urlencode
def get(*args, **kwargs): def get(*args, **kwargs):
if 'timeout' not in kwargs: if 'timeout' not in kwargs:

View File

@ -1,8 +1,7 @@
from urllib import quote
from lxml import html from lxml import html
from searx.engines.xpath import extract_text from searx.engines.xpath import extract_text
from searx.utils import get_torrent_size from searx.utils import get_torrent_size
from urlparse import urljoin from searx.url_utils import quote, urljoin
url = 'https://1337x.to/' url = 'https://1337x.to/'
search_url = url + 'search/{search_term}/{pageno}/' search_url = url + 'search/{search_term}/{pageno}/'

View File

@ -72,12 +72,11 @@ def load_engine(engine_data):
if engine_data['categories'] == 'none': if engine_data['categories'] == 'none':
engine.categories = [] engine.categories = []
else: else:
engine.categories = map( engine.categories = list(map(str.strip, engine_data['categories'].split(',')))
str.strip, engine_data['categories'].split(','))
continue continue
setattr(engine, param_name, engine_data[param_name]) setattr(engine, param_name, engine_data[param_name])
for arg_name, arg_value in engine_default_args.iteritems(): for arg_name, arg_value in engine_default_args.items():
if not hasattr(engine, arg_name): if not hasattr(engine, arg_name):
setattr(engine, arg_name, arg_value) setattr(engine, arg_name, arg_value)

View File

@ -11,10 +11,9 @@
@parse url, title @parse url, title
""" """
from urlparse import urljoin
from urllib import urlencode
from lxml import html from lxml import html
from searx.engines.xpath import extract_text from searx.engines.xpath import extract_text
from searx.url_utils import urlencode, urljoin
# engine dependent config # engine dependent config
categories = ['it'] categories = ['it']

View File

@ -14,10 +14,10 @@
""" """
from lxml import etree from lxml import etree
from urllib import urlencode
from searx.utils import searx_useragent
from datetime import datetime from datetime import datetime
import re import re
from searx.url_utils import urlencode
from searx.utils import searx_useragent
categories = ['science'] categories = ['science']
@ -73,7 +73,7 @@ def request(query, params):
def response(resp): def response(resp):
results = [] results = []
search_results = etree.XML(resp.content) search_results = etree.XML(resp.text)
for entry in search_results.xpath('./result/doc'): for entry in search_results.xpath('./result/doc'):
content = "No description available" content = "No description available"

View File

@ -13,9 +13,9 @@
@todo publishedDate @todo publishedDate
""" """
from urllib import urlencode
from lxml import html from lxml import html
from searx.engines.xpath import extract_text from searx.engines.xpath import extract_text
from searx.url_utils import urlencode
# engine dependent config # engine dependent config
categories = ['general'] categories = ['general']

View File

@ -15,11 +15,11 @@
limited response to 10 images limited response to 10 images
""" """
from urllib import urlencode
from lxml import html from lxml import html
from json import loads from json import loads
import re import re
from searx.engines.bing import _fetch_supported_languages, supported_languages_url from searx.engines.bing import _fetch_supported_languages, supported_languages_url
from searx.url_utils import urlencode
# engine dependent config # engine dependent config
categories = ['images'] categories = ['images']

View File

@ -11,13 +11,12 @@
@parse url, title, content, publishedDate, thumbnail @parse url, title, content, publishedDate, thumbnail
""" """
from urllib import urlencode
from urlparse import urlparse, parse_qsl
from datetime import datetime from datetime import datetime
from dateutil import parser from dateutil import parser
from lxml import etree from lxml import etree
from searx.utils import list_get from searx.utils import list_get
from searx.engines.bing import _fetch_supported_languages, supported_languages_url from searx.engines.bing import _fetch_supported_languages, supported_languages_url
from searx.url_utils import urlencode, urlparse, parse_qsl
# engine dependent config # engine dependent config
categories = ['news'] categories = ['news']
@ -86,7 +85,7 @@ def request(query, params):
def response(resp): def response(resp):
results = [] results = []
rss = etree.fromstring(resp.content) rss = etree.fromstring(resp.text)
ns = rss.nsmap ns = rss.nsmap

View File

@ -11,7 +11,7 @@
""" """
from json import loads from json import loads
from urllib import urlencode from searx.url_utils import urlencode
# engine dependent config # engine dependent config
categories = ['images'] categories = ['images']

View File

@ -10,11 +10,10 @@
@parse url, title, content, seed, leech, magnetlink @parse url, title, content, seed, leech, magnetlink
""" """
from urlparse import urljoin
from urllib import quote
from lxml import html from lxml import html
from operator import itemgetter from operator import itemgetter
from searx.engines.xpath import extract_text from searx.engines.xpath import extract_text
from searx.url_utils import quote, urljoin
from searx.utils import get_torrent_size from searx.utils import get_torrent_size
# engine dependent config # engine dependent config
@ -38,7 +37,7 @@ def request(query, params):
def response(resp): def response(resp):
results = [] results = []
dom = html.fromstring(resp.content) dom = html.fromstring(resp.text)
search_res = dom.xpath('//div[@id="search_res"]/table/tr') search_res = dom.xpath('//div[@id="search_res"]/table/tr')

View File

@ -1,21 +1,25 @@
from datetime import datetime import json
import re import re
import os import os
import json import sys
import unicodedata import unicodedata
from datetime import datetime
if sys.version_info[0] == 3:
unicode = str
categories = [] categories = []
url = 'https://download.finance.yahoo.com/d/quotes.csv?e=.csv&f=sl1d1t1&s={query}=X' url = 'https://download.finance.yahoo.com/d/quotes.csv?e=.csv&f=sl1d1t1&s={query}=X'
weight = 100 weight = 100
parser_re = re.compile(u'.*?(\\d+(?:\\.\\d+)?) ([^.0-9]+) (?:in|to) ([^.0-9]+)', re.I) # noqa parser_re = re.compile(b'.*?(\\d+(?:\\.\\d+)?) ([^.0-9]+) (?:in|to) ([^.0-9]+)', re.I)
db = 1 db = 1
def normalize_name(name): def normalize_name(name):
name = name.lower().replace('-', ' ').rstrip('s') name = name.decode('utf-8').lower().replace('-', ' ').rstrip('s')
name = re.sub(' +', ' ', name) name = re.sub(' +', ' ', name)
return unicodedata.normalize('NFKD', name).lower() return unicodedata.normalize('NFKD', name).lower()
@ -35,7 +39,7 @@ def iso4217_to_name(iso4217, language):
def request(query, params): def request(query, params):
m = parser_re.match(unicode(query, 'utf8')) m = parser_re.match(query)
if not m: if not m:
# wrong query # wrong query
return params return params

View File

@ -12,10 +12,9 @@
@todo set content-parameter with correct data @todo set content-parameter with correct data
""" """
from urllib import urlencode
from json import loads from json import loads
from datetime import datetime from datetime import datetime
from requests import get from searx.url_utils import urlencode
# engine dependent config # engine dependent config
categories = ['videos'] categories = ['videos']

View File

@ -11,7 +11,7 @@
""" """
from json import loads from json import loads
from urllib import urlencode from searx.url_utils import urlencode
# engine dependent config # engine dependent config
categories = ['music'] categories = ['music']
@ -30,8 +30,7 @@ embedded_url = '<iframe scrolling="no" frameborder="0" allowTransparency="true"
def request(query, params): def request(query, params):
offset = (params['pageno'] - 1) * 25 offset = (params['pageno'] - 1) * 25
params['url'] = search_url.format(query=urlencode({'q': query}), params['url'] = search_url.format(query=urlencode({'q': query}), offset=offset)
offset=offset)
return params return params

View File

@ -12,10 +12,10 @@
@todo rewrite to api @todo rewrite to api
""" """
from urllib import urlencode
from lxml import html from lxml import html
import re import re
from searx.engines.xpath import extract_text from searx.engines.xpath import extract_text
from searx.url_utils import urlencode
# engine dependent config # engine dependent config
categories = ['images'] categories = ['images']

View File

@ -10,20 +10,20 @@
""" """
import re import re
from urlparse import urljoin
from lxml import html from lxml import html
from searx.utils import is_valid_lang from searx.utils import is_valid_lang
from searx.url_utils import urljoin
categories = ['general'] categories = ['general']
url = u'http://dictzone.com/{from_lang}-{to_lang}-dictionary/{query}' url = u'http://dictzone.com/{from_lang}-{to_lang}-dictionary/{query}'
weight = 100 weight = 100
parser_re = re.compile(u'.*?([a-z]+)-([a-z]+) ([^ ]+)$', re.I) parser_re = re.compile(b'.*?([a-z]+)-([a-z]+) ([^ ]+)$', re.I)
results_xpath = './/table[@id="r"]/tr' results_xpath = './/table[@id="r"]/tr'
def request(query, params): def request(query, params):
m = parser_re.match(unicode(query, 'utf8')) m = parser_re.match(query)
if not m: if not m:
return params return params

View File

@ -10,10 +10,14 @@
@parse url, title, content, magnetlink @parse url, title, content, magnetlink
""" """
from urlparse import urljoin from sys import version_info
from lxml import html from lxml import html
from searx.engines.xpath import extract_text from searx.engines.xpath import extract_text
from searx.utils import get_torrent_size from searx.utils import get_torrent_size
from searx.url_utils import urljoin
if version_info[0] == 3:
unicode = str
categories = ['videos', 'music', 'files'] categories = ['videos', 'music', 'files']
paging = True paging = True
@ -31,7 +35,7 @@ def request(query, params):
def response(resp): def response(resp):
dom = html.fromstring(resp.content) dom = html.fromstring(resp.text)
search_res = dom.xpath('.//td[@class="x-item"]') search_res = dom.xpath('.//td[@class="x-item"]')
if not search_res: if not search_res:

View File

@ -10,10 +10,10 @@
@parse url, title, content, publishedDate, thumbnail @parse url, title, content, publishedDate, thumbnail
""" """
from urllib import quote_plus from dateutil import parser
from json import loads from json import loads
from lxml import html from lxml import html
from dateutil import parser from searx.url_utils import quote_plus
# engine dependent config # engine dependent config
categories = ['news', 'social media'] categories = ['news', 'social media']

View File

@ -9,9 +9,9 @@
# @stable yes # @stable yes
# @parse (general) url, title, content # @parse (general) url, title, content
from urllib import urlencode
from lxml.html import fromstring from lxml.html import fromstring
from searx.engines.xpath import extract_text from searx.engines.xpath import extract_text
from searx.url_utils import urlencode
# engine dependent config # engine dependent config
categories = ['general'] # TODO , 'images', 'music', 'videos', 'files' categories = ['general'] # TODO , 'images', 'music', 'videos', 'files'

View File

@ -13,11 +13,11 @@
@todo rewrite to api @todo rewrite to api
""" """
from urllib import urlencode
from lxml.html import fromstring from lxml.html import fromstring
from requests import get from requests import get
from json import loads from json import loads
from searx.engines.xpath import extract_text from searx.engines.xpath import extract_text
from searx.url_utils import urlencode
# engine dependent config # engine dependent config
categories = ['general'] categories = ['general']

View File

@ -1,10 +1,10 @@
import json import json
from urllib import urlencode
from re import compile, sub
from lxml import html from lxml import html
from searx.utils import html_to_text from re import compile
from searx.engines.xpath import extract_text from searx.engines.xpath import extract_text
from searx.engines.duckduckgo import _fetch_supported_languages, supported_languages_url from searx.engines.duckduckgo import _fetch_supported_languages, supported_languages_url
from searx.url_utils import urlencode
from searx.utils import html_to_text
url = 'https://api.duckduckgo.com/'\ url = 'https://api.duckduckgo.com/'\
+ '?{query}&format=json&pretty=0&no_redirect=1&d=1' + '?{query}&format=json&pretty=0&no_redirect=1&d=1'

View File

@ -10,10 +10,10 @@
@parse url, title, content, publishedDate, img_src @parse url, title, content, publishedDate, img_src
""" """
from urllib import urlencode
from json import loads from json import loads
import datetime import datetime
from searx.utils import searx_useragent from searx.utils import searx_useragent
from searx.url_utils import urlencode
# engine dependent config # engine dependent config
categories = ['general', 'news'] categories = ['general', 'news']

View File

@ -9,9 +9,9 @@
@parse url, title, content @parse url, title, content
""" """
from urllib import urlencode
from searx.engines.xpath import extract_text
from lxml import html from lxml import html
from searx.engines.xpath import extract_text
from searx.url_utils import urlencode
# engine dependent config # engine dependent config
categories = ['files'] categories = ['files']
@ -24,8 +24,7 @@ search_url = base_url + 'repository/browse/?{query}'
# do search-request # do search-request
def request(query, params): def request(query, params):
query = urlencode({'fdfilter': query, query = urlencode({'fdfilter': query, 'fdpage': params['pageno']})
'fdpage': params['pageno']})
params['url'] = search_url.format(query=query) params['url'] = search_url.format(query=query)
return params return params

View File

@ -1,5 +1,9 @@
from urllib import urlencode from searx.url_utils import urlencode
try:
from HTMLParser import HTMLParser from HTMLParser import HTMLParser
except:
from html.parser import HTMLParser
url = 'http://www.filecrop.com/' url = 'http://www.filecrop.com/'
search_url = url + '/search.php?{query}&size_i=0&size_f=100000000&engine_r=1&engine_d=1&engine_e=1&engine_4=1&engine_m=1&pos={index}' # noqa search_url = url + '/search.php?{query}&size_i=0&size_f=100000000&engine_r=1&engine_d=1&engine_e=1&engine_4=1&engine_m=1&pos={index}' # noqa
@ -73,8 +77,7 @@ class FilecropResultParser(HTMLParser):
def request(query, params): def request(query, params):
index = 1 + (params['pageno'] - 1) * 30 index = 1 + (params['pageno'] - 1) * 30
params['url'] = search_url.format(query=urlencode({'w': query}), params['url'] = search_url.format(query=urlencode({'w': query}), index=index)
index=index)
return params return params

View File

@ -13,8 +13,8 @@
More info on api-key : https://www.flickr.com/services/apps/create/ More info on api-key : https://www.flickr.com/services/apps/create/
""" """
from urllib import urlencode
from json import loads from json import loads
from searx.url_utils import urlencode
categories = ['images'] categories = ['images']

View File

@ -12,11 +12,11 @@
@parse url, title, thumbnail, img_src @parse url, title, thumbnail, img_src
""" """
from urllib import urlencode
from json import loads from json import loads
from time import time from time import time
import re import re
from searx.engines import logger from searx.engines import logger
from searx.url_utils import urlencode
logger = logger.getChild('flickr-noapi') logger = logger.getChild('flickr-noapi')

View File

@ -10,12 +10,10 @@
@parse url, title, content, thumbnail, img_src @parse url, title, content, thumbnail, img_src
""" """
from urlparse import urljoin
from cgi import escape from cgi import escape
from urllib import urlencode
from lxml import html from lxml import html
from searx.engines.xpath import extract_text from searx.engines.xpath import extract_text
from dateutil import parser from searx.url_utils import urljoin, urlencode
# engine dependent config # engine dependent config
categories = ['it'] categories = ['it']

View File

@ -10,7 +10,7 @@ Frinkiac (Images)
""" """
from json import loads from json import loads
from urllib import urlencode from searx.url_utils import urlencode
categories = ['images'] categories = ['images']

View File

@ -11,10 +11,9 @@
""" """
from json import loads from json import loads
from random import randint
from time import time from time import time
from urllib import urlencode
from lxml.html import fromstring from lxml.html import fromstring
from searx.url_utils import urlencode
# engine dependent config # engine dependent config
categories = ['general'] categories = ['general']

View File

@ -10,8 +10,8 @@
@parse url, title, content @parse url, title, content
""" """
from urllib import urlencode
from json import loads from json import loads
from searx.url_utils import urlencode
# engine dependent config # engine dependent config
categories = ['it'] categories = ['it']

View File

@ -9,11 +9,10 @@
# @parse url, title, content, suggestion # @parse url, title, content, suggestion
import re import re
from urllib import urlencode
from urlparse import urlparse, parse_qsl
from lxml import html, etree from lxml import html, etree
from searx.engines.xpath import extract_text, extract_url from searx.engines.xpath import extract_text, extract_url
from searx.search import logger from searx import logger
from searx.url_utils import urlencode, urlparse, parse_qsl
logger = logger.getChild('google engine') logger = logger.getChild('google engine')

View File

@ -11,9 +11,9 @@
""" """
from datetime import date, timedelta from datetime import date, timedelta
from urllib import urlencode
from json import loads from json import loads
from lxml import html from lxml import html
from searx.url_utils import urlencode
# engine dependent config # engine dependent config

View File

@ -11,9 +11,8 @@
""" """
from lxml import html from lxml import html
from urllib import urlencode
from json import loads
from searx.engines.google import _fetch_supported_languages, supported_languages_url from searx.engines.google import _fetch_supported_languages, supported_languages_url
from searx.url_utils import urlencode
# search-url # search-url
categories = ['news'] categories = ['news']

View File

@ -12,11 +12,15 @@
# @todo embedded (needs some md5 from video page) # @todo embedded (needs some md5 from video page)
from json import loads from json import loads
from urllib import urlencode
from lxml import html from lxml import html
from HTMLParser import HTMLParser
from searx.engines.xpath import extract_text
from dateutil import parser from dateutil import parser
from searx.engines.xpath import extract_text
from searx.url_utils import urlencode
try:
from HTMLParser import HTMLParser
except:
from html.parser import HTMLParser
# engine dependent config # engine dependent config
categories = ['videos'] categories = ['videos']

View File

@ -1,11 +1,16 @@
from urllib import urlencode
from json import loads
from collections import Iterable from collections import Iterable
from json import loads
from sys import version_info
from searx.url_utils import urlencode
if version_info[0] == 3:
unicode = str
search_url = None search_url = None
url_query = None url_query = None
content_query = None content_query = None
title_query = None title_query = None
paging = False
suggestion_query = '' suggestion_query = ''
results_query = '' results_query = ''
@ -20,7 +25,7 @@ first_page_num = 1
def iterate(iterable): def iterate(iterable):
if type(iterable) == dict: if type(iterable) == dict:
it = iterable.iteritems() it = iterable.items()
else: else:
it = enumerate(iterable) it = enumerate(iterable)

View File

@ -10,12 +10,11 @@
@parse url, title, content, seed, leech, magnetlink @parse url, title, content, seed, leech, magnetlink
""" """
from urlparse import urljoin
from urllib import quote
from lxml import html from lxml import html
from operator import itemgetter from operator import itemgetter
from searx.engines.xpath import extract_text from searx.engines.xpath import extract_text
from searx.utils import get_torrent_size, convert_str_to_int from searx.utils import get_torrent_size, convert_str_to_int
from searx.url_utils import quote, urljoin
# engine dependent config # engine dependent config
categories = ['videos', 'music', 'files'] categories = ['videos', 'music', 'files']

View File

@ -14,7 +14,7 @@
from json import loads from json import loads
from string import Formatter from string import Formatter
from urllib import urlencode, quote from searx.url_utils import urlencode, quote
# engine dependent config # engine dependent config
categories = ['general'] categories = ['general']

View File

@ -11,8 +11,8 @@
""" """
from json import loads from json import loads
from urllib import urlencode
from dateutil import parser from dateutil import parser
from searx.url_utils import urlencode
# engine dependent config # engine dependent config
categories = ['music'] categories = ['music']

View File

@ -9,9 +9,9 @@
@parse url, title, content, seed, leech, torrentfile @parse url, title, content, seed, leech, torrentfile
""" """
from urllib import urlencode
from lxml import html from lxml import html
from searx.engines.xpath import extract_text from searx.engines.xpath import extract_text
from searx.url_utils import urlencode
# engine dependent config # engine dependent config
categories = ['files', 'images', 'videos', 'music'] categories = ['files', 'images', 'videos', 'music']

View File

@ -11,7 +11,6 @@
""" """
from json import loads from json import loads
from searx.utils import searx_useragent
# engine dependent config # engine dependent config
categories = ['map'] categories = ['map']
@ -27,9 +26,6 @@ result_base_url = 'https://openstreetmap.org/{osm_type}/{osm_id}'
def request(query, params): def request(query, params):
params['url'] = base_url + search_string.format(query=query) params['url'] = base_url + search_string.format(query=query)
# using searx User-Agent
params['headers']['User-Agent'] = searx_useragent()
return params return params

View File

@ -10,9 +10,9 @@
@parse url, title @parse url, title
""" """
from urllib import urlencode
from json import loads from json import loads
from searx.utils import searx_useragent from searx.utils import searx_useragent
from searx.url_utils import urlencode
# engine dependent config # engine dependent config
categories = ['map'] categories = ['map']

View File

@ -8,11 +8,10 @@
# @stable yes (HTML can change) # @stable yes (HTML can change)
# @parse url, title, content, seed, leech, magnetlink # @parse url, title, content, seed, leech, magnetlink
from urlparse import urljoin
from urllib import quote
from lxml import html from lxml import html
from operator import itemgetter from operator import itemgetter
from searx.engines.xpath import extract_text from searx.engines.xpath import extract_text
from searx.url_utils import quote, urljoin
# engine dependent config # engine dependent config
categories = ['videos', 'music', 'files'] categories = ['videos', 'music', 'files']

View File

@ -12,9 +12,8 @@
from datetime import datetime from datetime import datetime
from json import loads from json import loads
from urllib import urlencode
from searx.utils import html_to_text from searx.utils import html_to_text
from searx.url_utils import urlencode
# engine dependent config # engine dependent config
categories = None categories = None

View File

@ -11,9 +11,8 @@
""" """
import json import json
from urllib import urlencode
from urlparse import urlparse, urljoin
from datetime import datetime from datetime import datetime
from searx.url_utils import urlencode, urljoin, urlparse
# engine dependent config # engine dependent config
categories = ['general', 'images', 'news', 'social media'] categories = ['general', 'images', 'news', 'social media']
@ -26,8 +25,7 @@ search_url = base_url + 'search.json?{query}'
# do search-request # do search-request
def request(query, params): def request(query, params):
query = urlencode({'q': query, query = urlencode({'q': query, 'limit': page_size})
'limit': page_size})
params['url'] = search_url.format(query=query) params['url'] = search_url.format(query=query)
return params return params

View File

@ -10,9 +10,7 @@
@parse url, title, content, img_src @parse url, title, content, img_src
""" """
from urllib import urlencode
from json import loads, dumps from json import loads, dumps
from dateutil import parser
from searx.utils import html_to_text from searx.utils import html_to_text
# engine dependent config # engine dependent config
@ -48,7 +46,7 @@ def response(resp):
search_res = loads(resp.text) search_res = loads(resp.text)
# return empty array if there are no results # return empty array if there are no results
if search_res.get('total') < 1: if search_res.get('total', 0) < 1:
return [] return []
# parse results # parse results

View File

@ -10,8 +10,8 @@
@parse url, title, content @parse url, title, content
""" """
from urllib import urlencode
from json import loads from json import loads
from searx.url_utils import urlencode
# engine dependent config # engine dependent config
@ -31,8 +31,7 @@ code_endings = {'cs': 'c#',
# do search-request # do search-request
def request(query, params): def request(query, params):
params['url'] = search_url.format(query=urlencode({'q': query}), params['url'] = search_url.format(query=urlencode({'q': query}), pageno=params['pageno'] - 1)
pageno=params['pageno'] - 1)
return params return params

View File

@ -10,8 +10,8 @@
@parse url, title, content @parse url, title, content
""" """
from urllib import urlencode
from json import loads from json import loads
from searx.url_utils import urlencode
# engine dependent config # engine dependent config
categories = ['it'] categories = ['it']
@ -24,8 +24,7 @@ search_url = url + 'api/search_IV/?{query}&p={pageno}'
# do search-request # do search-request
def request(query, params): def request(query, params):
params['url'] = search_url.format(query=urlencode({'q': query}), params['url'] = search_url.format(query=urlencode({'q': query}), pageno=params['pageno'] - 1)
pageno=params['pageno'] - 1)
return params return params

View File

@ -8,11 +8,9 @@
# @stable yes (HTML can change) # @stable yes (HTML can change)
# @parse url, title, content, seed, leech, magnetlink # @parse url, title, content, seed, leech, magnetlink
from urlparse import urljoin
from urllib import quote
from lxml import html from lxml import html
from operator import itemgetter from operator import itemgetter
from searx.engines.xpath import extract_text from searx.url_utils import quote, urljoin
url = 'http://www.seedpeer.eu/' url = 'http://www.seedpeer.eu/'

View File

@ -11,13 +11,17 @@
""" """
import re import re
from StringIO import StringIO
from json import loads from json import loads
from lxml import etree from lxml import html
from urllib import urlencode, quote_plus
from dateutil import parser from dateutil import parser
from searx import logger from searx import logger
from searx.poolrequests import get as http_get from searx.poolrequests import get as http_get
from searx.url_utils import quote_plus, urlencode
try:
from cStringIO import StringIO
except:
from io import StringIO
# engine dependent config # engine dependent config
categories = ['music'] categories = ['music']
@ -36,14 +40,15 @@ embedded_url = '<iframe width="100%" height="166" ' +\
'scrolling="no" frameborder="no" ' +\ 'scrolling="no" frameborder="no" ' +\
'data-src="https://w.soundcloud.com/player/?url={uri}"></iframe>' 'data-src="https://w.soundcloud.com/player/?url={uri}"></iframe>'
cid_re = re.compile(r'client_id:"([^"]*)"', re.I | re.U)
def get_client_id(): def get_client_id():
response = http_get("https://soundcloud.com") response = http_get("https://soundcloud.com")
rx_namespace = {"re": "http://exslt.org/regular-expressions"}
if response.ok: if response.ok:
tree = etree.parse(StringIO(response.content), etree.HTMLParser()) tree = html.fromstring(response.content)
script_tags = tree.xpath("//script[re:match(@src, '(.*app.*js)')]", namespaces=rx_namespace) script_tags = tree.xpath("//script[contains(@src, '/assets/app')]")
app_js_urls = [script_tag.get('src') for script_tag in script_tags if script_tag is not None] app_js_urls = [script_tag.get('src') for script_tag in script_tags if script_tag is not None]
# extracts valid app_js urls from soundcloud.com content # extracts valid app_js urls from soundcloud.com content
@ -51,7 +56,7 @@ def get_client_id():
# gets app_js and searches for the clientid # gets app_js and searches for the clientid
response = http_get(app_js_url) response = http_get(app_js_url)
if response.ok: if response.ok:
cids = re.search(r'client_id:"([^"]*)"', response.content, re.M | re.I) cids = cid_re.search(response.text)
if cids is not None and len(cids.groups()): if cids is not None and len(cids.groups()):
return cids.groups()[0] return cids.groups()[0]
logger.warning("Unable to fetch guest client_id from SoundCloud, check parser!") logger.warning("Unable to fetch guest client_id from SoundCloud, check parser!")

View File

@ -11,7 +11,7 @@
""" """
from json import loads from json import loads
from urllib import urlencode from searx.url_utils import urlencode
# engine dependent config # engine dependent config
categories = ['music'] categories = ['music']
@ -29,8 +29,7 @@ embedded_url = '<iframe data-src="https://embed.spotify.com/?uri=spotify:track:{
def request(query, params): def request(query, params):
offset = (params['pageno'] - 1) * 20 offset = (params['pageno'] - 1) * 20
params['url'] = search_url.format(query=urlencode({'q': query}), params['url'] = search_url.format(query=urlencode({'q': query}), offset=offset)
offset=offset)
return params return params

View File

@ -10,10 +10,9 @@
@parse url, title, content @parse url, title, content
""" """
from urlparse import urljoin
from urllib import urlencode
from lxml import html from lxml import html
from searx.engines.xpath import extract_text from searx.engines.xpath import extract_text
from searx.url_utils import urlencode, urljoin
# engine dependent config # engine dependent config
categories = ['it'] categories = ['it']
@ -31,8 +30,7 @@ content_xpath = './/div[@class="excerpt"]'
# do search-request # do search-request
def request(query, params): def request(query, params):
params['url'] = search_url.format(query=urlencode({'q': query}), params['url'] = search_url.format(query=urlencode({'q': query}), pageno=params['pageno'])
pageno=params['pageno'])
return params return params

View File

@ -56,7 +56,7 @@ def request(query, params):
def response(resp): def response(resp):
results = [] results = []
dom = html.fromstring(resp.content) dom = html.fromstring(resp.text)
# parse results # parse results
for result in dom.xpath(results_xpath): for result in dom.xpath(results_xpath):

View File

@ -10,10 +10,10 @@
@parse url, title, content @parse url, title, content
""" """
from urllib import quote_plus
from lxml import html from lxml import html
from searx.languages import language_codes from searx.languages import language_codes
from searx.engines.xpath import extract_text from searx.engines.xpath import extract_text
from searx.url_utils import quote_plus
# engine dependent config # engine dependent config
categories = ['videos'] categories = ['videos']

View File

@ -11,9 +11,9 @@
""" """
from json import loads from json import loads
from urllib import urlencode, unquote
import re import re
from lxml.html import fromstring from lxml.html import fromstring
from searx.url_utils import unquote, urlencode
# engine dependent config # engine dependent config
categories = ['general', 'images'] categories = ['general', 'images']
@ -27,10 +27,10 @@ search_string = '?{query}&page={page}'
supported_languages_url = base_url supported_languages_url = base_url
# regex # regex
regex_json = re.compile(r'initialData: {"Request":(.|\n)*},\s*environment') regex_json = re.compile(b'initialData: {"Request":(.|\n)*},\s*environment')
regex_json_remove_start = re.compile(r'^initialData:\s*') regex_json_remove_start = re.compile(b'^initialData:\s*')
regex_json_remove_end = re.compile(r',\s*environment$') regex_json_remove_end = re.compile(b',\s*environment$')
regex_img_url_remove_start = re.compile(r'^https?://i\.swisscows\.ch/\?link=') regex_img_url_remove_start = re.compile(b'^https?://i\.swisscows\.ch/\?link=')
# do search-request # do search-request
@ -45,10 +45,9 @@ def request(query, params):
ui_language = params['language'].split('-')[0] ui_language = params['language'].split('-')[0]
search_path = search_string.format( search_path = search_string.format(
query=urlencode({'query': query, query=urlencode({'query': query, 'uiLanguage': ui_language, 'region': region}),
'uiLanguage': ui_language, page=params['pageno']
'region': region}), )
page=params['pageno'])
# image search query is something like 'image?{query}&page={page}' # image search query is something like 'image?{query}&page={page}'
if params['category'] == 'images': if params['category'] == 'images':
@ -63,14 +62,14 @@ def request(query, params):
def response(resp): def response(resp):
results = [] results = []
json_regex = regex_json.search(resp.content) json_regex = regex_json.search(resp.text)
# check if results are returned # check if results are returned
if not json_regex: if not json_regex:
return [] return []
json_raw = regex_json_remove_end.sub('', regex_json_remove_start.sub('', json_regex.group())) json_raw = regex_json_remove_end.sub(b'', regex_json_remove_start.sub(b'', json_regex.group()))
json = loads(json_raw) json = loads(json_raw.decode('utf-8'))
# parse results # parse results
for result in json['Results'].get('items', []): for result in json['Results'].get('items', []):
@ -78,7 +77,7 @@ def response(resp):
# parse image results # parse image results
if result.get('ContentType', '').startswith('image'): if result.get('ContentType', '').startswith('image'):
img_url = unquote(regex_img_url_remove_start.sub('', result['Url'])) img_url = unquote(regex_img_url_remove_start.sub(b'', result['Url'].encode('utf-8')).decode('utf-8'))
# append result # append result
results.append({'url': result['SourceUrl'], results.append({'url': result['SourceUrl'],
@ -100,7 +99,7 @@ def response(resp):
# parse images # parse images
for result in json.get('Images', []): for result in json.get('Images', []):
# decode image url # decode image url
img_url = unquote(regex_img_url_remove_start.sub('', result['Url'])) img_url = unquote(regex_img_url_remove_start.sub(b'', result['Url'].encode('utf-8')).decode('utf-8'))
# append result # append result
results.append({'url': result['SourceUrl'], results.append({'url': result['SourceUrl'],

View File

@ -11,11 +11,11 @@
""" """
import re import re
from urllib import urlencode
from lxml import html from lxml import html
from searx.engines.xpath import extract_text from searx.engines.xpath import extract_text
from datetime import datetime from datetime import datetime
from searx.engines.nyaa import int_or_zero, get_filesize_mul from searx.engines.nyaa import int_or_zero, get_filesize_mul
from searx.url_utils import urlencode
# engine dependent config # engine dependent config
categories = ['files', 'videos', 'music'] categories = ['files', 'videos', 'music']
@ -28,8 +28,7 @@ search_url = base_url + 'search.php?{query}'
# do search-request # do search-request
def request(query, params): def request(query, params):
query = urlencode({'page': params['pageno'], query = urlencode({'page': params['pageno'], 'terms': query})
'terms': query})
params['url'] = search_url.format(query=query) params['url'] = search_url.format(query=query)
return params return params
@ -50,7 +49,7 @@ def response(resp):
size_re = re.compile(r'Size:\s*([\d.]+)(TB|GB|MB|B)', re.IGNORECASE) size_re = re.compile(r'Size:\s*([\d.]+)(TB|GB|MB|B)', re.IGNORECASE)
# processing the results, two rows at a time # processing the results, two rows at a time
for i in xrange(0, len(rows), 2): for i in range(0, len(rows), 2):
# parse the first row # parse the first row
name_row = rows[i] name_row = rows[i]
@ -79,14 +78,14 @@ def response(resp):
groups = size_re.match(item).groups() groups = size_re.match(item).groups()
multiplier = get_filesize_mul(groups[1]) multiplier = get_filesize_mul(groups[1])
params['filesize'] = int(multiplier * float(groups[0])) params['filesize'] = int(multiplier * float(groups[0]))
except Exception as e: except:
pass pass
elif item.startswith('Date:'): elif item.startswith('Date:'):
try: try:
# Date: 2016-02-21 21:44 UTC # Date: 2016-02-21 21:44 UTC
date = datetime.strptime(item, 'Date: %Y-%m-%d %H:%M UTC') date = datetime.strptime(item, 'Date: %Y-%m-%d %H:%M UTC')
params['publishedDate'] = date params['publishedDate'] = date
except Exception as e: except:
pass pass
elif item.startswith('Comment:'): elif item.startswith('Comment:'):
params['content'] = item params['content'] = item

View File

@ -12,11 +12,11 @@
""" """
import re import re
from urllib import urlencode
from lxml import html from lxml import html
from searx.engines.xpath import extract_text
from datetime import datetime from datetime import datetime
from searx.engines.nyaa import int_or_zero, get_filesize_mul from searx.engines.nyaa import int_or_zero, get_filesize_mul
from searx.engines.xpath import extract_text
from searx.url_utils import urlencode
# engine dependent config # engine dependent config
categories = ['files', 'videos', 'music'] categories = ['files', 'videos', 'music']
@ -70,7 +70,7 @@ def response(resp):
size_str = result.xpath('./dd/span[@class="s"]/text()')[0] size_str = result.xpath('./dd/span[@class="s"]/text()')[0]
size, suffix = size_str.split() size, suffix = size_str.split()
params['filesize'] = int(size) * get_filesize_mul(suffix) params['filesize'] = int(size) * get_filesize_mul(suffix)
except Exception as e: except:
pass pass
# does our link contain a valid SHA1 sum? # does our link contain a valid SHA1 sum?
@ -84,7 +84,7 @@ def response(resp):
# Fri, 25 Mar 2016 16:29:01 # Fri, 25 Mar 2016 16:29:01
date = datetime.strptime(date_str, '%a, %d %b %Y %H:%M:%S') date = datetime.strptime(date_str, '%a, %d %b %Y %H:%M:%S')
params['publishedDate'] = date params['publishedDate'] = date
except Exception as e: except:
pass pass
results.append(params) results.append(params)

View File

@ -9,8 +9,12 @@
@parse url, title, content @parse url, title, content
""" """
import re import re
from sys import version_info
from searx.utils import is_valid_lang from searx.utils import is_valid_lang
if version_info[0] == 3:
unicode = str
categories = ['general'] categories = ['general']
url = u'http://api.mymemory.translated.net/get?q={query}&langpair={from_lang}|{to_lang}{key}' url = u'http://api.mymemory.translated.net/get?q={query}&langpair={from_lang}|{to_lang}{key}'
web_url = u'http://mymemory.translated.net/en/{from_lang}/{to_lang}/{query}' web_url = u'http://mymemory.translated.net/en/{from_lang}/{to_lang}/{query}'

View File

@ -12,11 +12,10 @@
@todo publishedDate @todo publishedDate
""" """
from urlparse import urljoin
from urllib import urlencode
from lxml import html from lxml import html
from datetime import datetime from datetime import datetime
from searx.engines.xpath import extract_text from searx.engines.xpath import extract_text
from searx.url_utils import urlencode, urljoin
# engine dependent config # engine dependent config
categories = ['social media'] categories = ['social media']

View File

@ -13,8 +13,8 @@
# @todo set content-parameter with correct data # @todo set content-parameter with correct data
from json import loads from json import loads
from urllib import urlencode
from dateutil import parser from dateutil import parser
from searx.url_utils import urlencode
# engine dependent config # engine dependent config
categories = ['videos'] categories = ['videos']

View File

@ -14,12 +14,11 @@
from searx import logger from searx import logger
from searx.poolrequests import get from searx.poolrequests import get
from searx.engines.xpath import extract_text from searx.engines.xpath import extract_text
from searx.utils import format_date_by_locale
from searx.engines.wikipedia import _fetch_supported_languages, supported_languages_url from searx.engines.wikipedia import _fetch_supported_languages, supported_languages_url
from searx.url_utils import urlencode
from json import loads from json import loads
from lxml.html import fromstring from lxml.html import fromstring
from urllib import urlencode
logger = logger.getChild('wikidata') logger = logger.getChild('wikidata')
result_count = 1 result_count = 1
@ -62,14 +61,13 @@ def request(query, params):
language = 'en' language = 'en'
params['url'] = url_search.format( params['url'] = url_search.format(
query=urlencode({'label': query, query=urlencode({'label': query, 'language': language}))
'language': language}))
return params return params
def response(resp): def response(resp):
results = [] results = []
html = fromstring(resp.content) html = fromstring(resp.text)
wikidata_ids = html.xpath(wikidata_ids_xpath) wikidata_ids = html.xpath(wikidata_ids_xpath)
language = resp.search_params['language'].split('-')[0] language = resp.search_params['language'].split('-')[0]
@ -78,10 +76,9 @@ def response(resp):
# TODO: make requests asynchronous to avoid timeout when result_count > 1 # TODO: make requests asynchronous to avoid timeout when result_count > 1
for wikidata_id in wikidata_ids[:result_count]: for wikidata_id in wikidata_ids[:result_count]:
url = url_detail.format(query=urlencode({'page': wikidata_id, url = url_detail.format(query=urlencode({'page': wikidata_id, 'uselang': language}))
'uselang': language}))
htmlresponse = get(url) htmlresponse = get(url)
jsonresponse = loads(htmlresponse.content) jsonresponse = loads(htmlresponse.text)
results += getDetail(jsonresponse, wikidata_id, language, resp.search_params['language']) results += getDetail(jsonresponse, wikidata_id, language, resp.search_params['language'])
return results return results

View File

@ -11,13 +11,12 @@
""" """
from json import loads from json import loads
from urllib import urlencode, quote
from lxml.html import fromstring from lxml.html import fromstring
from searx.url_utils import quote, urlencode
# search-url # search-url
base_url = 'https://{language}.wikipedia.org/' base_url = u'https://{language}.wikipedia.org/'
search_postfix = 'w/api.php?'\ search_url = base_url + u'w/api.php?'\
'action=query'\ 'action=query'\
'&format=json'\ '&format=json'\
'&{query}'\ '&{query}'\
@ -37,16 +36,16 @@ def url_lang(lang):
else: else:
language = lang language = lang
return base_url.format(language=language) return language
# do search-request # do search-request
def request(query, params): def request(query, params):
if query.islower(): if query.islower():
query += '|' + query.title() query = u'{0}|{1}'.format(query.decode('utf-8'), query.decode('utf-8').title()).encode('utf-8')
params['url'] = url_lang(params['language']) \ params['url'] = search_url.format(query=urlencode({'titles': query}),
+ search_postfix.format(query=urlencode({'titles': query})) language=url_lang(params['language']))
return params return params
@ -78,7 +77,7 @@ def extract_first_paragraph(content, title, image):
def response(resp): def response(resp):
results = [] results = []
search_result = loads(resp.content) search_result = loads(resp.text)
# wikipedia article's unique id # wikipedia article's unique id
# first valid id is assumed to be the requested article # first valid id is assumed to be the requested article
@ -99,11 +98,9 @@ def response(resp):
extract = page.get('extract') extract = page.get('extract')
summary = extract_first_paragraph(extract, title, image) summary = extract_first_paragraph(extract, title, image)
if not summary:
return []
# link to wikipedia article # link to wikipedia article
wikipedia_link = url_lang(resp.search_params['language']) \ wikipedia_link = base_url.format(language=url_lang(resp.search_params['language'])) \
+ 'wiki/' + quote(title.replace(' ', '_').encode('utf8')) + 'wiki/' + quote(title.replace(' ', '_').encode('utf8'))
results.append({'url': wikipedia_link, 'title': title}) results.append({'url': wikipedia_link, 'title': title})

View File

@ -8,8 +8,8 @@
# @stable yes # @stable yes
# @parse url, infobox # @parse url, infobox
from urllib import urlencode
from lxml import etree from lxml import etree
from searx.url_utils import urlencode
# search-url # search-url
search_url = 'https://api.wolframalpha.com/v2/query?appid={api_key}&{query}' search_url = 'https://api.wolframalpha.com/v2/query?appid={api_key}&{query}'
@ -37,8 +37,7 @@ image_pods = {'VisualRepresentation',
# do search-request # do search-request
def request(query, params): def request(query, params):
params['url'] = search_url.format(query=urlencode({'input': query}), params['url'] = search_url.format(query=urlencode({'input': query}), api_key=api_key)
api_key=api_key)
params['headers']['Referer'] = site_url.format(query=urlencode({'i': query})) params['headers']['Referer'] = site_url.format(query=urlencode({'i': query}))
return params return params
@ -56,7 +55,7 @@ def replace_pua_chars(text):
u'\uf74e': 'i', # imaginary number u'\uf74e': 'i', # imaginary number
u'\uf7d9': '='} # equals sign u'\uf7d9': '='} # equals sign
for k, v in pua_chars.iteritems(): for k, v in pua_chars.items():
text = text.replace(k, v) text = text.replace(k, v)
return text return text
@ -66,7 +65,7 @@ def replace_pua_chars(text):
def response(resp): def response(resp):
results = [] results = []
search_results = etree.XML(resp.content) search_results = etree.XML(resp.text)
# return empty array if there are no results # return empty array if there are no results
if search_results.xpath(failure_xpath): if search_results.xpath(failure_xpath):
@ -120,10 +119,10 @@ def response(resp):
# append infobox # append infobox
results.append({'infobox': infobox_title, results.append({'infobox': infobox_title,
'attributes': result_chunks, 'attributes': result_chunks,
'urls': [{'title': 'Wolfram|Alpha', 'url': resp.request.headers['Referer'].decode('utf8')}]}) 'urls': [{'title': 'Wolfram|Alpha', 'url': resp.request.headers['Referer']}]})
# append link to site # append link to site
results.append({'url': resp.request.headers['Referer'].decode('utf8'), results.append({'url': resp.request.headers['Referer'],
'title': title, 'title': title,
'content': result_content}) 'content': result_content})

View File

@ -10,10 +10,9 @@
from json import loads from json import loads
from time import time from time import time
from urllib import urlencode
from lxml.etree import XML
from searx.poolrequests import get as http_get from searx.poolrequests import get as http_get
from searx.url_utils import urlencode
# search-url # search-url
url = 'https://www.wolframalpha.com/' url = 'https://www.wolframalpha.com/'
@ -62,7 +61,7 @@ obtain_token()
# do search-request # do search-request
def request(query, params): def request(query, params):
# obtain token if last update was more than an hour # obtain token if last update was more than an hour
if time() - token['last_updated'] > 3600: if time() - (token['last_updated'] or 0) > 3600:
obtain_token() obtain_token()
params['url'] = search_url.format(query=urlencode({'input': query}), token=token['value']) params['url'] = search_url.format(query=urlencode({'input': query}), token=token['value'])
params['headers']['Referer'] = referer_url.format(query=urlencode({'i': query})) params['headers']['Referer'] = referer_url.format(query=urlencode({'i': query}))
@ -112,9 +111,9 @@ def response(resp):
results.append({'infobox': infobox_title, results.append({'infobox': infobox_title,
'attributes': result_chunks, 'attributes': result_chunks,
'urls': [{'title': 'Wolfram|Alpha', 'url': resp.request.headers['Referer'].decode('utf8')}]}) 'urls': [{'title': 'Wolfram|Alpha', 'url': resp.request.headers['Referer']}]})
results.append({'url': resp.request.headers['Referer'].decode('utf8'), results.append({'url': resp.request.headers['Referer'],
'title': 'Wolfram|Alpha (' + infobox_title + ')', 'title': 'Wolfram|Alpha (' + infobox_title + ')',
'content': result_content}) 'content': result_content})

View File

@ -10,11 +10,9 @@
@parse url, title, thumbnail, img_src, content @parse url, title, thumbnail, img_src, content
""" """
from urllib import urlencode
from urlparse import urljoin
from lxml import html from lxml import html
import string
import re import re
from searx.url_utils import urlencode, urljoin
# engine dependent config # engine dependent config
categories = ['images'] categories = ['images']
@ -55,7 +53,7 @@ def response(resp):
cur_element += result_part cur_element += result_part
# fix xml-error # fix xml-error
cur_element = string.replace(cur_element, '"></a>', '"/></a>') cur_element = cur_element.replace('"></a>', '"/></a>')
dom = html.fromstring(cur_element) dom = html.fromstring(cur_element)
link = dom.xpath('//a')[0] link = dom.xpath('//a')[0]

View File

@ -13,8 +13,7 @@
""" """
from json import loads from json import loads
from urllib import urlencode from searx.url_utils import urlencode, urljoin
from urlparse import urljoin
# engine dependent config # engine dependent config
categories = ['images'] categories = ['images']

View File

@ -1,13 +1,13 @@
from lxml import html from lxml import html
from urllib import urlencode, unquote
from urlparse import urlparse, urljoin
from lxml.etree import _ElementStringResult, _ElementUnicodeResult from lxml.etree import _ElementStringResult, _ElementUnicodeResult
from searx.utils import html_to_text from searx.utils import html_to_text
from searx.url_utils import unquote, urlencode, urljoin, urlparse
search_url = None search_url = None
url_xpath = None url_xpath = None
content_xpath = None content_xpath = None
title_xpath = None title_xpath = None
paging = False
suggestion_xpath = '' suggestion_xpath = ''
results_xpath = '' results_xpath = ''

View File

@ -13,8 +13,8 @@
# @todo parse video, audio and file results # @todo parse video, audio and file results
from json import loads from json import loads
from urllib import urlencode
from dateutil import parser from dateutil import parser
from searx.url_utils import urlencode
from searx.utils import html_to_text from searx.utils import html_to_text

View File

@ -11,10 +11,9 @@
@parse url, title, content, suggestion @parse url, title, content, suggestion
""" """
from urllib import urlencode
from urlparse import unquote
from lxml import html from lxml import html
from searx.engines.xpath import extract_text, extract_url from searx.engines.xpath import extract_text, extract_url
from searx.url_utils import unquote, urlencode
# engine dependent config # engine dependent config
categories = ['general'] categories = ['general']

View File

@ -9,13 +9,13 @@
# @stable no (HTML can change) # @stable no (HTML can change)
# @parse url, title, content, publishedDate # @parse url, title, content, publishedDate
from urllib import urlencode import re
from datetime import datetime, timedelta
from lxml import html from lxml import html
from searx.engines.xpath import extract_text, extract_url from searx.engines.xpath import extract_text, extract_url
from searx.engines.yahoo import parse_url, _fetch_supported_languages, supported_languages_url from searx.engines.yahoo import parse_url, _fetch_supported_languages, supported_languages_url
from datetime import datetime, timedelta
import re
from dateutil import parser from dateutil import parser
from searx.url_utils import urlencode
# engine dependent config # engine dependent config
categories = ['news'] categories = ['news']

View File

@ -9,9 +9,9 @@
@parse url, title, content @parse url, title, content
""" """
from urllib import urlencode
from lxml import html from lxml import html
from searx.search import logger from searx import logger
from searx.url_utils import urlencode
logger = logger.getChild('yandex engine') logger = logger.getChild('yandex engine')

View File

@ -9,8 +9,8 @@
# @parse url, title, content, publishedDate, thumbnail, embedded # @parse url, title, content, publishedDate, thumbnail, embedded
from json import loads from json import loads
from urllib import urlencode
from dateutil import parser from dateutil import parser
from searx.url_utils import urlencode
# engine dependent config # engine dependent config
categories = ['videos', 'music'] categories = ['videos', 'music']

View File

@ -8,10 +8,10 @@
# @stable no # @stable no
# @parse url, title, content, publishedDate, thumbnail, embedded # @parse url, title, content, publishedDate, thumbnail, embedded
from urllib import quote_plus
from lxml import html from lxml import html
from searx.engines.xpath import extract_text from searx.engines.xpath import extract_text
from searx.utils import list_get from searx.utils import list_get
from searx.url_utils import quote_plus
# engine dependent config # engine dependent config
categories = ['videos', 'music'] categories = ['videos', 'music']

View File

@ -14,9 +14,12 @@ along with searx. If not, see < http://www.gnu.org/licenses/ >.
(C) 2015 by Adam Tauber, <asciimoo@gmail.com> (C) 2015 by Adam Tauber, <asciimoo@gmail.com>
''' '''
from sys import exit from sys import exit, version_info
from searx import logger from searx import logger
if version_info[0] == 3:
unicode = str
logger = logger.getChild('plugins') logger = logger.getChild('plugins')
from searx.plugins import (doai_rewrite, from searx.plugins import (doai_rewrite,

View File

@ -1,6 +1,6 @@
from flask_babel import gettext from flask_babel import gettext
import re import re
from urlparse import urlparse, parse_qsl from searx.url_utils import urlparse, parse_qsl
regex = re.compile(r'10\.\d{4,9}/[^\s]+') regex = re.compile(r'10\.\d{4,9}/[^\s]+')

View File

@ -16,14 +16,17 @@ along with searx. If not, see < http://www.gnu.org/licenses/ >.
''' '''
import re import re
from urlparse import urlparse import sys
from lxml import etree from lxml import etree
from os import listdir, environ from os import listdir, environ
from os.path import isfile, isdir, join from os.path import isfile, isdir, join
from searx.plugins import logger from searx.plugins import logger
from flask_babel import gettext from flask_babel import gettext
from searx import searx_dir from searx import searx_dir
from searx.url_utils import urlparse
if sys.version_info[0] == 3:
unicode = str
name = "HTTPS rewrite" name = "HTTPS rewrite"
description = gettext('Rewrite HTTP links to HTTPS if possible') description = gettext('Rewrite HTTP links to HTTPS if possible')

View File

@ -22,7 +22,7 @@ default_on = True
# Self User Agent regex # Self User Agent regex
p = re.compile('.*user[ -]agent.*', re.IGNORECASE) p = re.compile(b'.*user[ -]agent.*', re.IGNORECASE)
# attach callback to the post search hook # attach callback to the post search hook
@ -31,7 +31,7 @@ p = re.compile('.*user[ -]agent.*', re.IGNORECASE)
def post_search(request, search): def post_search(request, search):
if search.search_query.pageno > 1: if search.search_query.pageno > 1:
return True return True
if search.search_query.query == 'ip': if search.search_query.query == b'ip':
x_forwarded_for = request.headers.getlist("X-Forwarded-For") x_forwarded_for = request.headers.getlist("X-Forwarded-For")
if x_forwarded_for: if x_forwarded_for:
ip = x_forwarded_for[0] ip = x_forwarded_for[0]

View File

@ -17,7 +17,7 @@ along with searx. If not, see < http://www.gnu.org/licenses/ >.
from flask_babel import gettext from flask_babel import gettext
import re import re
from urlparse import urlunparse from searx.url_utils import urlunparse
regexes = {re.compile(r'utm_[^&]+&?'), regexes = {re.compile(r'utm_[^&]+&?'),
re.compile(r'(wkey|wemail)[^&]+&?'), re.compile(r'(wkey|wemail)[^&]+&?'),

View File

@ -23,7 +23,7 @@ class Setting(object):
def __init__(self, default_value, **kwargs): def __init__(self, default_value, **kwargs):
super(Setting, self).__init__() super(Setting, self).__init__()
self.value = default_value self.value = default_value
for key, value in kwargs.iteritems(): for key, value in kwargs.items():
setattr(self, key, value) setattr(self, key, value)
self._post_init() self._post_init()
@ -38,7 +38,7 @@ class Setting(object):
return self.value return self.value
def save(self, name, resp): def save(self, name, resp):
resp.set_cookie(name, bytes(self.value), max_age=COOKIE_MAX_AGE) resp.set_cookie(name, self.value, max_age=COOKIE_MAX_AGE)
class StringSetting(Setting): class StringSetting(Setting):
@ -133,7 +133,7 @@ class MapSetting(Setting):
def save(self, name, resp): def save(self, name, resp):
if hasattr(self, 'key'): if hasattr(self, 'key'):
resp.set_cookie(name, bytes(self.key), max_age=COOKIE_MAX_AGE) resp.set_cookie(name, self.key, max_age=COOKIE_MAX_AGE)
class SwitchableSetting(Setting): class SwitchableSetting(Setting):
@ -194,7 +194,7 @@ class EnginesSetting(SwitchableSetting):
def _post_init(self): def _post_init(self):
super(EnginesSetting, self)._post_init() super(EnginesSetting, self)._post_init()
transformed_choices = [] transformed_choices = []
for engine_name, engine in self.choices.iteritems(): for engine_name, engine in self.choices.items():
for category in engine.categories: for category in engine.categories:
transformed_choice = dict() transformed_choice = dict()
transformed_choice['default_on'] = not engine.disabled transformed_choice['default_on'] = not engine.disabled
@ -241,9 +241,9 @@ class Preferences(object):
'language': SearchLanguageSetting(settings['search']['language'], 'language': SearchLanguageSetting(settings['search']['language'],
choices=LANGUAGE_CODES), choices=LANGUAGE_CODES),
'locale': EnumStringSetting(settings['ui']['default_locale'], 'locale': EnumStringSetting(settings['ui']['default_locale'],
choices=settings['locales'].keys() + ['']), choices=list(settings['locales'].keys()) + ['']),
'autocomplete': EnumStringSetting(settings['search']['autocomplete'], 'autocomplete': EnumStringSetting(settings['search']['autocomplete'],
choices=autocomplete.backends.keys() + ['']), choices=list(autocomplete.backends.keys()) + ['']),
'image_proxy': MapSetting(settings['server']['image_proxy'], 'image_proxy': MapSetting(settings['server']['image_proxy'],
map={'': settings['server']['image_proxy'], map={'': settings['server']['image_proxy'],
'0': False, '0': False,
@ -260,7 +260,7 @@ class Preferences(object):
self.unknown_params = {} self.unknown_params = {}
def parse_cookies(self, input_data): def parse_cookies(self, input_data):
for user_setting_name, user_setting in input_data.iteritems(): for user_setting_name, user_setting in input_data.items():
if user_setting_name in self.key_value_settings: if user_setting_name in self.key_value_settings:
self.key_value_settings[user_setting_name].parse(user_setting) self.key_value_settings[user_setting_name].parse(user_setting)
elif user_setting_name == 'disabled_engines': elif user_setting_name == 'disabled_engines':
@ -274,7 +274,7 @@ class Preferences(object):
disabled_engines = [] disabled_engines = []
enabled_categories = [] enabled_categories = []
disabled_plugins = [] disabled_plugins = []
for user_setting_name, user_setting in input_data.iteritems(): for user_setting_name, user_setting in input_data.items():
if user_setting_name in self.key_value_settings: if user_setting_name in self.key_value_settings:
self.key_value_settings[user_setting_name].parse(user_setting) self.key_value_settings[user_setting_name].parse(user_setting)
elif user_setting_name.startswith('engine_'): elif user_setting_name.startswith('engine_'):
@ -295,7 +295,7 @@ class Preferences(object):
return self.key_value_settings[user_setting_name].get_value() return self.key_value_settings[user_setting_name].get_value()
def save(self, resp): def save(self, resp):
for user_setting_name, user_setting in self.key_value_settings.iteritems(): for user_setting_name, user_setting in self.key_value_settings.items():
user_setting.save(user_setting_name, resp) user_setting.save(user_setting_name, resp)
self.engines.save(resp) self.engines.save(resp)
self.plugins.save(resp) self.plugins.save(resp)

View File

@ -21,8 +21,12 @@ from searx.languages import language_codes
from searx.engines import ( from searx.engines import (
categories, engines, engine_shortcuts categories, engines, engine_shortcuts
) )
import string
import re import re
import string
import sys
if sys.version_info[0] == 3:
unicode = str
VALID_LANGUAGE_CODE = re.compile(r'^[a-z]{2,3}(-[a-zA-Z]{2})?$') VALID_LANGUAGE_CODE = re.compile(r'^[a-z]{2,3}(-[a-zA-Z]{2})?$')
@ -146,7 +150,7 @@ class SearchQuery(object):
"""container for all the search parameters (query, language, etc...)""" """container for all the search parameters (query, language, etc...)"""
def __init__(self, query, engines, categories, lang, safesearch, pageno, time_range): def __init__(self, query, engines, categories, lang, safesearch, pageno, time_range):
self.query = query self.query = query.encode('utf-8')
self.engines = engines self.engines = engines
self.categories = categories self.categories = categories
self.lang = lang self.lang = lang

View File

@ -1,9 +1,13 @@
import re import re
import sys
from collections import defaultdict from collections import defaultdict
from operator import itemgetter from operator import itemgetter
from threading import RLock from threading import RLock
from urlparse import urlparse, unquote
from searx.engines import engines from searx.engines import engines
from searx.url_utils import urlparse, unquote
if sys.version_info[0] == 3:
basestring = str
CONTENT_LEN_IGNORED_CHARS_REGEX = re.compile(r'[,;:!?\./\\\\ ()-_]', re.M | re.U) CONTENT_LEN_IGNORED_CHARS_REGEX = re.compile(r'[,;:!?\./\\\\ ()-_]', re.M | re.U)
WHITESPACE_REGEX = re.compile('( |\t|\n)+', re.M | re.U) WHITESPACE_REGEX = re.compile('( |\t|\n)+', re.M | re.U)

View File

@ -16,8 +16,8 @@ along with searx. If not, see < http://www.gnu.org/licenses/ >.
''' '''
import gc import gc
import sys
import threading import threading
from thread import start_new_thread
from time import time from time import time
from uuid import uuid4 from uuid import uuid4
import requests.exceptions import requests.exceptions
@ -33,6 +33,14 @@ from searx import logger
from searx.plugins import plugins from searx.plugins import plugins
from searx.exceptions import SearxParameterException from searx.exceptions import SearxParameterException
try:
from thread import start_new_thread
except:
from _thread import start_new_thread
if sys.version_info[0] == 3:
unicode = str
logger = logger.getChild('search') logger = logger.getChild('search')
number_of_searches = 0 number_of_searches = 0
@ -387,7 +395,7 @@ class Search(object):
request_params['time_range'] = search_query.time_range request_params['time_range'] = search_query.time_range
# append request to list # append request to list
requests.append((selected_engine['name'], search_query.query.encode('utf-8'), request_params)) requests.append((selected_engine['name'], search_query.query, request_params))
# update timeout_limit # update timeout_limit
timeout_limit = max(timeout_limit, engine.timeout) timeout_limit = max(timeout_limit, engine.timeout)

View File

@ -17,7 +17,7 @@ server:
ui: ui:
themes_path : "" themes_path : ""
default_theme : legacy default_theme : oscar
default_locale : "" default_locale : ""
outgoing: outgoing:

View File

@ -3,7 +3,7 @@
<div class="center"> <div class="center">
<h1>{{ _('Page not found') }}</h1> <h1>{{ _('Page not found') }}</h1>
{% autoescape false %} {% autoescape false %}
<p>{{ _('Go to %(search_page)s.', search_page='<a href="{}">{}</a>'.decode('utf-8').format(url_for('index'), _('search page'))) }}</p> <p>{{ _('Go to %(search_page)s.', search_page=unicode('<a href="{}">{}</a>').format(url_for('index'), _('search page'))) }}</p>
{% endautoescape %} {% endautoescape %}
</div> </div>
{% endblock %} {% endblock %}

View File

@ -3,7 +3,7 @@
<div class="center"> <div class="center">
<h1>{{ _('Page not found') }}</h1> <h1>{{ _('Page not found') }}</h1>
{% autoescape false %} {% autoescape false %}
<p>{{ _('Go to %(search_page)s.', search_page='<a href="{}">{}</a>'.decode('utf-8').format(url_for('index'), _('search page'))) }}</p> <p>{{ _('Go to %(search_page)s.', search_page=unicode('<a href="{}">{}</a>').format(url_for('index'), _('search page'))) }}</p>
{% endautoescape %} {% endautoescape %}
</div> </div>
{% endblock %} {% endblock %}

View File

@ -3,7 +3,7 @@
<div class="text-center"> <div class="text-center">
<h1>{{ _('Page not found') }}</h1> <h1>{{ _('Page not found') }}</h1>
{% autoescape false %} {% autoescape false %}
<p>{{ _('Go to %(search_page)s.', search_page='<a href="{}">{}</a>'.decode('utf-8').format(url_for('index'), _('search page'))) }}</p> <p>{{ _('Go to %(search_page)s.', search_page=unicode('<a href="{}">{}</a>').format(url_for('index'), _('search page'))) }}</p>
{% endautoescape %} {% endautoescape %}
</div> </div>
{% endblock %} {% endblock %}

View File

@ -3,7 +3,7 @@
<div class="center"> <div class="center">
<h1>{{ _('Page not found') }}</h1> <h1>{{ _('Page not found') }}</h1>
{% autoescape false %} {% autoescape false %}
<p>{{ _('Go to %(search_page)s.', search_page='<a href="{}">{}</a>'.decode('utf-8').format(url_for('index'), _('search page'))) }}</p> <p>{{ _('Go to %(search_page)s.', search_page=unicode('<a href="{}">{}</a>').format(url_for('index'), _('search page'))) }}</p>
{% endautoescape %} {% endautoescape %}
</div> </div>
{% endblock %} {% endblock %}

View File

@ -1,13 +1,16 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
"""Shared testing code.""" """Shared testing code."""
from plone.testing import Layer
from unittest2 import TestCase
from os.path import dirname, join, abspath
import os import os
import subprocess import subprocess
import traceback
from os.path import dirname, join, abspath
from splinter import Browser
from unittest2 import TestCase
class SearxTestLayer: class SearxTestLayer:
@ -32,7 +35,7 @@ class SearxTestLayer:
testTearDown = classmethod(testTearDown) testTearDown = classmethod(testTearDown)
class SearxRobotLayer(Layer): class SearxRobotLayer():
"""Searx Robot Test Layer""" """Searx Robot Test Layer"""
def setUp(self): def setUp(self):
@ -62,7 +65,12 @@ class SearxRobotLayer(Layer):
del os.environ['SEARX_SETTINGS_PATH'] del os.environ['SEARX_SETTINGS_PATH']
SEARXROBOTLAYER = SearxRobotLayer() # SEARXROBOTLAYER = SearxRobotLayer()
def run_robot_tests(tests):
    """Run every browser test callable, each inside its own ``Browser`` context.

    :param tests: sized collection of callables; each one is called with the
        browser object as its only argument.
    """
    test_count = len(tests)
    print('Running {0} tests'.format(test_count))
    for run_test in tests:
        # A new Browser context is opened for every single test.
        with Browser() as session:
            run_test(session)
class SearxTestCase(TestCase): class SearxTestCase(TestCase):
@ -72,17 +80,19 @@ class SearxTestCase(TestCase):
if __name__ == '__main__': if __name__ == '__main__':
from tests.test_robot import test_suite
import sys import sys
from zope.testrunner.runner import Runner # test cases
from tests import robot
base_dir = abspath(join(dirname(__file__), '../tests')) base_dir = abspath(join(dirname(__file__), '../tests'))
if sys.argv[1] == 'robot': if sys.argv[1] == 'robot':
r = Runner(['--color', test_layer = SearxRobotLayer()
'--auto-progress', errors = False
'--stop-on-error', try:
'--path', test_layer.setUp()
base_dir], run_robot_tests([getattr(robot, x) for x in dir(robot) if x.startswith('test_')])
found_suites=[test_suite()]) except Exception:
r.run() errors = True
sys.exit(int(r.failed)) print('Error occured: {0}'.format(traceback.format_exc()))
test_layer.tearDown()
sys.exit(1 if errors else 0)

28
searx/url_utils.py Normal file
View File

@ -0,0 +1,28 @@
"""Python 2/3 compatibility shim for URL helpers.

Re-exports the URL quoting/parsing functions under the same names on both
major Python versions, so other modules can import them from here without
doing their own version check.
"""
from sys import version_info

if version_info[0] == 2:
    # Python 2 splits these helpers across ``urllib`` and ``urlparse``.
    from urllib import quote, quote_plus, unquote, urlencode
    from urlparse import parse_qsl, urljoin, urlparse, urlunparse, ParseResult
else:
    from urllib.parse import (
        parse_qsl,
        quote,
        quote_plus,
        unquote,
        urlencode,
        urljoin,
        urlparse,
        urlunparse,
        ParseResult
    )

# ``__all__`` (a list of *names*) is the attribute Python and linters honour
# as the public API; it also marks the imports above as intentionally
# re-exported instead of unused.
__all__ = [
    'parse_qsl',
    'quote',
    'quote_plus',
    'unquote',
    'urlencode',
    'urljoin',
    'urlparse',
    'urlunparse',
    'ParseResult',
]

View File

@ -1,11 +1,9 @@
import cStringIO
import csv import csv
import os import os
import re import re
from babel.dates import format_date from babel.dates import format_date
from codecs import getincrementalencoder from codecs import getincrementalencoder
from HTMLParser import HTMLParser
from imp import load_source from imp import load_source
from os.path import splitext, join from os.path import splitext, join
from random import choice from random import choice
@ -16,6 +14,19 @@ from searx.languages import language_codes
from searx import settings from searx import settings
from searx import logger from searx import logger
try:
from cStringIO import StringIO
except:
from io import StringIO
try:
from HTMLParser import HTMLParser
except:
from html.parser import HTMLParser
if sys.version_info[0] == 3:
unichr = chr
unicode = str
logger = logger.getChild('utils') logger = logger.getChild('utils')
@ -140,7 +151,7 @@ class UnicodeWriter:
def __init__(self, f, dialect=csv.excel, encoding="utf-8", **kwds): def __init__(self, f, dialect=csv.excel, encoding="utf-8", **kwds):
# Redirect output to a queue # Redirect output to a queue
self.queue = cStringIO.StringIO() self.queue = StringIO()
self.writer = csv.writer(self.queue, dialect=dialect, **kwds) self.writer = csv.writer(self.queue, dialect=dialect, **kwds)
self.stream = f self.stream = f
self.encoder = getincrementalencoder(encoding)() self.encoder = getincrementalencoder(encoding)()
@ -152,14 +163,13 @@ class UnicodeWriter:
unicode_row.append(col.encode('utf-8').strip()) unicode_row.append(col.encode('utf-8').strip())
else: else:
unicode_row.append(col) unicode_row.append(col)
self.writer.writerow(unicode_row) self.writer.writerow([x.decode('utf-8') if hasattr(x, 'decode') else x for x in unicode_row])
# Fetch UTF-8 output from the queue ... # Fetch UTF-8 output from the queue ...
data = self.queue.getvalue() data = self.queue.getvalue().strip('\x00')
data = data.decode("utf-8")
# ... and reencode it into the target encoding # ... and reencode it into the target encoding
data = self.encoder.encode(data) data = self.encoder.encode(data)
# write to the target stream # write to the target stream
self.stream.write(data) self.stream.write(data.decode('utf-8'))
# empty queue # empty queue
self.queue.truncate(0) self.queue.truncate(0)
@ -231,7 +241,7 @@ def dict_subset(d, properties):
def prettify_url(url, max_length=74): def prettify_url(url, max_length=74):
if len(url) > max_length: if len(url) > max_length:
chunk_len = max_length / 2 + 1 chunk_len = int(max_length / 2 + 1)
return u'{0}[...]{1}'.format(url[:chunk_len], url[-chunk_len:]) return u'{0}[...]{1}'.format(url[:chunk_len], url[-chunk_len:])
else: else:
return url return url

View File

@ -22,11 +22,12 @@ if __name__ == '__main__':
from os.path import realpath, dirname from os.path import realpath, dirname
path.append(realpath(dirname(realpath(__file__)) + '/../')) path.append(realpath(dirname(realpath(__file__)) + '/../'))
import cStringIO
import hashlib import hashlib
import hmac import hmac
import json import json
import os import os
import sys
import requests import requests
from searx import logger from searx import logger
@ -42,8 +43,6 @@ except:
exit(1) exit(1)
from cgi import escape from cgi import escape
from datetime import datetime, timedelta from datetime import datetime, timedelta
from urllib import urlencode
from urlparse import urlparse, urljoin
from werkzeug.contrib.fixers import ProxyFix from werkzeug.contrib.fixers import ProxyFix
from flask import ( from flask import (
Flask, request, render_template, url_for, Response, make_response, Flask, request, render_template, url_for, Response, make_response,
@ -52,7 +51,7 @@ from flask import (
from flask_babel import Babel, gettext, format_date, format_decimal from flask_babel import Babel, gettext, format_date, format_decimal
from flask.json import jsonify from flask.json import jsonify
from searx import settings, searx_dir, searx_debug from searx import settings, searx_dir, searx_debug
from searx.exceptions import SearxException, SearxParameterException from searx.exceptions import SearxParameterException
from searx.engines import ( from searx.engines import (
categories, engines, engine_shortcuts, get_engines_stats, initialize_engines categories, engines, engine_shortcuts, get_engines_stats, initialize_engines
) )
@ -69,6 +68,7 @@ from searx.autocomplete import searx_bang, backends as autocomplete_backends
from searx.plugins import plugins from searx.plugins import plugins
from searx.preferences import Preferences, ValidationException from searx.preferences import Preferences, ValidationException
from searx.answerers import answerers from searx.answerers import answerers
from searx.url_utils import urlencode, urlparse, urljoin
# check if the pyopenssl package is installed. # check if the pyopenssl package is installed.
# It is needed for SSL connection without trouble, see #298 # It is needed for SSL connection without trouble, see #298
@ -78,6 +78,15 @@ except ImportError:
logger.critical("The pyopenssl package has to be installed.\n" logger.critical("The pyopenssl package has to be installed.\n"
"Some HTTPS connections will fail") "Some HTTPS connections will fail")
try:
from cStringIO import StringIO
except:
from io import StringIO
if sys.version_info[0] == 3:
unicode = str
# serve pages with HTTP/1.1 # serve pages with HTTP/1.1
from werkzeug.serving import WSGIRequestHandler from werkzeug.serving import WSGIRequestHandler
WSGIRequestHandler.protocol_version = "HTTP/{}".format(settings['server'].get('http_protocol_version', '1.0')) WSGIRequestHandler.protocol_version = "HTTP/{}".format(settings['server'].get('http_protocol_version', '1.0'))
@ -357,6 +366,8 @@ def render(template_name, override_theme=None, **kwargs):
kwargs['results_on_new_tab'] = request.preferences.get_value('results_on_new_tab') kwargs['results_on_new_tab'] = request.preferences.get_value('results_on_new_tab')
kwargs['unicode'] = unicode
kwargs['scripts'] = set() kwargs['scripts'] = set()
for plugin in request.user_plugins: for plugin in request.user_plugins:
for script in plugin.js_dependencies: for script in plugin.js_dependencies:
@ -375,7 +386,7 @@ def render(template_name, override_theme=None, **kwargs):
def pre_request(): def pre_request():
request.errors = [] request.errors = []
preferences = Preferences(themes, categories.keys(), engines, plugins) preferences = Preferences(themes, list(categories.keys()), engines, plugins)
request.preferences = preferences request.preferences = preferences
try: try:
preferences.parse_cookies(request.cookies) preferences.parse_cookies(request.cookies)
@ -479,10 +490,8 @@ def index():
for result in results: for result in results:
if output_format == 'html': if output_format == 'html':
if 'content' in result and result['content']: if 'content' in result and result['content']:
result['content'] = highlight_content(escape(result['content'][:1024]), result['content'] = highlight_content(escape(result['content'][:1024]), search_query.query)
search_query.query.encode('utf-8')) result['title'] = highlight_content(escape(result['title'] or u''), search_query.query)
result['title'] = highlight_content(escape(result['title'] or u''),
search_query.query.encode('utf-8'))
else: else:
if result.get('content'): if result.get('content'):
result['content'] = html_to_text(result['content']).strip() result['content'] = html_to_text(result['content']).strip()
@ -510,7 +519,7 @@ def index():
result['publishedDate'] = format_date(result['publishedDate']) result['publishedDate'] = format_date(result['publishedDate'])
if output_format == 'json': if output_format == 'json':
return Response(json.dumps({'query': search_query.query, return Response(json.dumps({'query': search_query.query.decode('utf-8'),
'number_of_results': number_of_results, 'number_of_results': number_of_results,
'results': results, 'results': results,
'answers': list(result_container.answers), 'answers': list(result_container.answers),
@ -519,7 +528,7 @@ def index():
'suggestions': list(result_container.suggestions)}), 'suggestions': list(result_container.suggestions)}),
mimetype='application/json') mimetype='application/json')
elif output_format == 'csv': elif output_format == 'csv':
csv = UnicodeWriter(cStringIO.StringIO()) csv = UnicodeWriter(StringIO())
keys = ('title', 'url', 'content', 'host', 'engine', 'score') keys = ('title', 'url', 'content', 'host', 'engine', 'score')
csv.writerow(keys) csv.writerow(keys)
for row in results: for row in results:
@ -527,7 +536,7 @@ def index():
csv.writerow([row.get(key, '') for key in keys]) csv.writerow([row.get(key, '') for key in keys])
csv.stream.seek(0) csv.stream.seek(0)
response = Response(csv.stream.read(), mimetype='application/csv') response = Response(csv.stream.read(), mimetype='application/csv')
cont_disp = 'attachment;Filename=searx_-_{0}.csv'.format(search_query.query.encode('utf-8')) cont_disp = 'attachment;Filename=searx_-_{0}.csv'.format(search_query.query)
response.headers.add('Content-Disposition', cont_disp) response.headers.add('Content-Disposition', cont_disp)
return response return response
elif output_format == 'rss': elif output_format == 'rss':
@ -578,7 +587,7 @@ def autocompleter():
disabled_engines = request.preferences.engines.get_disabled() disabled_engines = request.preferences.engines.get_disabled()
# parse query # parse query
raw_text_query = RawTextQuery(request.form.get('q', '').encode('utf-8'), disabled_engines) raw_text_query = RawTextQuery(request.form.get('q', u'').encode('utf-8'), disabled_engines)
raw_text_query.parse_query() raw_text_query.parse_query()
# check if search query is set # check if search query is set
@ -820,6 +829,7 @@ def page_not_found(e):
def run(): def run():
logger.debug('starting webserver on %s:%s', settings['server']['port'], settings['server']['bind_address'])
app.run( app.run(
debug=searx_debug, debug=searx_debug,
use_debugger=searx_debug, use_debugger=searx_debug,

View File

@ -0,0 +1,75 @@
# -*- coding: utf-8 -*-
from time import sleep
url = "http://localhost:11111/"
def test_index(browser):
    """The start page loads and contains the text 'about'."""
    browser.visit(url)
    about_visible = browser.is_text_present('about')
    assert about_visible
def test_404(browser):
    """Visiting a path that does not exist shows the 'Page not found' text."""
    missing_page = url + 'missing_link'
    browser.visit(missing_page)
    not_found_shown = browser.is_text_present('Page not found')
    assert not_found_shown
def test_about(browser):
    """Following the 'about' link from the start page shows the about text."""
    browser.visit(url)
    browser.click_link_by_text('about')
    about_text_shown = browser.is_text_present('Why use searx?')
    assert about_text_shown
def test_preferences(browser):
    """The preferences page shows its expected texts and the
    ``checkbox_dummy`` label."""
    browser.visit(url)
    browser.click_link_by_text('preferences')
    # Checked in the same order as before; the first missing text fails.
    for expected_text in ('Preferences', 'Cookies'):
        assert browser.is_text_present(expected_text)
    dummy_label_xpath = '//label[@for="checkbox_dummy"]'
    assert browser.is_element_present_by_xpath(dummy_label_xpath)
def test_preferences_engine_select(browser):
    """Checking an engine checkbox and saving keeps it checked afterwards.

    Opens the engine tab of the preferences page, verifies the
    ``engine_general_dummy__general`` input starts unchecked, checks it via
    its label, saves, and then verifies the input is checked on a fresh visit.
    """
    engine_tab = '//a[@href="#tab_engine"]'
    engine_input = '//input[@id="engine_general_dummy__general"]'
    engine_label = '//label[@for="engine_general_dummy__general"]'
    save_button = '//input[@value="save"]'

    browser.visit(url)
    browser.click_link_by_text('preferences')

    assert browser.is_element_present_by_xpath(engine_tab)
    browser.find_by_xpath(engine_tab).first.click()
    checked_before = browser.find_by_xpath(engine_input).first.checked
    assert not checked_before
    browser.find_by_xpath(engine_label).first.check()
    browser.find_by_xpath(save_button).first.click()

    # waiting for the redirect - without this the test is flaky..
    sleep(1)

    browser.visit(url)
    browser.click_link_by_text('preferences')
    browser.find_by_xpath(engine_tab).first.click()
    checked_after = browser.find_by_xpath(engine_input).first.checked
    assert checked_after
def test_preferences_locale(browser):
    """Switch the UI locale to Hungarian and check that the change sticks.

    Selects ``hu`` in the ``locale`` field of the preferences form, saves,
    then revisits the start page and expects the localized preferences link
    and the localized page text.
    """
    browser.visit(url)
    browser.click_link_by_text('preferences')
    browser.select('locale', 'hu')
    browser.find_by_xpath('//input[@value="save"]').first.click()

    # waiting for the redirect - without this the test is flaky..
    sleep(1)

    browser.visit(url)
    browser.click_link_by_text('beállítások')
    # The result has to be asserted: a bare call discards the boolean, so the
    # test could never fail on a missing translation.
    assert browser.is_text_present('Beállítások')
def test_search(browser):
    """Submitting a query through the search form shows the no-results text."""
    browser.visit(url)
    browser.fill('q', 'test search query')
    submit_button = browser.find_by_xpath('//button[@type="submit"]').first
    submit_button.click()
    no_results_shown = browser.is_text_present('didn\'t find any results')
    assert no_results_shown

View File

@ -1,153 +0,0 @@
*** Settings ***
Library Selenium2Library timeout=10 implicit_wait=0.5
Test Setup Open Browser http://localhost:11111/
Test Teardown Close All Browsers
*** Keywords ***
Submit Preferences
Set Selenium Speed 2 seconds
Submit Form id=search_form
Location Should Be http://localhost:11111/
Set Selenium Speed 0 seconds
*** Test Cases ***
Front page
Page Should Contain about
Page Should Contain preferences
404 page
Go To http://localhost:11111/no-such-page
Page Should Contain Page not found
Page Should Contain Go to search page
About page
Click Element link=about
Page Should Contain Why use searx?
Page Should Contain Element link=search engines
Preferences page
Click Element link=preferences
Page Should Contain Preferences
Page Should Contain Default categories
Page Should Contain Currently used search engines
Page Should Contain dummy dummy
Page Should Contain general dummy
Switch category
Go To http://localhost:11111/preferences
Page Should Contain Checkbox category_general
Page Should Contain Checkbox category_dummy
Click Element xpath=//*[.="general"]
Click Element xpath=//*[.="dummy"]
Submit Preferences
Checkbox Should Not Be Selected category_general
Checkbox Should Be Selected category_dummy
Change language
Page Should Contain about
Page Should Contain preferences
Go To http://localhost:11111/preferences
Select From List locale hu
Submit Preferences
Page Should Contain rólunk
Page Should Contain beállítások
Change method
Page Should Contain about
Page Should Contain preferences
Go To http://localhost:11111/preferences
Select From List method GET
Submit Preferences
Go To http://localhost:11111/preferences
List Selection Should Be method GET
Select From List method POST
Submit Preferences
Go To http://localhost:11111/preferences
List Selection Should Be method POST
Change theme
Page Should Contain about
Page Should Contain preferences
Go To http://localhost:11111/preferences
List Selection Should Be theme legacy
Select From List theme oscar
Submit Preferences
Go To http://localhost:11111/preferences
List Selection Should Be theme oscar
Change safesearch
Page Should Contain about
Page Should Contain preferences
Go To http://localhost:11111/preferences
List Selection Should Be safesearch None
Select From List safesearch Strict
Submit Preferences
Go To http://localhost:11111/preferences
List Selection Should Be safesearch Strict
Change image proxy
Page Should Contain about
Page Should Contain preferences
Go To http://localhost:11111/preferences
List Selection Should Be image_proxy Disabled
Select From List image_proxy Enabled
Submit Preferences
Go To http://localhost:11111/preferences
List Selection Should Be image_proxy Enabled
Change search language
Page Should Contain about
Page Should Contain preferences
Go To http://localhost:11111/preferences
List Selection Should Be language Default language
Select From List language Türkçe - tr-TR
Submit Preferences
Go To http://localhost:11111/preferences
List Selection Should Be language Türkçe - tr-TR
Change autocomplete
Page Should Contain about
Page Should Contain preferences
Go To http://localhost:11111/preferences
List Selection Should Be autocomplete -
Select From List autocomplete google
Submit Preferences
Go To http://localhost:11111/preferences
List Selection Should Be autocomplete google
Change allowed/disabled engines
Page Should Contain about
Page Should Contain preferences
Go To http://localhost:11111/preferences
Page Should Contain Engine name
Element Should Contain xpath=//label[@class="deny"][@for='engine_dummy_dummy_dummy'] Block
Element Should Contain xpath=//label[@class="deny"][@for='engine_general_general_dummy'] Block
Click Element xpath=//label[@class="deny"][@for='engine_general_general_dummy']
Submit Preferences
Page Should Contain about
Page Should Contain preferences
Go To http://localhost:11111/preferences
Page Should Contain Engine name
Element Should Contain xpath=//label[@class="deny"][@for='engine_dummy_dummy_dummy'] Block
Element Should Contain xpath=//label[@class="deny"][@for='engine_general_general_dummy'] \
Block a plugin
Page Should Contain about
Page Should Contain preferences
Go To http://localhost:11111/preferences
List Selection Should Be theme legacy
Select From List theme oscar
Submit Preferences
Go To http://localhost:11111/preferences
List Selection Should Be theme oscar
Page Should Contain Plugins
Click Link Plugins
Checkbox Should Not Be Selected id=plugin_HTTPS_rewrite
Click Element xpath=//label[@for='plugin_HTTPS_rewrite']
Submit Preferences
Go To http://localhost:11111/preferences
Page Should Contain Plugins
Click Link Plugins
Checkbox Should Be Selected id=plugin_HTTPS_rewrite

View File

@ -25,7 +25,7 @@ class TestArchLinuxEngine(SearxTestCase):
self.assertTrue(query in params['url']) self.assertTrue(query in params['url'])
self.assertTrue('wiki.archlinux.org' in params['url']) self.assertTrue('wiki.archlinux.org' in params['url'])
for lang, domain in domains.iteritems(): for lang, domain in domains.items():
dic['language'] = lang dic['language'] = lang
params = archlinux.request(query, dic) params = archlinux.request(query, dic)
self.assertTrue(domain in params['url']) self.assertTrue(domain in params['url'])
@ -102,5 +102,5 @@ class TestArchLinuxEngine(SearxTestCase):
for exp in expected: for exp in expected:
res = results[i] res = results[i]
i += 1 i += 1
for key, value in exp.iteritems(): for key, value in exp.items():
self.assertEqual(res[key], value) self.assertEqual(res[key], value)

View File

@ -7,18 +7,18 @@ from searx.testing import SearxTestCase
class TestBingEngine(SearxTestCase): class TestBingEngine(SearxTestCase):
def test_request(self): def test_request(self):
query = 'test_query' query = u'test_query'
dicto = defaultdict(dict) dicto = defaultdict(dict)
dicto['pageno'] = 0 dicto['pageno'] = 0
dicto['language'] = 'fr_FR' dicto['language'] = 'fr_FR'
params = bing.request(query, dicto) params = bing.request(query.encode('utf-8'), dicto)
self.assertTrue('url' in params) self.assertTrue('url' in params)
self.assertTrue(query in params['url']) self.assertTrue(query in params['url'])
self.assertTrue('language%3AFR' in params['url']) self.assertTrue('language%3AFR' in params['url'])
self.assertTrue('bing.com' in params['url']) self.assertTrue('bing.com' in params['url'])
dicto['language'] = 'all' dicto['language'] = 'all'
params = bing.request(query, dicto) params = bing.request(query.encode('utf-8'), dicto)
self.assertTrue('language' in params['url']) self.assertTrue('language' in params['url'])
def test_response(self): def test_response(self):

View File

@ -36,10 +36,10 @@ class TestBingNewsEngine(SearxTestCase):
self.assertRaises(AttributeError, bing_news.response, '') self.assertRaises(AttributeError, bing_news.response, '')
self.assertRaises(AttributeError, bing_news.response, '[]') self.assertRaises(AttributeError, bing_news.response, '[]')
response = mock.Mock(content='<html></html>') response = mock.Mock(text='<html></html>')
self.assertEqual(bing_news.response(response), []) self.assertEqual(bing_news.response(response), [])
response = mock.Mock(content='<html></html>') response = mock.Mock(text='<html></html>')
self.assertEqual(bing_news.response(response), []) self.assertEqual(bing_news.response(response), [])
html = """<?xml version="1.0" encoding="utf-8" ?> html = """<?xml version="1.0" encoding="utf-8" ?>
@ -74,7 +74,7 @@ class TestBingNewsEngine(SearxTestCase):
</item> </item>
</channel> </channel>
</rss>""" # noqa </rss>""" # noqa
response = mock.Mock(content=html) response = mock.Mock(text=html.encode('utf-8'))
results = bing_news.response(response) results = bing_news.response(response)
self.assertEqual(type(results), list) self.assertEqual(type(results), list)
self.assertEqual(len(results), 2) self.assertEqual(len(results), 2)
@ -113,7 +113,7 @@ class TestBingNewsEngine(SearxTestCase):
</item> </item>
</channel> </channel>
</rss>""" # noqa </rss>""" # noqa
response = mock.Mock(content=html) response = mock.Mock(text=html.encode('utf-8'))
results = bing_news.response(response) results = bing_news.response(response)
self.assertEqual(type(results), list) self.assertEqual(type(results), list)
self.assertEqual(len(results), 1) self.assertEqual(len(results), 1)
@ -136,11 +136,11 @@ class TestBingNewsEngine(SearxTestCase):
</channel> </channel>
</rss>""" # noqa </rss>""" # noqa
response = mock.Mock(content=html) response = mock.Mock(text=html.encode('utf-8'))
results = bing_news.response(response) results = bing_news.response(response)
self.assertEqual(type(results), list) self.assertEqual(type(results), list)
self.assertEqual(len(results), 0) self.assertEqual(len(results), 0)
html = """<?xml version="1.0" encoding="utf-8" ?>gabarge""" html = """<?xml version="1.0" encoding="utf-8" ?>gabarge"""
response = mock.Mock(content=html) response = mock.Mock(text=html.encode('utf-8'))
self.assertRaises(lxml.etree.XMLSyntaxError, bing_news.response, response) self.assertRaises(lxml.etree.XMLSyntaxError, bing_news.response, response)

View File

@ -22,10 +22,10 @@ class TestBtdiggEngine(SearxTestCase):
self.assertRaises(AttributeError, btdigg.response, '') self.assertRaises(AttributeError, btdigg.response, '')
self.assertRaises(AttributeError, btdigg.response, '[]') self.assertRaises(AttributeError, btdigg.response, '[]')
response = mock.Mock(content='<html></html>') response = mock.Mock(text='<html></html>')
self.assertEqual(btdigg.response(response), []) self.assertEqual(btdigg.response(response), [])
html = """ html = u"""
<div id="search_res"> <div id="search_res">
<table> <table>
<tr> <tr>
@ -82,7 +82,7 @@ class TestBtdiggEngine(SearxTestCase):
</table> </table>
</div> </div>
""" """
response = mock.Mock(content=html) response = mock.Mock(text=html.encode('utf-8'))
results = btdigg.response(response) results = btdigg.response(response)
self.assertEqual(type(results), list) self.assertEqual(type(results), list)
self.assertEqual(len(results), 1) self.assertEqual(len(results), 1)
@ -101,12 +101,12 @@ class TestBtdiggEngine(SearxTestCase):
</table> </table>
</div> </div>
""" """
response = mock.Mock(content=html) response = mock.Mock(text=html.encode('utf-8'))
results = btdigg.response(response) results = btdigg.response(response)
self.assertEqual(type(results), list) self.assertEqual(type(results), list)
self.assertEqual(len(results), 0) self.assertEqual(len(results), 0)
html = """ html = u"""
<div id="search_res"> <div id="search_res">
<table> <table>
<tr> <tr>
@ -367,7 +367,7 @@ class TestBtdiggEngine(SearxTestCase):
</table> </table>
</div> </div>
""" """
response = mock.Mock(content=html) response = mock.Mock(text=html.encode('utf-8'))
results = btdigg.response(response) results = btdigg.response(response)
self.assertEqual(type(results), list) self.assertEqual(type(results), list)
self.assertEqual(len(results), 5) self.assertEqual(len(results), 5)

Some files were not shown because too many files have changed in this diff Show More