
[enh] py3 compatibility

Adam Tauber
commit 52e615dede
  1. .travis.yml (5)
  2. requirements-dev.txt (3)
  3. searx/answerers/__init__.py (12)
  4. searx/answerers/random/answerer.py (13)
  5. searx/answerers/statistics/answerer.py (16)
  6. searx/autocomplete.py (6)
  7. searx/engines/1337x.py (3)
  8. searx/engines/__init__.py (5)
  9. searx/engines/archlinux.py (3)
  10. searx/engines/base.py (6)
  11. searx/engines/bing.py (2)
  12. searx/engines/bing_images.py (2)
  13. searx/engines/bing_news.py (5)
  14. searx/engines/blekko_images.py (2)
  15. searx/engines/btdigg.py (5)
  16. searx/engines/currency_convert.py (14)
  17. searx/engines/dailymotion.py (3)
  18. searx/engines/deezer.py (5)
  19. searx/engines/deviantart.py (2)
  20. searx/engines/dictzone.py (6)
  21. searx/engines/digbt.py (8)
  22. searx/engines/digg.py (4)
  23. searx/engines/doku.py (2)
  24. searx/engines/duckduckgo.py (2)
  25. searx/engines/duckduckgo_definitions.py (6)
  26. searx/engines/faroo.py (2)
  27. searx/engines/fdroid.py (7)
  28. searx/engines/filecrop.py (11)
  29. searx/engines/flickr.py (2)
  30. searx/engines/flickr_noapi.py (2)
  31. searx/engines/framalibre.py (4)
  32. searx/engines/frinkiac.py (2)
  33. searx/engines/gigablast.py (3)
  34. searx/engines/github.py (2)
  35. searx/engines/google.py (5)
  36. searx/engines/google_images.py (2)
  37. searx/engines/google_news.py (3)
  38. searx/engines/ina.py (10)
  39. searx/engines/json_engine.py (11)
  40. searx/engines/kickass.py (3)
  41. searx/engines/mediawiki.py (2)
  42. searx/engines/mixcloud.py (2)
  43. searx/engines/nyaa.py (2)
  44. searx/engines/openstreetmap.py (4)
  45. searx/engines/photon.py (2)
  46. searx/engines/piratebay.py (3)
  47. searx/engines/qwant.py (3)
  48. searx/engines/reddit.py (6)
  49. searx/engines/scanr_structures.py (4)
  50. searx/engines/searchcode_code.py (5)
  51. searx/engines/searchcode_doc.py (5)
  52. searx/engines/seedpeer.py (4)
  53. searx/engines/soundcloud.py (19)
  54. searx/engines/spotify.py (5)
  55. searx/engines/stackoverflow.py (6)
  56. searx/engines/startpage.py (2)
  57. searx/engines/subtitleseeker.py (2)
  58. searx/engines/swisscows.py (27)
  59. searx/engines/tokyotoshokan.py (11)
  60. searx/engines/torrentz.py (8)
  61. searx/engines/translated.py (4)
  62. searx/engines/twitter.py (3)
  63. searx/engines/vimeo.py (2)
  64. searx/engines/wikidata.py (13)
  65. searx/engines/wikipedia.py (21)
  66. searx/engines/wolframalpha_api.py (13)
  67. searx/engines/wolframalpha_noapi.py (9)
  68. searx/engines/www1x.py (6)
  69. searx/engines/www500px.py (3)
  70. searx/engines/xpath.py (4)
  71. searx/engines/yacy.py (2)
  72. searx/engines/yahoo.py (3)
  73. searx/engines/yahoo_news.py (6)
  74. searx/engines/yandex.py (4)
  75. searx/engines/youtube_api.py (2)
  76. searx/engines/youtube_noapi.py (2)
  77. searx/plugins/__init__.py (5)
  78. searx/plugins/doai_rewrite.py (2)
  79. searx/plugins/https_rewrite.py (5)
  80. searx/plugins/self_info.py (4)
  81. searx/plugins/tracker_url_remover.py (2)
  82. searx/preferences.py (18)
  83. searx/query.py (8)
  84. searx/results.py (6)
  85. searx/search.py (12)
  86. searx/settings_robot.yml (2)
  87. searx/templates/courgette/404.html (2)
  88. searx/templates/legacy/404.html (2)
  89. searx/templates/oscar/404.html (2)
  90. searx/templates/pix-art/404.html (2)
  91. searx/testing.py (42)
  92. searx/url_utils.py (28)
  93. searx/utils.py (26)
  94. searx/webapp.py (36)
  95. tests/robot/__init__.py (75)
  96. tests/robot/test_basic.robot (153)
  97. tests/unit/engines/test_archlinux.py (4)
  98. tests/unit/engines/test_bing.py (6)
  99. tests/unit/engines/test_bing_news.py (12)
  100. tests/unit/engines/test_btdigg.py (12)

Some files were not shown because too many files have changed in this diff.

5
.travis.yml

@ -9,6 +9,7 @@ addons:
language: python
python:
- "2.7"
- "3.6"
before_install:
- "export DISPLAY=:99.0"
- "sh -e /etc/init.d/xvfb start"
@ -24,9 +25,9 @@ script:
- ./manage.sh styles
- ./manage.sh grunt_build
- ./manage.sh tests
- ./manage.sh py_test_coverage
after_success:
coveralls
- ./manage.sh py_test_coverage
- coveralls
notifications:
irc:
channels:

3
requirements-dev.txt

@ -3,8 +3,7 @@ mock==2.0.0
nose2[coverage-plugin]
pep8==1.7.0
plone.testing==5.0.0
robotframework-selenium2library==1.8.0
robotsuite==1.7.0
splinter==0.7.5
transifex-client==0.12.2
unittest2==1.1.0
zope.testrunner==4.5.1

12
searx/answerers/__init__.py

@ -1,8 +1,12 @@
from os import listdir
from os.path import realpath, dirname, join, isdir
from sys import version_info
from searx.utils import load_module
from collections import defaultdict
if version_info[0] == 3:
unicode = str
answerers_dir = dirname(realpath(__file__))
@ -10,7 +14,7 @@ answerers_dir = dirname(realpath(__file__))
def load_answerers():
answerers = []
for filename in listdir(answerers_dir):
if not isdir(join(answerers_dir, filename)):
if not isdir(join(answerers_dir, filename)) or filename.startswith('_'):
continue
module = load_module('answerer.py', join(answerers_dir, filename))
if not hasattr(module, 'keywords') or not isinstance(module.keywords, tuple) or not len(module.keywords):
@ -30,12 +34,12 @@ def get_answerers_by_keywords(answerers):
def ask(query):
results = []
query_parts = filter(None, query.query.split())
query_parts = list(filter(None, query.query.split()))
if query_parts[0] not in answerers_by_keywords:
if query_parts[0].decode('utf-8') not in answerers_by_keywords:
return results
for answerer in answerers_by_keywords[query_parts[0]]:
for answerer in answerers_by_keywords[query_parts[0].decode('utf-8')]:
result = answerer(query)
if result:
results.append(result)
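
Two of the recurring py3 fixes show up here: filter() is lazy on Python 3, so its result is wrapped in list() before indexing, and the raw query (bytes on both interpreters) is decoded before being used as a key in the unicode-keyed keyword table. A minimal standalone sketch of the idea; the table contents are illustrative, not the real searx answerers:

    answerers_by_keywords = {u'random': ['random answerer'],
                             u'min': ['statistics answerer']}

    def ask(raw_query):
        # filter() returns an iterator on py3, so materialize it before indexing
        query_parts = list(filter(None, raw_query.split()))
        if not query_parts:
            return []
        # the query is bytes; the keyword table is keyed by unicode strings
        keyword = query_parts[0].decode('utf-8')
        return answerers_by_keywords.get(keyword, [])

    print(ask(b'random string'))  # -> ['random answerer']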

13
searx/answerers/random/answerer.py

@ -1,5 +1,6 @@
import random
import string
import sys
from flask_babel import gettext
# required answerer attribute
@ -8,7 +9,11 @@ keywords = ('random',)
random_int_max = 2**31
random_string_letters = string.lowercase + string.digits + string.uppercase
if sys.version_info[0] == 2:
random_string_letters = string.lowercase + string.digits + string.uppercase
else:
unicode = str
random_string_letters = string.ascii_lowercase + string.digits + string.ascii_uppercase
def random_string():
@ -24,9 +29,9 @@ def random_int():
return unicode(random.randint(-random_int_max, random_int_max))
random_types = {u'string': random_string,
u'int': random_int,
u'float': random_float}
random_types = {b'string': random_string,
b'int': random_int,
b'float': random_float}
# required answerer function
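
string.lowercase and string.uppercase no longer exist on Python 3 (they became ascii_lowercase and ascii_uppercase), and unicode is aliased to str so the existing unicode(...) calls keep working. A rough, self-contained version of the same arrangement:

    import random
    import string
    import sys

    if sys.version_info[0] == 2:
        letters = string.lowercase + string.digits + string.uppercase
    else:
        unicode = str  # keep py2-style unicode() conversions working on py3
        letters = string.ascii_lowercase + string.digits + string.ascii_uppercase

    def random_int(maximum=2 ** 31):
        return unicode(random.randint(-maximum, maximum))

    print(random_int())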

16
searx/answerers/statistics/answerer.py

@ -1,8 +1,12 @@
from sys import version_info
from functools import reduce
from operator import mul
from flask_babel import gettext
if version_info[0] == 3:
unicode = str
keywords = ('min',
'max',
'avg',
@ -19,22 +23,22 @@ def answer(query):
return []
try:
args = map(float, parts[1:])
args = list(map(float, parts[1:]))
except:
return []
func = parts[0]
answer = None
if func == 'min':
if func == b'min':
answer = min(args)
elif func == 'max':
elif func == b'max':
answer = max(args)
elif func == 'avg':
elif func == b'avg':
answer = sum(args) / len(args)
elif func == 'sum':
elif func == b'sum':
answer = sum(args)
elif func == 'prod':
elif func == b'prod':
answer = reduce(mul, args, 1)
if answer is None:
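
The keyword comparisons switch to bytes literals because the query parts are bytes on Python 3, and map() is wrapped in list() so that len() and repeated use work. A compact, runnable sketch of the same dispatch (simplified, not the full answerer):

    from functools import reduce
    from operator import mul

    def statistics_answer(raw_query):
        parts = raw_query.split()
        if len(parts) < 2:
            return []
        try:
            args = list(map(float, parts[1:]))  # list() so len() and reuse work on py3
        except ValueError:
            return []
        func = parts[0]                          # bytes on both interpreters
        if func == b'min':
            return [min(args)]
        elif func == b'max':
            return [max(args)]
        elif func == b'avg':
            return [sum(args) / len(args)]
        elif func == b'sum':
            return [sum(args)]
        elif func == b'prod':
            return [reduce(mul, args, 1)]
        return []

    print(statistics_answer(b'avg 1 2 3'))  # -> [2.0]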

6
searx/autocomplete.py

@ -18,7 +18,6 @@ along with searx. If not, see < http://www.gnu.org/licenses/ >.
from lxml import etree
from json import loads
from urllib import urlencode
from searx import settings
from searx.languages import language_codes
from searx.engines import (
@ -26,6 +25,11 @@ from searx.engines import (
)
from searx.poolrequests import get as http_get
try:
from urllib import urlencode
except:
from urllib.parse import urlencode
def get(*args, **kwargs):
if 'timeout' not in kwargs:
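
Where only one or two names are needed, the commit uses a local guarded import instead of a shared shim; urlencode moved from urllib (py2) to urllib.parse (py3). The idiom in isolation (catching ImportError rather than the bare except used in the diff):

    try:
        from urllib import urlencode          # py2
    except ImportError:
        from urllib.parse import urlencode    # py3

    print(urlencode({'q': 'test', 'pageno': 1}))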

3
searx/engines/1337x.py

@ -1,8 +1,7 @@
from urllib import quote
from lxml import html
from searx.engines.xpath import extract_text
from searx.utils import get_torrent_size
from urlparse import urljoin
from searx.url_utils import quote, urljoin
url = 'https://1337x.to/'
search_url = url + 'search/{search_term}/{pageno}/'
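
From here on, most engines import their URL helpers from searx.url_utils rather than from urllib/urlparse directly. That module's contents are not shown in this diff, but judging from the names imported across the engines (quote, quote_plus, unquote, urlencode, parse_qsl, urljoin, urlparse) it is presumably a small re-export shim along these lines (a sketch, not the actual file):

    # hypothetical searx/url_utils.py
    from sys import version_info

    if version_info[0] == 2:
        from urllib import quote, quote_plus, unquote, urlencode
        from urlparse import parse_qsl, urljoin, urlparse
    else:
        from urllib.parse import (parse_qsl, quote, quote_plus, unquote,
                                  urlencode, urljoin, urlparse)

    __all__ = ('parse_qsl', 'quote', 'quote_plus', 'unquote',
               'urlencode', 'urljoin', 'urlparse')

Centralizing the fallback keeps each engine's change down to a one-line import swap.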

5
searx/engines/__init__.py

@ -72,12 +72,11 @@ def load_engine(engine_data):
if engine_data['categories'] == 'none':
engine.categories = []
else:
engine.categories = map(
str.strip, engine_data['categories'].split(','))
engine.categories = list(map(str.strip, engine_data['categories'].split(',')))
continue
setattr(engine, param_name, engine_data[param_name])
for arg_name, arg_value in engine_default_args.iteritems():
for arg_name, arg_value in engine_default_args.items():
if not hasattr(engine, arg_name):
setattr(engine, arg_name, arg_value)
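
Two py3 changes meet here: dict.iteritems() is gone (items() exists on both interpreters), and map() returns a lazy iterator on Python 3, so its result is wrapped in list() wherever it is indexed or reused. For example:

    engine_default_args = {'paging': False, 'timeout': 2.0}  # illustrative values

    for arg_name, arg_value in engine_default_args.items():  # .iteritems() is py2-only
        print(arg_name, arg_value)

    # without list(), this would be a one-shot iterator on py3
    categories = list(map(str.strip, 'general, images ,news'.split(',')))
    print(categories)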

3
searx/engines/archlinux.py

@ -11,10 +11,9 @@
@parse url, title
"""
from urlparse import urljoin
from urllib import urlencode
from lxml import html
from searx.engines.xpath import extract_text
from searx.url_utils import urlencode, urljoin
# engine dependent config
categories = ['it']

6
searx/engines/base.py

@ -14,10 +14,10 @@
"""
from lxml import etree
from urllib import urlencode
from searx.utils import searx_useragent
from datetime import datetime
import re
from searx.url_utils import urlencode
from searx.utils import searx_useragent
categories = ['science']
@ -73,7 +73,7 @@ def request(query, params):
def response(resp):
results = []
search_results = etree.XML(resp.content)
search_results = etree.XML(resp.text)
for entry in search_results.xpath('./result/doc'):
content = "No description available"
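
Several engines also switch the parser input from resp.content (bytes) to resp.text (decoded unicode), so the rest of the result handling deals with str on Python 3. A small offline illustration of the two forms lxml accepts (the sample markup is made up):

    from lxml import etree, html

    raw = b'<records><doc><title>example</title></doc></records>'

    # resp.content-style input: bytes
    print(etree.XML(raw).findtext('.//title'))

    # resp.text-style input: decoded unicode str
    print(etree.XML(raw.decode('utf-8')).findtext('.//title'))

    # HTML results pages get the same treatment via html.fromstring
    print(html.fromstring(u'<div><a href="/x">link</a></div>').xpath('//a/@href'))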

2
searx/engines/bing.py

@ -13,9 +13,9 @@
@todo publishedDate
"""
from urllib import urlencode
from lxml import html
from searx.engines.xpath import extract_text
from searx.url_utils import urlencode
# engine dependent config
categories = ['general']

2
searx/engines/bing_images.py

@ -15,11 +15,11 @@
limited response to 10 images
"""
from urllib import urlencode
from lxml import html
from json import loads
import re
from searx.engines.bing import _fetch_supported_languages, supported_languages_url
from searx.url_utils import urlencode
# engine dependent config
categories = ['images']

5
searx/engines/bing_news.py

@ -11,13 +11,12 @@
@parse url, title, content, publishedDate, thumbnail
"""
from urllib import urlencode
from urlparse import urlparse, parse_qsl
from datetime import datetime
from dateutil import parser
from lxml import etree
from searx.utils import list_get
from searx.engines.bing import _fetch_supported_languages, supported_languages_url
from searx.url_utils import urlencode, urlparse, parse_qsl
# engine dependent config
categories = ['news']
@ -86,7 +85,7 @@ def request(query, params):
def response(resp):
results = []
rss = etree.fromstring(resp.content)
rss = etree.fromstring(resp.text)
ns = rss.nsmap

2
searx/engines/blekko_images.py

@ -11,7 +11,7 @@
"""
from json import loads
from urllib import urlencode
from searx.url_utils import urlencode
# engine dependent config
categories = ['images']

5
searx/engines/btdigg.py

@ -10,11 +10,10 @@
@parse url, title, content, seed, leech, magnetlink
"""
from urlparse import urljoin
from urllib import quote
from lxml import html
from operator import itemgetter
from searx.engines.xpath import extract_text
from searx.url_utils import quote, urljoin
from searx.utils import get_torrent_size
# engine dependent config
@ -38,7 +37,7 @@ def request(query, params):
def response(resp):
results = []
dom = html.fromstring(resp.content)
dom = html.fromstring(resp.text)
search_res = dom.xpath('//div[@id="search_res"]/table/tr')

14
searx/engines/currency_convert.py

@ -1,21 +1,25 @@
from datetime import datetime
import json
import re
import os
import json
import sys
import unicodedata
from datetime import datetime
if sys.version_info[0] == 3:
unicode = str
categories = []
url = 'https://download.finance.yahoo.com/d/quotes.csv?e=.csv&f=sl1d1t1&s={query}=X'
weight = 100
parser_re = re.compile(u'.*?(\\d+(?:\\.\\d+)?) ([^.0-9]+) (?:in|to) ([^.0-9]+)', re.I) # noqa
parser_re = re.compile(b'.*?(\\d+(?:\\.\\d+)?) ([^.0-9]+) (?:in|to) ([^.0-9]+)', re.I)
db = 1
def normalize_name(name):
name = name.lower().replace('-', ' ').rstrip('s')
name = name.decode('utf-8').lower().replace('-', ' ').rstrip('s')
name = re.sub(' +', ' ', name)
return unicodedata.normalize('NFKD', name).lower()
@ -35,7 +39,7 @@ def iso4217_to_name(iso4217, language):
def request(query, params):
m = parser_re.match(unicode(query, 'utf8'))
m = parser_re.match(query)
if not m:
# wrong query
return params
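
Since the query is handed to the engine as bytes, the parser regex is compiled from a bytes pattern and matched against the query directly, and the captured groups are decoded afterwards. Roughly (simplified, without the currency-name lookup):

    import re

    # bytes pattern so it can match the raw (bytes) query on py2 and py3
    parser_re = re.compile(b'.*?(\\d+(?:\\.\\d+)?) ([^.0-9]+) (?:in|to) ([^.0-9]+)', re.I)

    def parse(raw_query):
        m = parser_re.match(raw_query)
        if not m:
            return None
        amount, from_cur, to_cur = m.groups()
        # decode the captured byte strings before any unicode-level processing
        return float(amount), from_cur.decode('utf-8'), to_cur.decode('utf-8')

    print(parse(b'100 usd in eur'))  # -> (100.0, u'usd', u'eur')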

3
searx/engines/dailymotion.py

@ -12,10 +12,9 @@
@todo set content-parameter with correct data
"""
from urllib import urlencode
from json import loads
from datetime import datetime
from requests import get
from searx.url_utils import urlencode
# engine dependent config
categories = ['videos']

5
searx/engines/deezer.py

@ -11,7 +11,7 @@
"""
from json import loads
from urllib import urlencode
from searx.url_utils import urlencode
# engine dependent config
categories = ['music']
@ -30,8 +30,7 @@ embedded_url = '<iframe scrolling="no" frameborder="0" allowTransparency="true"
def request(query, params):
offset = (params['pageno'] - 1) * 25
params['url'] = search_url.format(query=urlencode({'q': query}),
offset=offset)
params['url'] = search_url.format(query=urlencode({'q': query}), offset=offset)
return params

2
searx/engines/deviantart.py

@ -12,10 +12,10 @@
@todo rewrite to api
"""
from urllib import urlencode
from lxml import html
import re
from searx.engines.xpath import extract_text
from searx.url_utils import urlencode
# engine dependent config
categories = ['images']

6
searx/engines/dictzone.py

@ -10,20 +10,20 @@
"""
import re
from urlparse import urljoin
from lxml import html
from searx.utils import is_valid_lang
from searx.url_utils import urljoin
categories = ['general']
url = u'http://dictzone.com/{from_lang}-{to_lang}-dictionary/{query}'
weight = 100
parser_re = re.compile(u'.*?([a-z]+)-([a-z]+) ([^ ]+)$', re.I)
parser_re = re.compile(b'.*?([a-z]+)-([a-z]+) ([^ ]+)$', re.I)
results_xpath = './/table[@id="r"]/tr'
def request(query, params):
m = parser_re.match(unicode(query, 'utf8'))
m = parser_re.match(query)
if not m:
return params

8
searx/engines/digbt.py

@ -10,10 +10,14 @@
@parse url, title, content, magnetlink
"""
from urlparse import urljoin
from sys import version_info
from lxml import html
from searx.engines.xpath import extract_text
from searx.utils import get_torrent_size
from searx.url_utils import urljoin
if version_info[0] == 3:
unicode = str
categories = ['videos', 'music', 'files']
paging = True
@ -31,7 +35,7 @@ def request(query, params):
def response(resp):
dom = html.fromstring(resp.content)
dom = html.fromstring(resp.text)
search_res = dom.xpath('.//td[@class="x-item"]')
if not search_res:

4
searx/engines/digg.py

@ -10,10 +10,10 @@
@parse url, title, content, publishedDate, thumbnail
"""
from urllib import quote_plus
from dateutil import parser
from json import loads
from lxml import html
from dateutil import parser
from searx.url_utils import quote_plus
# engine dependent config
categories = ['news', 'social media']

2
searx/engines/doku.py

@ -9,9 +9,9 @@
# @stable yes
# @parse (general) url, title, content
from urllib import urlencode
from lxml.html import fromstring
from searx.engines.xpath import extract_text
from searx.url_utils import urlencode
# engine dependent config
categories = ['general'] # TODO , 'images', 'music', 'videos', 'files'

2
searx/engines/duckduckgo.py

@ -13,11 +13,11 @@
@todo rewrite to api
"""
from urllib import urlencode
from lxml.html import fromstring
from requests import get
from json import loads
from searx.engines.xpath import extract_text
from searx.url_utils import urlencode
# engine dependent config
categories = ['general']

6
searx/engines/duckduckgo_definitions.py

@ -1,10 +1,10 @@
import json
from urllib import urlencode
from re import compile, sub
from lxml import html
from searx.utils import html_to_text
from re import compile
from searx.engines.xpath import extract_text
from searx.engines.duckduckgo import _fetch_supported_languages, supported_languages_url
from searx.url_utils import urlencode
from searx.utils import html_to_text
url = 'https://api.duckduckgo.com/'\
+ '?{query}&format=json&pretty=0&no_redirect=1&d=1'

2
searx/engines/faroo.py

@ -10,10 +10,10 @@
@parse url, title, content, publishedDate, img_src
"""
from urllib import urlencode
from json import loads
import datetime
from searx.utils import searx_useragent
from searx.url_utils import urlencode
# engine dependent config
categories = ['general', 'news']

7
searx/engines/fdroid.py

@ -9,9 +9,9 @@
@parse url, title, content
"""
from urllib import urlencode
from searx.engines.xpath import extract_text
from lxml import html
from searx.engines.xpath import extract_text
from searx.url_utils import urlencode
# engine dependent config
categories = ['files']
@ -24,8 +24,7 @@ search_url = base_url + 'repository/browse/?{query}'
# do search-request
def request(query, params):
query = urlencode({'fdfilter': query,
'fdpage': params['pageno']})
query = urlencode({'fdfilter': query, 'fdpage': params['pageno']})
params['url'] = search_url.format(query=query)
return params

11
searx/engines/filecrop.py

@ -1,5 +1,9 @@
from urllib import urlencode
from HTMLParser import HTMLParser
from searx.url_utils import urlencode
try:
from HTMLParser import HTMLParser
except:
from html.parser import HTMLParser
url = 'http://www.filecrop.com/'
search_url = url + '/search.php?{query}&size_i=0&size_f=100000000&engine_r=1&engine_d=1&engine_e=1&engine_4=1&engine_m=1&pos={index}' # noqa
@ -73,8 +77,7 @@ class FilecropResultParser(HTMLParser):
def request(query, params):
index = 1 + (params['pageno'] - 1) * 30
params['url'] = search_url.format(query=urlencode({'w': query}),
index=index)
params['url'] = search_url.format(query=urlencode({'w': query}), index=index)
return params
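
HTMLParser moved to html.parser on Python 3, hence the import fallback here and in ina.py. The same guarded import in isolation, with a tiny illustrative subclass:

    try:
        from HTMLParser import HTMLParser    # py2
    except ImportError:
        from html.parser import HTMLParser   # py3

    class LinkCollector(HTMLParser):
        def __init__(self):
            HTMLParser.__init__(self)
            self.links = []

        def handle_starttag(self, tag, attrs):
            if tag == 'a':
                self.links.extend(v for k, v in attrs if k == 'href')

    p = LinkCollector()
    p.feed('<p><a href="/one">1</a> <a href="/two">2</a></p>')
    print(p.links)  # -> ['/one', '/two']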

2
searx/engines/flickr.py

@ -13,8 +13,8 @@
More info on api-key : https://www.flickr.com/services/apps/create/
"""
from urllib import urlencode
from json import loads
from searx.url_utils import urlencode
categories = ['images']

2
searx/engines/flickr_noapi.py

@ -12,11 +12,11 @@
@parse url, title, thumbnail, img_src
"""
from urllib import urlencode
from json import loads
from time import time
import re
from searx.engines import logger
from searx.url_utils import urlencode
logger = logger.getChild('flickr-noapi')

4
searx/engines/framalibre.py

@ -10,12 +10,10 @@
@parse url, title, content, thumbnail, img_src
"""
from urlparse import urljoin
from cgi import escape
from urllib import urlencode
from lxml import html
from searx.engines.xpath import extract_text
from dateutil import parser
from searx.url_utils import urljoin, urlencode
# engine dependent config
categories = ['it']

2
searx/engines/frinkiac.py

@ -10,7 +10,7 @@ Frinkiac (Images)
"""
from json import loads
from urllib import urlencode
from searx.url_utils import urlencode
categories = ['images']

3
searx/engines/gigablast.py

@ -11,10 +11,9 @@
"""
from json import loads
from random import randint
from time import time
from urllib import urlencode
from lxml.html import fromstring
from searx.url_utils import urlencode
# engine dependent config
categories = ['general']

2
searx/engines/github.py

@ -10,8 +10,8 @@
@parse url, title, content
"""
from urllib import urlencode
from json import loads
from searx.url_utils import urlencode
# engine dependent config
categories = ['it']

5
searx/engines/google.py

@ -9,11 +9,10 @@
# @parse url, title, content, suggestion
import re
from urllib import urlencode
from urlparse import urlparse, parse_qsl
from lxml import html, etree
from searx.engines.xpath import extract_text, extract_url
from searx.search import logger
from searx import logger
from searx.url_utils import urlencode, urlparse, parse_qsl
logger = logger.getChild('google engine')

2
searx/engines/google_images.py

@ -11,9 +11,9 @@
"""
from datetime import date, timedelta
from urllib import urlencode
from json import loads
from lxml import html
from searx.url_utils import urlencode
# engine dependent config

3
searx/engines/google_news.py

@ -11,9 +11,8 @@
"""
from lxml import html
from urllib import urlencode
from json import loads
from searx.engines.google import _fetch_supported_languages, supported_languages_url
from searx.url_utils import urlencode
# search-url
categories = ['news']

10
searx/engines/ina.py

@ -12,11 +12,15 @@
# @todo embedded (needs some md5 from video page)
from json import loads
from urllib import urlencode
from lxml import html
from HTMLParser import HTMLParser
from searx.engines.xpath import extract_text
from dateutil import parser
from searx.engines.xpath import extract_text
from searx.url_utils import urlencode
try:
from HTMLParser import HTMLParser
except:
from html.parser import HTMLParser
# engine dependent config
categories = ['videos']

11
searx/engines/json_engine.py

@ -1,11 +1,16 @@
from urllib import urlencode
from json import loads
from collections import Iterable
from json import loads
from sys import version_info
from searx.url_utils import urlencode
if version_info[0] == 3:
unicode = str
search_url = None
url_query = None
content_query = None
title_query = None
paging = False
suggestion_query = ''
results_query = ''
@ -20,7 +25,7 @@ first_page_num = 1
def iterate(iterable):
if type(iterable) == dict:
it = iterable.iteritems()
it = iterable.items()
else:
it = enumerate(iterable)
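
The iterate() helper yields (key, value) pairs for dicts and (index, value) pairs for anything else; switching to items() keeps it working on both interpreters. A standalone version:

    def iterate(iterable):
        if isinstance(iterable, dict):
            it = iterable.items()        # .iteritems() was removed in py3
        else:
            it = enumerate(iterable)
        for index, value in it:
            yield index, value

    print(list(iterate({'a': 1})))       # [('a', 1)]
    print(list(iterate(['x', 'y'])))     # [(0, 'x'), (1, 'y')]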

3
searx/engines/kickass.py

@ -10,12 +10,11 @@
@parse url, title, content, seed, leech, magnetlink
"""
from urlparse import urljoin
from urllib import quote
from lxml import html
from operator import itemgetter
from searx.engines.xpath import extract_text
from searx.utils import get_torrent_size, convert_str_to_int
from searx.url_utils import quote, urljoin
# engine dependent config
categories = ['videos', 'music', 'files']

2
searx/engines/mediawiki.py

@ -14,7 +14,7 @@
from json import loads
from string import Formatter
from urllib import urlencode, quote
from searx.url_utils import urlencode, quote
# engine dependent config
categories = ['general']

2
searx/engines/mixcloud.py

@ -11,8 +11,8 @@
"""
from json import loads
from urllib import urlencode
from dateutil import parser
from searx.url_utils import urlencode
# engine dependent config
categories = ['music']

2
searx/engines/nyaa.py

@ -9,9 +9,9 @@
@parse url, title, content, seed, leech, torrentfile
"""
from urllib import urlencode
from lxml import html
from searx.engines.xpath import extract_text
from searx.url_utils import urlencode
# engine dependent config
categories = ['files', 'images', 'videos', 'music']

4
searx/engines/openstreetmap.py

@ -11,7 +11,6 @@
"""
from json import loads
from searx.utils import searx_useragent
# engine dependent config
categories = ['map']
@ -27,9 +26,6 @@ result_base_url = 'https://openstreetmap.org/{osm_type}/{osm_id}'
def request(query, params):
params['url'] = base_url + search_string.format(query=query)
# using searx User-Agent
params['headers']['User-Agent'] = searx_useragent()
return params

2
searx/engines/photon.py

@ -10,9 +10,9 @@
@parse url, title
"""
from urllib import urlencode
from json import loads
from searx.utils import searx_useragent
from searx.url_utils import urlencode
# engine dependent config
categories = ['map']

3
searx/engines/piratebay.py

@ -8,11 +8,10 @@
# @stable yes (HTML can change)
# @parse url, title, content, seed, leech, magnetlink
from urlparse import urljoin
from urllib import quote
from lxml import html
from operator import itemgetter
from searx.engines.xpath import extract_text
from searx.url_utils import quote, urljoin
# engine dependent config
categories = ['videos', 'music', 'files']

3
searx/engines/qwant.py

@ -12,9 +12,8 @@
from datetime import datetime
from json import loads
from urllib import urlencode
from searx.utils import html_to_text
from searx.url_utils import urlencode
# engine dependent config
categories = None

6
searx/engines/reddit.py

@ -11,9 +11,8 @@
"""
import json
from urllib import urlencode
from urlparse import urlparse, urljoin
from datetime import datetime
from searx.url_utils import urlencode, urljoin, urlparse
# engine dependent config
categories = ['general', 'images', 'news', 'social media']
@ -26,8 +25,7 @@ search_url = base_url + 'search.json?{query}'
# do search-request
def request(query, params):
query = urlencode({'q': query,
'limit': page_size})
query = urlencode({'q': query, 'limit': page_size})
params['url'] = search_url.format(query=query)
return params

4
searx/engines/scanr_structures.py

@ -10,9 +10,7 @@
@parse url, title, content, img_src
"""
from urllib import urlencode
from json import loads, dumps
from dateutil import parser
from searx.utils import html_to_text
# engine dependent config
@ -48,7 +46,7 @@ def response(resp):
search_res = loads(resp.text)
# return empty array if there are no results
if search_res.get('total') < 1:
if search_res.get('total', 0) < 1:
return []
# parse results
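
get('total') returns None when the key is missing, and None < 1 raises TypeError on Python 3 (it silently evaluated to True on Python 2), so the explicit default of 0 matters. For example:

    search_res = {'results': []}   # a response without a 'total' field

    # py2: None < 1 is True; py3: TypeError. The default avoids both surprises.
    if search_res.get('total', 0) < 1:
        print('no results')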

5
searx/engines/searchcode_code.py

@ -10,8 +10,8 @@
@parse url, title, content
"""
from urllib import urlencode
from json import loads
from searx.url_utils import urlencode
# engine dependent config
@ -31,8 +31,7 @@ code_endings = {'cs': 'c#',
# do search-request
def request(query, params):
params['url'] = search_url.format(query=urlencode({'q': query}),
pageno=params['pageno'] - 1)
params['url'] = search_url.format(query=urlencode({'q': query}), pageno=params['pageno'] - 1)
return params

5
searx/engines/searchcode_doc.py

@ -10,8 +10,8 @@
@parse url, title, content
"""
from urllib import urlencode
from json import loads
from searx.url_utils import urlencode
# engine dependent config
categories = ['it']
@ -24,8 +24,7 @@ search_url = url + 'api/search_IV/?{query}&p={pageno}'
# do search-request
def request(query, params):
params['url'] = search_url.format(query=urlencode({'q': query}),
pageno=params['pageno'] - 1)
params['url'] = search_url.format(query=urlencode({'q': query}), pageno=params['pageno'] - 1)
return params

4
searx/engines/seedpeer.py

@ -8,11 +8,9 @@
# @stable yes (HTML can change)
# @parse url, title, content, seed, leech, magnetlink
from urlparse import urljoin
from urllib import quote
from lxml import html
from operator import itemgetter
from searx.engines.xpath import extract_text
from searx.url_utils import quote, urljoin
url = 'http://www.seedpeer.eu/'

19
searx/engines/soundcloud.py

@ -11,13 +11,17 @@
"""
import re
from StringIO import StringIO
from json import loads
from lxml import etree
from urllib import urlencode, quote_plus
from lxml import html
from dateutil import parser
from searx import logger
from searx.poolrequests import get as http_get
from searx.url_utils import quote_plus, urlencode
try:
from cStringIO import StringIO
except:
from io import StringIO
# engine dependent config
categories = ['music']
@ -36,14 +40,15 @@ embedded_url = '<iframe width="100%" height="166" ' +\
'scrolling="no" frameborder="no" ' +\
'data-src="https://w.soundcloud.com/player/?url={uri}"></iframe>'
cid_re = re.compile(r'client_id:"([^"]*)"', re.I | re.U)
def get_client_id():
response = http_get("https://soundcloud.com")
rx_namespace = {"re": "http://exslt.org/regular-expressions"}
if response.ok:
tree = etree.parse(StringIO(response.content), etree.HTMLParser())
script_tags = tree.xpath("//script[re:match(@src, '(.*app.*js)')]", namespaces=rx_namespace)
tree = html.fromstring(response.content)
script_tags = tree.xpath("//script[contains(@src, '/assets/app')]")
app_js_urls = [script_tag.get('src') for script_tag in script_tags if script_tag is not None]
# extracts valid app_js urls from soundcloud.com content
@ -51,7 +56,7 @@ def get_client_id():
# gets app_js and searches for the clientid
response = http_get(app_js_url)
if response.ok:
cids = re.search(r'client_id:"([^"]*)"', response.content, re.M | re.I)
cids = cid_re.search(response.text)
if cids is not None and len(cids.groups()):
return cids.groups()[0]
logger.warning("Unable to fetch guest client_id from SoundCloud, check parser!")
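
The client_id scraper now parses the landing page with html.fromstring, finds the app script with a plain contains() match instead of an EXSLT regex, pre-compiles the client_id pattern, and searches response.text (str) rather than response.content (bytes). An offline sketch with stand-in payloads (the markup and client_id are invented):

    import re
    from lxml import html

    cid_re = re.compile(r'client_id:"([^"]*)"', re.I | re.U)

    # stand-ins for the pages that would be fetched from soundcloud.com
    landing_page = u'<html><body><script src="/assets/app-123.js"></script></body></html>'
    app_js_text = u'var x = 1; client_id:"abcdef123456"; var y = 2;'

    tree = html.fromstring(landing_page)               # accepts str directly
    scripts = tree.xpath("//script[contains(@src, '/assets/app')]")
    print([s.get('src') for s in scripts])

    match = cid_re.search(app_js_text)                 # search .text, not .content
    if match:
        print(match.groups()[0])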

5
searx/engines/spotify.py

@ -11,7 +11,7 @@
"""
from json import loads
from urllib import urlencode
from searx.url_utils import urlencode
# engine dependent config
categories = ['music']
@ -29,8 +29,7 @@ embedded_url = '<iframe data-src="https://embed.spotify.com/?uri=spotify:track:{
def request(query, params):
offset = (params['pageno'] - 1) * 20
params['url'] = search_url.format(query=urlencode({'q': query}),
offset=offset)
params['url'] = search_url.format(query=urlencode({'q': query}), offset=offset)
return params

6
searx/engines/stackoverflow.py

@ -10,10 +10,9 @@
@parse url, title, content
"""
from urlparse import urljoin
from urllib import urlencode
from lxml import html
from searx.engines.xpath import extract_text
from searx.url_utils import urlencode, urljoin
# engine dependent config
categories = ['it']
@ -31,8 +30,7 @@ content_xpath = './/div[@class="excerpt"]'
# do search-request
def request(query, params):
params['url'] = search_url.format(query=urlencode({'q': query}),
pageno=params['pageno'])
params['url'] = search_url.format(query=urlencode({'q': query}), pageno=params['pageno'])
return params

2
searx/engines/startpage.py

@ -56,7 +56,7 @@ def request(query, params):
def response(resp):
results = []
dom = html.fromstring(resp.content)
dom = html.fromstring(resp.text)
# parse results
for result in dom.xpath(results_xpath):

2
searx/engines/subtitleseeker.py

@ -10,10 +10,10 @@
@parse url, title, content
"""
from urllib import quote_plus
from lxml import html
from searx.languages import language_codes
from searx.engines.xpath import extract_text
from searx.url_utils import quote_plus
# engine dependent config
categories = ['videos']

27
searx/engines/swisscows.py

@ -11,9 +11,9 @@
"""
from json import loads
from urllib import urlencode, unquote
import re
from lxml.html import fromstring
from searx.url_utils import unquote, urlencode
# engine dependent config
categories = ['general', 'images']