Merge pull request #1669 from dalf/engine-fixes

Engine fixes
This commit is contained in:
Alexandre Flament 2019-08-05 15:57:33 +02:00 committed by GitHub
commit 12f891da84
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 57 additions and 49 deletions

View File

@@ -29,7 +29,7 @@ def request(query, params):
# basic search # basic search
offset = (params['pageno'] - 1) * number_of_results offset = (params['pageno'] - 1) * number_of_results
string_args = dict(query=query, string_args = dict(query=query.decode('utf-8'),
offset=offset, offset=offset,
number_of_results=number_of_results) number_of_results=number_of_results)

View File

@@ -47,8 +47,6 @@ def request(query, params):
params['url'] = base_url + search_path params['url'] = base_url + search_path
params['headers']['User-Agent'] = gen_useragent('Windows NT 6.3; WOW64')
return params return params

View File

@@ -15,7 +15,7 @@ from searx.utils import is_valid_lang
from searx.url_utils import urljoin from searx.url_utils import urljoin
categories = ['general'] categories = ['general']
url = u'http://dictzone.com/{from_lang}-{to_lang}-dictionary/{query}' url = u'https://dictzone.com/{from_lang}-{to_lang}-dictionary/{query}'
weight = 100 weight = 100
parser_re = re.compile(b'.*?([a-z]+)-([a-z]+) ([^ ]+)$', re.I) parser_re = re.compile(b'.*?([a-z]+)-([a-z]+) ([^ ]+)$', re.I)

View File

@@ -18,13 +18,13 @@ categories = ['files']
paging = True paging = True
# search-url # search-url
base_url = 'https://f-droid.org/' base_url = 'https://search.f-droid.org/'
search_url = base_url + 'repository/browse/?{query}' search_url = base_url + '?{query}'
# do search-request # do search-request
def request(query, params): def request(query, params):
query = urlencode({'fdfilter': query, 'fdpage': params['pageno']}) query = urlencode({'q': query, 'page': params['pageno'], 'lang': ''})
params['url'] = search_url.format(query=query) params['url'] = search_url.format(query=query)
return params return params
@@ -35,17 +35,16 @@ def response(resp):
dom = html.fromstring(resp.text) dom = html.fromstring(resp.text)
for app in dom.xpath('//div[@id="appheader"]'): for app in dom.xpath('//a[@class="package-header"]'):
url = app.xpath('./ancestor::a/@href')[0] app_url = app.xpath('./@href')[0]
title = app.xpath('./p/span/text()')[0] app_title = extract_text(app.xpath('./div/h4[@class="package-name"]/text()'))
img_src = app.xpath('.//img/@src')[0] app_content = extract_text(app.xpath('./div/div/span[@class="package-summary"]')).strip() \
+ ' - ' + extract_text(app.xpath('./div/div/span[@class="package-license"]')).strip()
app_img_src = app.xpath('./img[@class="package-icon"]/@src')[0]
content = extract_text(app.xpath('./p')[0]) results.append({'url': app_url,
content = content.replace(title, '', 1).strip() 'title': app_title,
'content': app_content,
results.append({'url': url, 'img_src': app_img_src})
'title': title,
'content': content,
'img_src': img_src})
return results return results

View File

@@ -204,11 +204,11 @@ engines:
- name : etymonline - name : etymonline
engine : xpath engine : xpath
paging : True paging : True
search_url : http://etymonline.com/?search={query}&p={pageno} search_url : https://etymonline.com/search?page={pageno}&q={query}
url_xpath : //a[contains(@class, "word--")]/@href url_xpath : //a[contains(@class, "word__name--")]/@href
title_xpath : //p[contains(@class, "word__name--")]/text() title_xpath : //a[contains(@class, "word__name--")]
content_xpath : //section[contains(@class, "word__defination")]/object content_xpath : //section[contains(@class, "word__defination")]
first_page_num : 0 first_page_num : 1
shortcut : et shortcut : et
disabled : True disabled : True
@@ -703,9 +703,9 @@ engines:
shortcut: vo shortcut: vo
categories: social media categories: social media
search_url : https://searchvoat.co/?t={query} search_url : https://searchvoat.co/?t={query}
url_xpath : //div[@class="entry"]/p/a[@class="title"]/@href url_xpath : //div[@class="entry"]/p/a[contains(@class, "title")]/@href
title_xpath : //div[@class="entry"]/p/a[@class="title"] title_xpath : //div[@class="entry"]/p/a[contains(@class, "title")]
content_xpath : //div[@class="entry"]/p/span[@class="domain"] content_xpath : //div[@class="entry"]/p/span[@class="domain"]/a/text()
timeout : 10.0 timeout : 10.0
disabled : True disabled : True

View File

@@ -8,7 +8,7 @@ from searx.testing import SearxTestCase
class TestBaseEngine(SearxTestCase): class TestBaseEngine(SearxTestCase):
def test_request(self): def test_request(self):
query = 'test_query' query = 'test_query'.encode('utf-8')
dicto = defaultdict(dict) dicto = defaultdict(dict)
dicto['pageno'] = 1 dicto['pageno'] = 1
params = arxiv.request(query, dicto) params = arxiv.request(query, dicto)

View File

@@ -13,29 +13,40 @@ class TestFdroidEngine(SearxTestCase):
params = fdroid.request(query, dic) params = fdroid.request(query, dic)
self.assertTrue('url' in params) self.assertTrue('url' in params)
self.assertTrue(query in params['url']) self.assertTrue(query in params['url'])
self.assertTrue('f-droid.org' in params['url']) self.assertTrue('search.f-droid.org' in params['url'])
def test_response(self): def test_response_empty(self):
resp = mock.Mock(text='<html></html>') resp = mock.Mock(text='<html></html>')
self.assertEqual(fdroid.response(resp), []) self.assertEqual(fdroid.response(resp), [])
def test_response_oneresult(self):
html = """ html = """
<a href="https://google.com/qwerty"> <!DOCTYPE html>
<div id="appheader"> <html>
<div style="float:left;padding-right:10px;"> <head>
<img src="http://example.com/image.png" <title>test</title>
style="width:48px;border:none;"> </head>
</div> <body>
<div style="float:right;"> <div class="site-wrapper">
<p>Details...</p> <div class="main-content">
</div> <a class="package-header" href="https://example.com/app.url">
<p style="color:#000000;"> <img class="package-icon" src="https://example.com/appexample.logo.png" />
<span style="font-size:20px;">Sample title</span>
<br> <div class="package-info">
Sample content <h4 class="package-name">
</p> App Example 1
</div> </h4>
</a>
<div class="package-desc">
<span class="package-summary">Description App Example 1</span>
<span class="package-license">GPL-3.0-only</span>
</div>
</div>
</a>
</div>
</div>
</body>
</html>
""" """
resp = mock.Mock(text=html) resp = mock.Mock(text=html)
@@ -43,7 +54,7 @@ class TestFdroidEngine(SearxTestCase):
self.assertEqual(type(results), list) self.assertEqual(type(results), list)
self.assertEqual(len(results), 1) self.assertEqual(len(results), 1)
self.assertEqual(results[0]['url'], 'https://google.com/qwerty') self.assertEqual(results[0]['url'], 'https://example.com/app.url')
self.assertEqual(results[0]['title'], 'Sample title') self.assertEqual(results[0]['title'], 'App Example 1')
self.assertEqual(results[0]['content'], 'Sample content') self.assertEqual(results[0]['content'], 'Description App Example 1 - GPL-3.0-only')
self.assertEqual(results[0]['img_src'], 'http://example.com/image.png') self.assertEqual(results[0]['img_src'], 'https://example.com/appexample.logo.png')