From ee320d4e5b3ae4298334801b1d002b4f59d741e5 Mon Sep 17 00:00:00 2001 From: David A Roberts Date: Sun, 14 Aug 2016 19:25:29 +1000 Subject: [PATCH 01/11] Online Etymology Dictionary --- searx/settings.yml | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/searx/settings.yml b/searx/settings.yml index f0d33e50..b875c5b1 100644 --- a/searx/settings.yml +++ b/searx/settings.yml @@ -145,6 +145,17 @@ engines: shortcut : ddg disabled : True + - name : etymonline + engine : xpath + paging : True + search_url : http://etymonline.com/?search={query}&p={pageno} + url_xpath : //dt/a[1]/@href + title_xpath : //dt + content_xpath : //dd + suggestion_xpath : //a[@class="crossreference"] + first_page_num : 0 + shortcut : et + # api-key required: http://www.faroo.com/hp/api/api.html#key # - name : faroo # engine : faroo From 0171db5c3f6aca3e60af8b9e288dd6ea541e3e0c Mon Sep 17 00:00:00 2001 From: Adam Tauber Date: Fri, 23 Dec 2016 12:59:03 +0100 Subject: [PATCH 02/11] [fix] handle missing images in google news --- searx/engines/google_news.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/searx/engines/google_news.py b/searx/engines/google_news.py index 37253c6a..11357f3e 100644 --- a/searx/engines/google_news.py +++ b/searx/engines/google_news.py @@ -70,9 +70,9 @@ def response(resp): 'content': ''.join(result.xpath('.//div[@class="st"]//text()')), } - img = result.xpath('.//img/@src')[0] - if img and not img.startswith('data'): - r['img_src'] = img + imgs = result.xpath('.//img/@src') + if len(imgs) and not imgs[0].startswith('data'): + r['img_src'] = imgs[0] results.append(r) From 96c8d36e7b3c7c27b368aa3bd43e0d6fed79fc6d Mon Sep 17 00:00:00 2001 From: Stefan Antoni Date: Sat, 24 Dec 2016 16:44:12 +0100 Subject: [PATCH 03/11] Add ccctv engine to settings.yml --- AUTHORS.rst | 1 + searx/settings.yml | 10 ++++++++++ 2 files changed, 11 insertions(+) diff --git a/AUTHORS.rst b/AUTHORS.rst index 0c224088..1b6261dd 100644 --- a/AUTHORS.rst +++ 
b/AUTHORS.rst @@ -60,3 +60,4 @@ generally made searx better: - Thomas Renard @threnard - Pydo ``_ - Athemis ``_ +- Stefan Antoni `` diff --git a/searx/settings.yml b/searx/settings.yml index 527c8082..3baf85ec 100644 --- a/searx/settings.yml +++ b/searx/settings.yml @@ -91,6 +91,16 @@ engines: disabled : True shortcut : bb + - name : ccc-tv + engine : xpath + paging : False + search_url : https://media.ccc.de/search/?q={query} + url_xpath : //div[@class="caption"]/h3/a/@href + title_xpath : //div[@class="caption"]/h3/a/text() + content_xpath : //div[@class="caption"]/h4/@title + categories : videos + shortcut : c3tv + - name : crossref engine : json_engine paging : True From b6fc1546160285a2e0049d20cbd7e64269db1875 Mon Sep 17 00:00:00 2001 From: Adam Tauber Date: Sat, 24 Dec 2016 22:23:36 +0100 Subject: [PATCH 04/11] [enh] add searx engine --- searx/engines/searx_engine.py | 57 +++++++++++++++++++++++++++++++++++ searx/settings.yml | 8 +++++ 2 files changed, 65 insertions(+) create mode 100644 searx/engines/searx_engine.py diff --git a/searx/engines/searx_engine.py b/searx/engines/searx_engine.py new file mode 100644 index 00000000..91c26449 --- /dev/null +++ b/searx/engines/searx_engine.py @@ -0,0 +1,57 @@ +""" + Searx (all) + + @website https://github.com/asciimoo/searx + @provide-api yes (https://asciimoo.github.io/searx/dev/search_api.html) + + @using-api yes + @results JSON + @stable yes (using api) + @parse url, title, content +""" + +from json import loads +from searx.engines import categories as searx_categories + + +categories = searx_categories.keys() + +# search-url +instance_urls = [] +instance_index = 0 + + +# do search-request +def request(query, params): + global instance_index + params['url'] = instance_urls[instance_index % len(instance_urls)] + params['method'] = 'POST' + + instance_index += 1 + + params['data'] = { + 'q': query, + 'pageno': params['pageno'], + 'language': params['language'], + 'time_range': params['time_range'], + 'category': 
params['category'], + 'format': 'json' + } + + return params + + +# get response from search-request +def response(resp): + + response_json = loads(resp.text) + results = response_json['results'] + + for i in ('answers', 'infoboxes'): + results.extend(response_json[i]) + + results.extend({'suggestion': s} for s in response_json['suggestions']) + + results.append({'number_of_results': response_json['number_of_results']}) + + return results diff --git a/searx/settings.yml b/searx/settings.yml index 527c8082..62c6b953 100644 --- a/searx/settings.yml +++ b/searx/settings.yml @@ -441,6 +441,14 @@ engines: shortcut : scc disabled : True +# - name : searx +# engine : searx_engine +# shortcut : se +# instance_urls : +# - http://127.0.0.1:8888/ +# - ... +# disabled : True + - name : spotify engine : spotify shortcut : stf From 07448e15e439dfee1d531ab9d1fd3aab15af5253 Mon Sep 17 00:00:00 2001 From: Adam Tauber Date: Sun, 25 Dec 2016 09:34:17 +0100 Subject: [PATCH 05/11] [fix] disable etymonline by default - closes #789 --- searx/settings.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/searx/settings.yml b/searx/settings.yml index 62c6b953..01ac69de 100644 --- a/searx/settings.yml +++ b/searx/settings.yml @@ -164,6 +164,7 @@ engines: suggestion_xpath : //a[@class="crossreference"] first_page_num : 0 shortcut : et + disabled : True # api-key required: http://www.faroo.com/hp/api/api.html#key # - name : faroo From 89914e52e36a5048ec95dd075708f9d2bedf9ac1 Mon Sep 17 00:00:00 2001 From: Stefan Antoni Date: Sun, 25 Dec 2016 22:04:45 +0100 Subject: [PATCH 06/11] Update settings.yml --- searx/settings.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/searx/settings.yml b/searx/settings.yml index 539406d6..69699605 100644 --- a/searx/settings.yml +++ b/searx/settings.yml @@ -99,6 +99,7 @@ engines: title_xpath : //div[@class="caption"]/h3/a/text() content_xpath : //div[@class="caption"]/h4/@title categories : videos + disabled : True shortcut : c3tv - name : crossref From 
a98bbefbcf89ad8d5959c0ddd1c02ab65b92913d Mon Sep 17 00:00:00 2001 From: Adam Tauber Date: Sun, 25 Dec 2016 23:02:57 +0100 Subject: [PATCH 07/11] [enh] configurable listening http protocol version --- searx/settings.yml | 1 + searx/webapp.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/searx/settings.yml b/searx/settings.yml index 69699605..a475433a 100644 --- a/searx/settings.yml +++ b/searx/settings.yml @@ -13,6 +13,7 @@ server: secret_key : "ultrasecretkey" # change this! base_url : False # Set custom base_url. Possible values: False or "https://your.custom.host/location/" image_proxy : False # Proxying image results through searx + http_protocol_version : "1.0" # 1.0 and 1.1 are supported ui: themes_path : "" # Custom ui themes path - leave it blank if you didn't change diff --git a/searx/webapp.py b/searx/webapp.py index 096e1f26..f8253ec0 100644 --- a/searx/webapp.py +++ b/searx/webapp.py @@ -81,7 +81,7 @@ except ImportError: # serve pages with HTTP/1.1 from werkzeug.serving import WSGIRequestHandler -WSGIRequestHandler.protocol_version = "HTTP/1.1" +WSGIRequestHandler.protocol_version = "HTTP/{}".format(settings['server']['http_protocol_version']) static_path, templates_path, themes =\ get_themes(settings['ui']['themes_path'] From 14f58bdaecb288ac65782b2025e765c514b1844e Mon Sep 17 00:00:00 2001 From: Adam Tauber Date: Sun, 25 Dec 2016 23:31:51 +0100 Subject: [PATCH 08/11] [fix] robot tests ++ set default value for server http protocol version --- searx/settings_robot.yml | 1 + searx/webapp.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/searx/settings_robot.yml b/searx/settings_robot.yml index 7d270144..dbaf2fd5 100644 --- a/searx/settings_robot.yml +++ b/searx/settings_robot.yml @@ -13,6 +13,7 @@ server: secret_key : "ultrasecretkey" # change this! 
base_url : False image_proxy : False + http_protocol_version : "1.0" ui: themes_path : "" diff --git a/searx/webapp.py b/searx/webapp.py index f8253ec0..8f02a034 100644 --- a/searx/webapp.py +++ b/searx/webapp.py @@ -81,7 +81,7 @@ except ImportError: # serve pages with HTTP/1.1 from werkzeug.serving import WSGIRequestHandler -WSGIRequestHandler.protocol_version = "HTTP/{}".format(settings['server']['http_protocol_version']) +WSGIRequestHandler.protocol_version = "HTTP/{}".format(settings['server'].get('http_protocol_version', '1.0')) static_path, templates_path, themes =\ get_themes(settings['ui']['themes_path'] From a605377c40e5f9d5ce80a222df65b605dfcd3907 Mon Sep 17 00:00:00 2001 From: Adam Tauber Date: Tue, 27 Dec 2016 17:25:19 +0100 Subject: [PATCH 09/11] [enh] explicit engine init --- searx/engines/__init__.py | 14 ++++++-------- searx/webapp.py | 3 ++- 2 files changed, 8 insertions(+), 9 deletions(-) diff --git a/searx/engines/__init__.py b/searx/engines/__init__.py index 87b1b0eb..d3a57c32 100644 --- a/searx/engines/__init__.py +++ b/searx/engines/__init__.py @@ -32,7 +32,6 @@ engine_dir = dirname(realpath(__file__)) engines = {} categories = {'general': []} -_initialized = False engine_shortcuts = {} engine_default_args = {'paging': False, @@ -202,11 +201,10 @@ def get_engines_stats(): ] -if 'engines' not in settings or not settings['engines']: - logger.error('No engines found. 
Edit your settings.yml') - exit(2) - -for engine_data in settings['engines']: - engine = load_engine(engine_data) - if engine is not None: +def initialize_engines(engine_list): + global categories, engines + engines = {} + categories = {'general': []} + for engine_data in engine_list: + engine = load_engine(engine_data) engines[engine.name] = engine diff --git a/searx/webapp.py b/searx/webapp.py index 8f02a034..8a720fdc 100644 --- a/searx/webapp.py +++ b/searx/webapp.py @@ -53,7 +53,7 @@ from flask_babel import Babel, gettext, format_date, format_decimal from flask.json import jsonify from searx import settings, searx_dir, searx_debug from searx.engines import ( - categories, engines, get_engines_stats, engine_shortcuts + categories, engines, engine_shortcuts, get_engines_stats, initialize_engines ) from searx.utils import ( UnicodeWriter, highlight_content, html_to_text, get_themes, @@ -765,6 +765,7 @@ def page_not_found(e): def run(): + initialize_engines(settings['engines']) app.run( debug=searx_debug, use_debugger=searx_debug, From 68cbf0448f0b24ec9d15301659576ace695e06ef Mon Sep 17 00:00:00 2001 From: Adam Tauber Date: Tue, 27 Dec 2016 17:26:25 +0100 Subject: [PATCH 10/11] [fix] do not reload engines twice if started with debug mode --- searx/webapp.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/searx/webapp.py b/searx/webapp.py index 8a720fdc..6ad9ef06 100644 --- a/searx/webapp.py +++ b/searx/webapp.py @@ -765,7 +765,9 @@ def page_not_found(e): def run(): - initialize_engines(settings['engines']) + if not searx_debug or os.environ.get("WERKZEUG_RUN_MAIN") == "true": + initialize_engines(settings['engines']) + app.run( debug=searx_debug, use_debugger=searx_debug, From ea034fafa994227ea89662710901e73cb901e28c Mon Sep 17 00:00:00 2001 From: Adam Tauber Date: Tue, 27 Dec 2016 17:55:44 +0100 Subject: [PATCH 11/11] [fix] proper engine init --- searx/engines/__init__.py | 3 --- 1 file changed, 3 deletions(-) diff --git 
a/searx/engines/__init__.py b/searx/engines/__init__.py index d3a57c32..3a1db276 100644 --- a/searx/engines/__init__.py +++ b/searx/engines/__init__.py @@ -202,9 +202,6 @@ def get_engines_stats(): def initialize_engines(engine_list): - global categories, engines - engines = {} - categories = {'general': []} for engine_data in engine_list: engine = load_engine(engine_data) engines[engine.name] = engine