From 72029d27ded8d93ab891c616d6bffbe8d3a67dd2 Mon Sep 17 00:00:00 2001 From: Alexandre Flament Date: Fri, 2 Aug 2019 13:50:51 +0200 Subject: [PATCH] [enh] Add timeout limit per request (#1640) The new URL parameter "timeout_limit" sets the timeout limit, defined in seconds. Example: "timeout_limit=1.5" means the timeout limit is 1.5 seconds. In addition, the query can start with <[number] to set the timeout limit. For numbers between 0 and 99, the unit is the second. Example: "<3 searx" means the timeout limit is 3 seconds. For numbers of 100 or above, the unit is the millisecond. Example: "<850 searx" means the timeout limit is 850 milliseconds. In addition, there is a new optional setting: outgoing.max_request_timeout. If it is not set, the user timeout can't go above the searx configuration (as before: the max timeout of the selected engines for a query). If the value is set, the user can set a timeout between 0 and max_request_timeout using <[number] or the timeout_limit query parameter. Related to #1077 Updated version of PR #1413 from @isj-privacore --- searx/query.py | 19 +++++++++- searx/search.py | 53 +++++++++++++++++++++++---- searx/settings.yml | 3 +- searx/templates/oscar/results.html | 1 + searx/templates/simple/infobox.html | 5 +++ searx/templates/simple/results.html | 12 +++++-- searx/templates/simple/search.html | 1 + searx/webapp.py | 3 +- tests/unit/test_query.py | 42 ++++++++++++++++++++++ tests/unit/test_search.py | 56 +++++++++++++++++++++++++++-- 10 files changed, 180 insertions(+), 15 deletions(-) diff --git a/searx/query.py b/searx/query.py index 5265ac91..382aed87 100644 --- a/searx/query.py +++ b/searx/query.py @@ -43,6 +43,7 @@ class RawTextQuery(object): self.query_parts = [] self.engines = [] self.languages = [] + self.timeout_limit = None self.specific = False # parse query, if tags are set, which @@ -69,6 +70,21 @@ class RawTextQuery(object): self.query_parts.append(query_part) continue + # this force the timeout + if query_part[0] == '<': + try: + raw_timeout_limit = 
int(query_part[1:]) + if raw_timeout_limit < 100: + # below 100, the unit is the second ( <3 = 3 seconds timeout ) + self.timeout_limit = float(raw_timeout_limit) + else: + # 100 or above, the unit is the millisecond ( <850 = 850 milliseconds timeout ) + self.timeout_limit = raw_timeout_limit / 1000.0 + parse_next = True + except ValueError: + # error not reported to the user + pass + # this force a language if query_part[0] == ':': lang = query_part[1:].lower().replace('_', '-') @@ -161,7 +177,7 @@ class RawTextQuery(object): class SearchQuery(object): """container for all the search parameters (query, language, etc...)""" - def __init__(self, query, engines, categories, lang, safesearch, pageno, time_range): + def __init__(self, query, engines, categories, lang, safesearch, pageno, time_range, timeout_limit=None): self.query = query.encode('utf-8') self.engines = engines self.categories = categories @@ -169,6 +185,7 @@ class SearchQuery(object): self.safesearch = safesearch self.pageno = pageno self.time_range = time_range + self.timeout_limit = timeout_limit def __str__(self): return str(self.query) + ";" + str(self.engines) diff --git a/searx/search.py b/searx/search.py index 1472073b..a2c1c85f 100644 --- a/searx/search.py +++ b/searx/search.py @@ -45,6 +45,16 @@ if sys.version_info[0] == 3: logger = logger.getChild('search') number_of_searches = 0 +max_request_timeout = settings.get('outgoing', {}).get('max_request_timeout' or None) +if max_request_timeout is None: + logger.info('max_request_timeout={0}'.format(max_request_timeout)) +else: + if isinstance(max_request_timeout, float): + logger.info('max_request_timeout={0} second(s)'.format(max_request_timeout)) + else: + logger.critical('outgoing.max_request_timeout if defined has to be float') + from sys import exit + exit(1) def send_http_request(engine, request_params): @@ -265,6 +275,15 @@ def get_search_query_from_webapp(preferences, form): # query_engines query_engines = raw_text_query.engines + # 
timeout_limit + query_timeout = raw_text_query.timeout_limit + if query_timeout is None and 'timeout_limit' in form: + raw_time_limit = form.get('timeout_limit') + try: + query_timeout = float(raw_time_limit) + except ValueError: + raise SearxParameterException('timeout_limit', raw_time_limit) + # query_categories query_categories = [] @@ -338,7 +357,8 @@ def get_search_query_from_webapp(preferences, form): query_engines = deduplicate_query_engines(query_engines) return (SearchQuery(query, query_engines, query_categories, - query_lang, query_safesearch, query_pageno, query_time_range), + query_lang, query_safesearch, query_pageno, + query_time_range, query_timeout), raw_text_query) @@ -351,6 +371,7 @@ class Search(object): super(Search, self).__init__() self.search_query = search_query self.result_container = ResultContainer() + self.actual_timeout = None # do search-request def search(self): @@ -380,7 +401,7 @@ class Search(object): search_query = self.search_query # max of all selected engine timeout - timeout_limit = 0 + default_timeout = 0 # start search-reqest for all selected engines for selected_engine in search_query.engines: @@ -420,12 +441,32 @@ class Search(object): # append request to list requests.append((selected_engine['name'], search_query.query, request_params)) - # update timeout_limit - timeout_limit = max(timeout_limit, engine.timeout) + # update default_timeout + default_timeout = max(default_timeout, engine.timeout) + # adjust timeout + self.actual_timeout = default_timeout + query_timeout = self.search_query.timeout_limit + + if max_request_timeout is None and query_timeout is None: + # No max, no user query: default_timeout + pass + elif max_request_timeout is None and query_timeout is not None: + # No max, but user query: From user query except if above default + self.actual_timeout = min(default_timeout, query_timeout) + elif max_request_timeout is not None and query_timeout is None: + # Max, no user query: Default except if above max + 
self.actual_timeout = min(default_timeout, max_request_timeout) + elif max_request_timeout is not None and query_timeout is not None: + # Max & user query: From user query except if above max + self.actual_timeout = min(query_timeout, max_request_timeout) + + logger.debug("actual_timeout={0} (default_timeout={1}, ?timeout_limit={2}, max_request_timeout={3})" + .format(self.actual_timeout, default_timeout, query_timeout, max_request_timeout)) + + # send all search-request if requests: - # send all search-request - search_multiple_requests(requests, self.result_container, start_time, timeout_limit) + search_multiple_requests(requests, self.result_container, start_time, self.actual_timeout) start_new_thread(gc.collect, tuple()) # return results, suggestions, answers and infoboxes diff --git a/searx/settings.yml b/searx/settings.yml index 504a9fbe..6659c129 100644 --- a/searx/settings.yml +++ b/searx/settings.yml @@ -34,7 +34,8 @@ ui: # key : !!binary "your_morty_proxy_key" outgoing: # communication with search engines - request_timeout : 2.0 # seconds + request_timeout : 2.0 # default timeout in seconds, can be overridden by engine + # max_request_timeout: 10.0 # the maximum timeout in seconds useragent_suffix : "" # suffix of searx_useragent, could contain informations like an email address to the administrator pool_connections : 100 # Number of different hosts pool_maxsize : 10 # Number of simultaneous requests by host diff --git a/searx/templates/oscar/results.html b/searx/templates/oscar/results.html index f712e577..ce557daf 100644 --- a/searx/templates/oscar/results.html +++ b/searx/templates/oscar/results.html @@ -5,6 +5,7 @@ + {%- endmacro %} {%- macro search_url() %}{{ base_url }}?q={{ q|urlencode }}{% if selected_categories %}&categories={{ selected_categories|join(",") | replace(' ','+') }}{% endif %}{% if pageno > 1 %}&pageno={{ pageno }}{% endif %}{% if time_range %}&time_range={{ time_range }}{% endif %}{% if current_language != 'all' %}&language={{ 
current_language }}{% endif %}{% endmacro -%} diff --git a/searx/templates/simple/infobox.html b/searx/templates/simple/infobox.html index d99806ac..50b56891 100644 --- a/searx/templates/simple/infobox.html +++ b/searx/templates/simple/infobox.html @@ -36,6 +36,11 @@ {% for suggestion in topic.suggestions %}
+ + + + + {% if timeout_limit %}{% endif %}
{% endfor %} diff --git a/searx/templates/simple/results.html b/searx/templates/simple/results.html index a8e899e5..770eebe8 100644 --- a/searx/templates/simple/results.html +++ b/searx/templates/simple/results.html @@ -51,9 +51,11 @@ {% for suggestion in suggestions %}
+ + {% if timeout_limit %}{% endif %}
{% endfor %} @@ -63,7 +65,7 @@

{{ _('Search URL') }} :

-
{{ base_url }}?q={{ q|urlencode }}&language={{ current_language }}&time_range={{ time_range }}&safesearch={{ safesearch }}{% if pageno > 1 %}&pageno={{ pageno }}{% endif %}{% if selected_categories %}&categories={{ selected_categories|join(",") | replace(' ','+') }}{% endif %}
+
{{ base_url }}?q={{ q|urlencode }}&language={{ current_language }}&time_range={{ time_range }}&safesearch={{ safesearch }}{% if pageno > 1 %}&pageno={{ pageno }}{% endif %}{% if selected_categories %}&categories={{ selected_categories|join(",") | replace(' ','+') }}{% endif %}{% if timeout_limit %}&timeout_limit={{ timeout_limit|urlencode }}{% endif %}

{{ _('Download results') }}

@@ -79,6 +81,7 @@ + {% if timeout_limit %}{% endif %}
@@ -97,6 +100,7 @@ + {% if timeout_limit %}{% endif %} @@ -134,7 +138,8 @@ - + {% if timeout_limit %}{% endif %} + {% endif %} @@ -149,7 +154,8 @@ - + {% if timeout_limit %}{% endif %} + diff --git a/searx/templates/simple/search.html b/searx/templates/simple/search.html index 9c4a99b6..e9023b42 100644 --- a/searx/templates/simple/search.html +++ b/searx/templates/simple/search.html @@ -14,4 +14,5 @@ {% include 'simple/categories.html' %} + {% if timeout_limit %}{% endif %} diff --git a/searx/webapp.py b/searx/webapp.py index 8dd4af07..ffe9b4da 100644 --- a/searx/webapp.py +++ b/searx/webapp.py @@ -628,7 +628,8 @@ def index(): fallback=settings['search']['language']), base_url=get_base_url(), theme=get_current_theme_name(), - favicons=global_favicons[themes.index(get_current_theme_name())] + favicons=global_favicons[themes.index(get_current_theme_name())], + timeout_limit=request.form.get('timeout_limit', None) ) diff --git a/tests/unit/test_query.py b/tests/unit/test_query.py index 49ccb608..e4c0bdee 100644 --- a/tests/unit/test_query.py +++ b/tests/unit/test_query.py @@ -62,3 +62,45 @@ class TestQuery(SearxTestCase): self.assertEquals(len(query.query_parts), 1) self.assertEquals(len(query.languages), 0) self.assertFalse(query.specific) + + def test_timeout_below100(self): + query_text = '<3 the query' + query = RawTextQuery(query_text, []) + query.parse_query() + + self.assertEquals(query.getFullQuery(), query_text) + self.assertEquals(len(query.query_parts), 3) + self.assertEquals(query.timeout_limit, 3) + self.assertFalse(query.specific) + + def test_timeout_above100(self): + query_text = '<350 the query' + query = RawTextQuery(query_text, []) + query.parse_query() + + self.assertEquals(query.getFullQuery(), query_text) + self.assertEquals(len(query.query_parts), 3) + self.assertEquals(query.timeout_limit, 0.35) + self.assertFalse(query.specific) + + def test_timeout_above1000(self): + query_text = '<3500 the query' + query = RawTextQuery(query_text, []) + 
query.parse_query() + + self.assertEquals(query.getFullQuery(), query_text) + self.assertEquals(len(query.query_parts), 3) + self.assertEquals(query.timeout_limit, 3.5) + self.assertFalse(query.specific) + + def test_timeout_invalid(self): + # invalid number: it is not bang but it is part of the query + query_text = '