[enh] Add timeout limit per request (#1640)

The new URL parameter "timeout_limit" sets the timeout limit, expressed in seconds.
Example "timeout_limit=1.5" means the timeout limit is 1.5 seconds.

In addition, the query can start with <[number] to set the timeout limit.

For a number between 0 and 99, the unit is the second:
Example: "<3 searx" means the timeout limit is 3 seconds.

For a number of 100 or above, the unit is the millisecond:
Example: "<850 searx" means the timeout is 850 milliseconds.

In addition, there is a new optional setting: outgoing.max_request_timeout.
If it is not set, the user timeout can't exceed the searx configuration (as before: the maximum timeout among the engines selected for a query).

If the value is set, the user can set a timeout between 0 and max_request_timeout using
<[number] or timeout_limit query parameter.

Related to #1077
Updated version of PR #1413 from @isj-privacore
This commit is contained in:
Alexandre Flament 2019-08-02 13:50:51 +02:00 committed by GitHub
parent 2179079a91
commit 72029d27de
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
10 changed files with 180 additions and 15 deletions

View file

@ -43,6 +43,7 @@ class RawTextQuery(object):
self.query_parts = [] self.query_parts = []
self.engines = [] self.engines = []
self.languages = [] self.languages = []
self.timeout_limit = None
self.specific = False self.specific = False
# parse query, if tags are set, which # parse query, if tags are set, which
@ -69,6 +70,21 @@ class RawTextQuery(object):
self.query_parts.append(query_part) self.query_parts.append(query_part)
continue continue
# this force the timeout
if query_part[0] == '<':
try:
raw_timeout_limit = int(query_part[1:])
if raw_timeout_limit < 100:
# below 100, the unit is the second ( <3 = 3 seconds timeout )
self.timeout_limit = float(raw_timeout_limit)
else:
# 100 or above, the unit is the millisecond ( <850 = 850 milliseconds timeout )
self.timeout_limit = raw_timeout_limit / 1000.0
parse_next = True
except ValueError:
# error not reported to the user
pass
# this force a language # this force a language
if query_part[0] == ':': if query_part[0] == ':':
lang = query_part[1:].lower().replace('_', '-') lang = query_part[1:].lower().replace('_', '-')
@ -161,7 +177,7 @@ class RawTextQuery(object):
class SearchQuery(object): class SearchQuery(object):
"""container for all the search parameters (query, language, etc...)""" """container for all the search parameters (query, language, etc...)"""
def __init__(self, query, engines, categories, lang, safesearch, pageno, time_range): def __init__(self, query, engines, categories, lang, safesearch, pageno, time_range, timeout_limit=None):
self.query = query.encode('utf-8') self.query = query.encode('utf-8')
self.engines = engines self.engines = engines
self.categories = categories self.categories = categories
@ -169,6 +185,7 @@ class SearchQuery(object):
self.safesearch = safesearch self.safesearch = safesearch
self.pageno = pageno self.pageno = pageno
self.time_range = time_range self.time_range = time_range
self.timeout_limit = timeout_limit
def __str__(self): def __str__(self):
return str(self.query) + ";" + str(self.engines) return str(self.query) + ";" + str(self.engines)

View file

@ -45,6 +45,16 @@ if sys.version_info[0] == 3:
logger = logger.getChild('search') logger = logger.getChild('search')
number_of_searches = 0 number_of_searches = 0
# Optional upper bound, in seconds, for the timeout of a search request.
# Read once at import time from the "outgoing" section of the settings;
# None means the setting is absent.
max_request_timeout = settings.get('outgoing', {}).get('max_request_timeout', None)
if max_request_timeout is None:
    logger.info('max_request_timeout={0}'.format(max_request_timeout))
elif isinstance(max_request_timeout, float):
    logger.info('max_request_timeout={0} second(s)'.format(max_request_timeout))
else:
    # a value of any other type is a configuration error: refuse to start
    logger.critical('outgoing.max_request_timeout if defined has to be float')
    sys.exit(1)
def send_http_request(engine, request_params): def send_http_request(engine, request_params):
@ -265,6 +275,15 @@ def get_search_query_from_webapp(preferences, form):
# query_engines # query_engines
query_engines = raw_text_query.engines query_engines = raw_text_query.engines
# timeout_limit
query_timeout = raw_text_query.timeout_limit
if query_timeout is None and 'timeout_limit' in form:
raw_time_limit = form.get('timeout_limit')
try:
query_timeout = float(raw_time_limit)
except ValueError:
raise SearxParameterException('timeout_limit', raw_time_limit)
# query_categories # query_categories
query_categories = [] query_categories = []
@ -338,7 +357,8 @@ def get_search_query_from_webapp(preferences, form):
query_engines = deduplicate_query_engines(query_engines) query_engines = deduplicate_query_engines(query_engines)
return (SearchQuery(query, query_engines, query_categories, return (SearchQuery(query, query_engines, query_categories,
query_lang, query_safesearch, query_pageno, query_time_range), query_lang, query_safesearch, query_pageno,
query_time_range, query_timeout),
raw_text_query) raw_text_query)
@ -351,6 +371,7 @@ class Search(object):
super(Search, self).__init__() super(Search, self).__init__()
self.search_query = search_query self.search_query = search_query
self.result_container = ResultContainer() self.result_container = ResultContainer()
self.actual_timeout = None
# do search-request # do search-request
def search(self): def search(self):
@ -380,7 +401,7 @@ class Search(object):
search_query = self.search_query search_query = self.search_query
# max of all selected engine timeout # max of all selected engine timeout
timeout_limit = 0 default_timeout = 0
# start search-reqest for all selected engines # start search-reqest for all selected engines
for selected_engine in search_query.engines: for selected_engine in search_query.engines:
@ -420,12 +441,32 @@ class Search(object):
# append request to list # append request to list
requests.append((selected_engine['name'], search_query.query, request_params)) requests.append((selected_engine['name'], search_query.query, request_params))
# update timeout_limit # update default_timeout
timeout_limit = max(timeout_limit, engine.timeout) default_timeout = max(default_timeout, engine.timeout)
# adjust timeout
self.actual_timeout = default_timeout
query_timeout = self.search_query.timeout_limit
if max_request_timeout is None and query_timeout is None:
# No max, no user query: default_timeout
pass
elif max_request_timeout is None and query_timeout is not None:
# No max, but user query: From user query except if above default
self.actual_timeout = min(default_timeout, query_timeout)
elif max_request_timeout is not None and query_timeout is None:
# Max, no user query: Default except if above max
self.actual_timeout = min(default_timeout, max_request_timeout)
elif max_request_timeout is not None and query_timeout is not None:
# Max & user query: From user query except if above max
self.actual_timeout = min(query_timeout, max_request_timeout)
logger.debug("actual_timeout={0} (default_timeout={1}, ?timeout_limit={2}, max_request_timeout={3})"
.format(self.actual_timeout, default_timeout, query_timeout, max_request_timeout))
if requests:
# send all search-request # send all search-request
search_multiple_requests(requests, self.result_container, start_time, timeout_limit) if requests:
search_multiple_requests(requests, self.result_container, start_time, self.actual_timeout)
start_new_thread(gc.collect, tuple()) start_new_thread(gc.collect, tuple())
# return results, suggestions, answers and infoboxes # return results, suggestions, answers and infoboxes

View file

@ -34,7 +34,8 @@ ui:
# key : !!binary "your_morty_proxy_key" # key : !!binary "your_morty_proxy_key"
outgoing: # communication with search engines outgoing: # communication with search engines
request_timeout : 2.0 # seconds request_timeout : 2.0 # default timeout in seconds, can be overridden by engine
# max_request_timeout: 10.0 # the maximum timeout in seconds
useragent_suffix : "" # suffix of searx_useragent, could contain informations like an email address to the administrator useragent_suffix : "" # suffix of searx_useragent, could contain informations like an email address to the administrator
pool_connections : 100 # Number of different hosts pool_connections : 100 # Number of different hosts
pool_maxsize : 10 # Number of simultaneous requests by host pool_maxsize : 10 # Number of simultaneous requests by host

View file

@ -5,6 +5,7 @@
<input type="hidden" name="pageno" value="{{ pageno }}" /> <input type="hidden" name="pageno" value="{{ pageno }}" />
<input type="hidden" name="time_range" value="{{ time_range }}" /> <input type="hidden" name="time_range" value="{{ time_range }}" />
<input type="hidden" name="language" value="{{ current_language }}" /> <input type="hidden" name="language" value="{{ current_language }}" />
<input type="hidden" name="timeout_limit" value="{{ timeout_limit }}" />
{%- endmacro %} {%- endmacro %}
{%- macro search_url() %}{{ base_url }}?q={{ q|urlencode }}{% if selected_categories %}&amp;categories={{ selected_categories|join(",") | replace(' ','+') }}{% endif %}{% if pageno > 1 %}&amp;pageno={{ pageno }}{% endif %}{% if time_range %}&amp;time_range={{ time_range }}{% endif %}{% if current_language != 'all' %}&amp;language={{ current_language }}{% endif %}{% endmacro -%} {%- macro search_url() %}{{ base_url }}?q={{ q|urlencode }}{% if selected_categories %}&amp;categories={{ selected_categories|join(",") | replace(' ','+') }}{% endif %}{% if pageno > 1 %}&amp;pageno={{ pageno }}{% endif %}{% if time_range %}&amp;time_range={{ time_range }}{% endif %}{% if current_language != 'all' %}&amp;language={{ current_language }}{% endif %}{% endmacro -%}

View file

@ -36,6 +36,11 @@
{% for suggestion in topic.suggestions %} {% for suggestion in topic.suggestions %}
<form method="{{ method or 'POST' }}" action="{{ url_for('index') }}"> <form method="{{ method or 'POST' }}" action="{{ url_for('index') }}">
<input type="hidden" name="q" value="{{ suggestion }}"> <input type="hidden" name="q" value="{{ suggestion }}">
<input type="hidden" name="time_range" value="{{ time_range }}">
<input type="hidden" name="language" value="{{ current_language }}">
<input type="hidden" name="safesearch" value="{{ safesearch }}">
<input type="hidden" name="theme" value="{{ theme }}">
{% if timeout_limit %}<input type="hidden" name="timeout_limit" value="{{ timeout_limit|e }}" >{% endif %}
<input type="submit" value="{{ suggestion }}" /> <input type="submit" value="{{ suggestion }}" />
</form> </form>
{% endfor %} {% endfor %}

View file

@ -51,9 +51,11 @@
{% for suggestion in suggestions %} {% for suggestion in suggestions %}
<form method="{{ method or 'POST' }}" action="{{ url_for('index') }}"> <form method="{{ method or 'POST' }}" action="{{ url_for('index') }}">
<input type="hidden" name="q" value="{{ suggestion.url }}"> <input type="hidden" name="q" value="{{ suggestion.url }}">
<input type="hidden" name="time_range" value="{{ time_range }}">
<input type="hidden" name="language" value="{{ current_language }}"> <input type="hidden" name="language" value="{{ current_language }}">
<input type="hidden" name="safesearch" value="{{ safesearch }}"> <input type="hidden" name="safesearch" value="{{ safesearch }}">
<input type="hidden" name="theme" value="{{ theme }}"> <input type="hidden" name="theme" value="{{ theme }}">
{% if timeout_limit %}<input type="hidden" name="timeout_limit" value="{{ timeout_limit|e }}" >{% endif %}
<input type="submit" class="suggestion" value="&bull; {{ suggestion.title }}"> <input type="submit" class="suggestion" value="&bull; {{ suggestion.title }}">
</form> </form>
{% endfor %} {% endfor %}
@ -63,7 +65,7 @@
<div id="search_url"> <div id="search_url">
<h4 class="title">{{ _('Search URL') }} :</h4> <h4 class="title">{{ _('Search URL') }} :</h4>
<div class="selectable_url"><pre>{{ base_url }}?q={{ q|urlencode }}&amp;language={{ current_language }}&amp;time_range={{ time_range }}&amp;safesearch={{ safesearch }}{% if pageno > 1 %}&amp;pageno={{ pageno }}{% endif %}{% if selected_categories %}&amp;categories={{ selected_categories|join(",") | replace(' ','+') }}{% endif %}</pre></div> <div class="selectable_url"><pre>{{ base_url }}?q={{ q|urlencode }}&amp;language={{ current_language }}&amp;time_range={{ time_range }}&amp;safesearch={{ safesearch }}{% if pageno > 1 %}&amp;pageno={{ pageno }}{% endif %}{% if selected_categories %}&amp;categories={{ selected_categories|join(",") | replace(' ','+') }}{% endif %}{% if timeout_limit %}&amp;timeout_limit={{ timeout_limit|urlencode }}{% endif %}</pre></div>
</div> </div>
<div id="apis"> <div id="apis">
<h4 class="title">{{ _('Download results') }}</h4> <h4 class="title">{{ _('Download results') }}</h4>
@ -79,6 +81,7 @@
<input type="hidden" name="language" value="{{ current_language }}"> <input type="hidden" name="language" value="{{ current_language }}">
<input type="hidden" name="safesearch" value="{{ safesearch }}"> <input type="hidden" name="safesearch" value="{{ safesearch }}">
<input type="hidden" name="format" value="{{ output_type }}"> <input type="hidden" name="format" value="{{ output_type }}">
{% if timeout_limit %}<input type="hidden" name="timeout_limit" value="{{ timeout_limit|e }}" >{% endif %}
<input type="submit" value="{{ output_type }}"> <input type="submit" value="{{ output_type }}">
</form> </form>
</div> </div>
@ -97,6 +100,7 @@
<input type="hidden" name="language" value="{{ current_language }}"> <input type="hidden" name="language" value="{{ current_language }}">
<input type="hidden" name="safesearch" value="{{ safesearch }}"> <input type="hidden" name="safesearch" value="{{ safesearch }}">
<input type="hidden" name="theme" value="{{ theme }}"> <input type="hidden" name="theme" value="{{ theme }}">
{% if timeout_limit %}<input type="hidden" name="timeout_limit" value="{{ timeout_limit }}" >{% endif %}
<input type="submit" value="{{ correction }}"> <input type="submit" value="{{ correction }}">
</form> </form>
</div> </div>
@ -134,6 +138,7 @@
<input type="hidden" name="language" value="{{ current_language }}" > <input type="hidden" name="language" value="{{ current_language }}" >
<input type="hidden" name="safesearch" value="{{ safesearch }}" > <input type="hidden" name="safesearch" value="{{ safesearch }}" >
<input type="hidden" name="theme" value="{{ theme }}" > <input type="hidden" name="theme" value="{{ theme }}" >
{% if timeout_limit %}<input type="hidden" name="timeout_limit" value="{{ timeout_limit|e }}" >{% endif %}
<button type="submit">{{ icon_small('chevron-left') }} {{ _('previous page') }}</button> <button type="submit">{{ icon_small('chevron-left') }} {{ _('previous page') }}</button>
</div> </div>
</form> </form>
@ -149,6 +154,7 @@
<input type="hidden" name="language" value="{{ current_language }}" > <input type="hidden" name="language" value="{{ current_language }}" >
<input type="hidden" name="safesearch" value="{{ safesearch }}" > <input type="hidden" name="safesearch" value="{{ safesearch }}" >
<input type="hidden" name="theme" value="{{ theme }}" > <input type="hidden" name="theme" value="{{ theme }}" >
{% if timeout_limit %}<input type="hidden" name="timeout_limit" value="{{ timeout_limit|e }}" >{% endif %}
<button type="submit">{{ _('next page') }} {{ icon_small('chevron-right') }}</button> <button type="submit">{{ _('next page') }} {{ icon_small('chevron-right') }}</button>
</div> </div>
</form> </form>

View file

@ -14,4 +14,5 @@
{% include 'simple/categories.html' %} {% include 'simple/categories.html' %}
<input type="hidden" name="safesearch" value="{{ safesearch }}" > <input type="hidden" name="safesearch" value="{{ safesearch }}" >
<input type="hidden" name="theme" value="{{ theme }}" > <input type="hidden" name="theme" value="{{ theme }}" >
{% if timeout_limit %}<input type="hidden" name="timeout_limit" value="{{ timeout_limit|e }}" >{% endif %}
</form> </form>

View file

@ -628,7 +628,8 @@ def index():
fallback=settings['search']['language']), fallback=settings['search']['language']),
base_url=get_base_url(), base_url=get_base_url(),
theme=get_current_theme_name(), theme=get_current_theme_name(),
favicons=global_favicons[themes.index(get_current_theme_name())] favicons=global_favicons[themes.index(get_current_theme_name())],
timeout_limit=request.form.get('timeout_limit', None)
) )

View file

@ -62,3 +62,45 @@ class TestQuery(SearxTestCase):
self.assertEquals(len(query.query_parts), 1) self.assertEquals(len(query.query_parts), 1)
self.assertEquals(len(query.languages), 0) self.assertEquals(len(query.languages), 0)
self.assertFalse(query.specific) self.assertFalse(query.specific)
def test_timeout_below100(self):
    """A '<N' prefix with N below 100 is read as a timeout in seconds."""
    query_text = '<3 the query'
    query = RawTextQuery(query_text, [])
    query.parse_query()

    # the timeout tag is kept in the full query text
    self.assertEqual(query.getFullQuery(), query_text)
    self.assertEqual(len(query.query_parts), 3)
    self.assertEqual(query.timeout_limit, 3)
    self.assertFalse(query.specific)
def test_timeout_above100(self):
    """A '<N' prefix with N of 100 or more is read as a timeout in milliseconds."""
    query_text = '<350 the query'
    query = RawTextQuery(query_text, [])
    query.parse_query()

    # the timeout tag is kept in the full query text
    self.assertEqual(query.getFullQuery(), query_text)
    self.assertEqual(len(query.query_parts), 3)
    # 350 milliseconds, stored in seconds
    self.assertEqual(query.timeout_limit, 0.35)
    self.assertFalse(query.specific)
def test_timeout_above1000(self):
    """A millisecond value above 1000 yields a timeout of more than one second."""
    query_text = '<3500 the query'
    query = RawTextQuery(query_text, [])
    query.parse_query()

    # the timeout tag is kept in the full query text
    self.assertEqual(query.getFullQuery(), query_text)
    self.assertEqual(len(query.query_parts), 3)
    # 3500 milliseconds, stored in seconds
    self.assertEqual(query.timeout_limit, 3.5)
    self.assertFalse(query.specific)
def test_timeout_invalid(self):
    """A '<' prefix that is not followed by a number does not set a timeout."""
    # invalid number: it is not bang but it is part of the query
    query_text = '<xxx the query'
    query = RawTextQuery(query_text, [])
    query.parse_query()

    self.assertEqual(query.getFullQuery(), query_text)
    # the whole text stays a single query part
    self.assertEqual(len(query.query_parts), 1)
    self.assertEqual(query.query_parts[0], query_text)
    self.assertIsNone(query.timeout_limit)
    self.assertFalse(query.specific)

View file

@ -2,9 +2,59 @@
from searx.testing import SearxTestCase from searx.testing import SearxTestCase
import searx.preferences
import searx.search
import searx.engines
# TODO
class SearchTestCase(SearxTestCase): class SearchTestCase(SearxTestCase):
def test_(self): @classmethod
pass def setUpClass(cls):
searx.engines.initialize_engines([{
'name': 'general dummy',
'engine': 'dummy',
'categories': 'general',
'shortcut': 'gd',
'timeout': 3.0
}])
def test_timeout_simple(self):
    """With no maximum and no user timeout, the engine timeout (3.0) is used."""
    # restore the module-level setting afterwards so it does not leak into other tests
    self.addCleanup(setattr, searx.search, 'max_request_timeout', searx.search.max_request_timeout)
    searx.search.max_request_timeout = None
    search_query = searx.query.SearchQuery('test', [{'category': 'general', 'name': 'general dummy'}],
                                           ['general'], 'en-US', 0, 1, None, None)
    search = searx.search.Search(search_query)
    search.search()
    self.assertEqual(search.actual_timeout, 3.0)
def test_timeout_query_above_default_nomax(self):
    """With no maximum, a user timeout (5.0) above the engine timeout is capped at 3.0."""
    # restore the module-level setting afterwards so it does not leak into other tests
    self.addCleanup(setattr, searx.search, 'max_request_timeout', searx.search.max_request_timeout)
    searx.search.max_request_timeout = None
    search_query = searx.query.SearchQuery('test', [{'category': 'general', 'name': 'general dummy'}],
                                           ['general'], 'en-US', 0, 1, None, 5.0)
    search = searx.search.Search(search_query)
    search.search()
    self.assertEqual(search.actual_timeout, 3.0)
def test_timeout_query_below_default_nomax(self):
    """With no maximum, a user timeout (1.0) below the engine timeout is used as is."""
    # restore the module-level setting afterwards so it does not leak into other tests
    self.addCleanup(setattr, searx.search, 'max_request_timeout', searx.search.max_request_timeout)
    searx.search.max_request_timeout = None
    search_query = searx.query.SearchQuery('test', [{'category': 'general', 'name': 'general dummy'}],
                                           ['general'], 'en-US', 0, 1, None, 1.0)
    search = searx.search.Search(search_query)
    search.search()
    self.assertEqual(search.actual_timeout, 1.0)
def test_timeout_query_below_max(self):
    """With a maximum of 10.0, a user timeout of 5.0 is used even above the engine timeout."""
    # restore the module-level setting afterwards so it does not leak into other tests
    self.addCleanup(setattr, searx.search, 'max_request_timeout', searx.search.max_request_timeout)
    searx.search.max_request_timeout = 10.0
    search_query = searx.query.SearchQuery('test', [{'category': 'general', 'name': 'general dummy'}],
                                           ['general'], 'en-US', 0, 1, None, 5.0)
    search = searx.search.Search(search_query)
    search.search()
    self.assertEqual(search.actual_timeout, 5.0)
def test_timeout_query_above_max(self):
    """With a maximum of 10.0, a user timeout of 15.0 is capped at the maximum."""
    # restore the module-level setting afterwards so it does not leak into other tests
    self.addCleanup(setattr, searx.search, 'max_request_timeout', searx.search.max_request_timeout)
    searx.search.max_request_timeout = 10.0
    search_query = searx.query.SearchQuery('test', [{'category': 'general', 'name': 'general dummy'}],
                                           ['general'], 'en-US', 0, 1, None, 15.0)
    search = searx.search.Search(search_query)
    search.search()
    self.assertEqual(search.actual_timeout, 10.0)