[enh] introduce private engines

This PR adds a new engine setting named `tokens`.
It expects a list of tokens, which lets searx validate
whether a request should be accepted.
This commit is contained in:
Noémi Ványi 2020-02-01 11:01:17 +01:00
parent f9c7a678d2
commit 99435381a8
10 changed files with 161 additions and 28 deletions

View File

@ -38,6 +38,7 @@ def check_settings_yml(file_name):
else: else:
return None return None
# find location of settings.yml # find location of settings.yml
if 'SEARX_SETTINGS_PATH' in environ: if 'SEARX_SETTINGS_PATH' in environ:
# if possible set path to settings using the # if possible set path to settings using the

View File

@ -54,7 +54,8 @@ engine_default_args = {'paging': False,
'suspend_end_time': 0, 'suspend_end_time': 0,
'continuous_errors': 0, 'continuous_errors': 0,
'time_range_support': False, 'time_range_support': False,
'offline': False} 'offline': False,
'tokens': []}
def load_engine(engine_data): def load_engine(engine_data):
@ -160,7 +161,7 @@ def to_percentage(stats, maxvalue):
return stats return stats
def get_engines_stats(): def get_engines_stats(preferences):
# TODO refactor # TODO refactor
pageloads = [] pageloads = []
engine_times = [] engine_times = []
@ -171,8 +172,12 @@ def get_engines_stats():
max_pageload = max_engine_times = max_results = max_score = max_errors = max_score_per_result = 0 # noqa max_pageload = max_engine_times = max_results = max_score = max_errors = max_score_per_result = 0 # noqa
for engine in engines.values(): for engine in engines.values():
if not preferences.validate_token(engine):
continue
if engine.stats['search_count'] == 0: if engine.stats['search_count'] == 0:
continue continue
results_num = \ results_num = \
engine.stats['result_count'] / float(engine.stats['search_count']) engine.stats['result_count'] / float(engine.stats['search_count'])

View File

@ -0,0 +1,12 @@
"""
Dummy Offline
@results one result
@stable yes
"""
def search(query, request_params):
    """Return one fixed result; both arguments are ignored."""
    only_result = {'result': 'this is what you get'}
    return [only_result]

View File

@ -72,6 +72,7 @@ def parse_album(hit):
result.update({'content': 'Released: {}'.format(year)}) result.update({'content': 'Released: {}'.format(year)})
return result return result
parse = {'lyric': parse_lyric, 'song': parse_lyric, 'artist': parse_artist, 'album': parse_album} parse = {'lyric': parse_lyric, 'song': parse_lyric, 'artist': parse_artist, 'album': parse_album}

View File

@ -104,6 +104,31 @@ class MultipleChoiceSetting(EnumStringSetting):
resp.set_cookie(name, ','.join(self.value), max_age=COOKIE_MAX_AGE) resp.set_cookie(name, ','.join(self.value), max_age=COOKIE_MAX_AGE)
class SetSetting(Setting):
    """Setting holding a set of unique strings.

    The values are read from and written to a single comma separated
    string. Because they are kept in a ``set``, the order of the elements
    in the joined string is unspecified.
    """

    def _post_init(self):
        # presumably invoked by Setting while constructing the object - confirm.
        # Only create the container when no values were assigned already.
        if not hasattr(self, 'values'):
            self.values = set()

    def get_value(self):
        """Return the stored values joined with commas."""
        return ','.join(self.values)

    def parse(self, data):
        """Merge the comma separated elements of ``data`` into the stored values.

        An empty string resets the stored values. Any other input is added
        to the values that are already present. Empty elements (for
        example from ``'a,,b'``) are ignored, so an empty token is never
        stored.
        """
        if data == '':
            self.values = set()
            return

        self.values.update(element for element in data.split(',') if element)

    def parse_form(self, data):
        """Replace the stored values with the comma separated elements of ``data``.

        Empty elements are ignored, so an empty form field yields an empty
        set, the same result ``parse('')`` produces, instead of ``{''}``.
        """
        self.values = set(element for element in data.split(',') if element)

    def save(self, name, resp):
        """Store the joined values in the cookie ``name`` of ``resp``."""
        resp.set_cookie(name, ','.join(self.values), max_age=COOKIE_MAX_AGE)
class SearchLanguageSetting(EnumStringSetting): class SearchLanguageSetting(EnumStringSetting):
"""Available choices may change, so user's value may not be in choices anymore""" """Available choices may change, so user's value may not be in choices anymore"""
@ -272,6 +297,7 @@ class Preferences(object):
self.engines = EnginesSetting('engines', choices=engines) self.engines = EnginesSetting('engines', choices=engines)
self.plugins = PluginsSetting('plugins', choices=plugins) self.plugins = PluginsSetting('plugins', choices=plugins)
self.tokens = SetSetting('tokens')
self.unknown_params = {} self.unknown_params = {}
def get_as_url_params(self): def get_as_url_params(self):
@ -288,6 +314,8 @@ class Preferences(object):
settings_kv['disabled_plugins'] = ','.join(self.plugins.disabled) settings_kv['disabled_plugins'] = ','.join(self.plugins.disabled)
settings_kv['enabled_plugins'] = ','.join(self.plugins.enabled) settings_kv['enabled_plugins'] = ','.join(self.plugins.enabled)
settings_kv['tokens'] = ','.join(self.tokens.values)
return urlsafe_b64encode(compress(urlencode(settings_kv).encode('utf-8'))).decode('utf-8') return urlsafe_b64encode(compress(urlencode(settings_kv).encode('utf-8'))).decode('utf-8')
def parse_encoded_data(self, input_data): def parse_encoded_data(self, input_data):
@ -307,6 +335,8 @@ class Preferences(object):
elif user_setting_name == 'disabled_plugins': elif user_setting_name == 'disabled_plugins':
self.plugins.parse_cookie((input_data.get('disabled_plugins', ''), self.plugins.parse_cookie((input_data.get('disabled_plugins', ''),
input_data.get('enabled_plugins', ''))) input_data.get('enabled_plugins', '')))
elif user_setting_name == 'tokens':
self.tokens.parse(user_setting)
elif not any(user_setting_name.startswith(x) for x in [ elif not any(user_setting_name.startswith(x) for x in [
'enabled_', 'enabled_',
'disabled_', 'disabled_',
@ -328,6 +358,8 @@ class Preferences(object):
enabled_categories.append(user_setting_name[len('category_'):]) enabled_categories.append(user_setting_name[len('category_'):])
elif user_setting_name.startswith('plugin_'): elif user_setting_name.startswith('plugin_'):
disabled_plugins.append(user_setting_name) disabled_plugins.append(user_setting_name)
elif user_setting_name == 'tokens':
self.tokens.parse_form(user_setting)
else: else:
self.unknown_params[user_setting_name] = user_setting self.unknown_params[user_setting_name] = user_setting
self.key_value_settings['categories'].parse_form(enabled_categories) self.key_value_settings['categories'].parse_form(enabled_categories)
@ -346,6 +378,18 @@ class Preferences(object):
user_setting.save(user_setting_name, resp) user_setting.save(user_setting_name, resp)
self.engines.save(resp) self.engines.save(resp)
self.plugins.save(resp) self.plugins.save(resp)
self.tokens.save('tokens', resp)
for k, v in self.unknown_params.items(): for k, v in self.unknown_params.items():
resp.set_cookie(k, v, max_age=COOKIE_MAX_AGE) resp.set_cookie(k, v, max_age=COOKIE_MAX_AGE)
return resp return resp
def validate_token(self, engine):
    """Tell whether the stored tokens allow the use of ``engine``.

    An engine that has no ``tokens`` attribute, or whose ``tokens`` is
    empty, is always allowed. Otherwise at least one value of
    ``self.tokens.values`` has to be contained in ``engine.tokens``.
    """
    # engines without configured tokens are not restricted
    if not hasattr(engine, 'tokens') or not engine.tokens:
        return True

    return any(user_token in engine.tokens for user_token in self.tokens.values)

View File

@ -177,7 +177,8 @@ class RawTextQuery(object):
class SearchQuery(object): class SearchQuery(object):
"""container for all the search parameters (query, language, etc...)""" """container for all the search parameters (query, language, etc...)"""
def __init__(self, query, engines, categories, lang, safesearch, pageno, time_range, timeout_limit=None): def __init__(self, query, engines, categories, lang, safesearch, pageno, time_range,
timeout_limit=None, preferences=None):
self.query = query.encode('utf-8') self.query = query.encode('utf-8')
self.engines = engines self.engines = engines
self.categories = categories self.categories = categories
@ -186,6 +187,7 @@ class SearchQuery(object):
self.pageno = pageno self.pageno = pageno
self.time_range = None if time_range in ('', 'None', None) else time_range self.time_range = None if time_range in ('', 'None', None) else time_range
self.timeout_limit = timeout_limit self.timeout_limit = timeout_limit
self.preferences = preferences
def __str__(self): def __str__(self):
return str(self.query) + ";" + str(self.engines) return str(self.query) + ";" + str(self.engines)

View File

@ -407,7 +407,7 @@ def get_search_query_from_webapp(preferences, form):
return (SearchQuery(query, query_engines, query_categories, return (SearchQuery(query, query_engines, query_categories,
query_lang, query_safesearch, query_pageno, query_lang, query_safesearch, query_pageno,
query_time_range, query_timeout), query_time_range, query_timeout, preferences),
raw_text_query) raw_text_query)
@ -459,6 +459,9 @@ class Search(object):
engine = engines[selected_engine['name']] engine = engines[selected_engine['name']]
if not search_query.preferences.validate_token(engine):
continue
# skip suspended engines # skip suspended engines
if engine.suspend_end_time >= time(): if engine.suspend_end_time >= time():
logger.debug('Engine currently suspended: %s', selected_engine['name']) logger.debug('Engine currently suspended: %s', selected_engine['name'])

View File

@ -131,6 +131,12 @@
{% endfor %} {% endfor %}
</select> </select>
{{ preferences_item_footer(info, label, rtl) }} {{ preferences_item_footer(info, label, rtl) }}
{% set label = _('Engine tokens') %}
{% set info = _('Access tokens for private engines') %}
{{ preferences_item_header(info, label, rtl) }}
<input class="form-control" id='tokens' name='tokens' value='{{ preferences.tokens.get_value() }}'/>
{{ preferences_item_footer(info, label, rtl) }}
</div> </div>
</fieldset> </fieldset>
</div> </div>

View File

@ -731,8 +731,13 @@ def preferences():
# stats for preferences page # stats for preferences page
stats = {} stats = {}
engines_by_category = {}
for c in categories: for c in categories:
engines_by_category[c] = []
for e in categories[c]: for e in categories[c]:
if not request.preferences.validate_token(e):
continue
stats[e.name] = {'time': None, stats[e.name] = {'time': None,
'warn_timeout': False, 'warn_timeout': False,
'warn_time': False} 'warn_time': False}
@ -740,9 +745,11 @@ def preferences():
stats[e.name]['warn_timeout'] = True stats[e.name]['warn_timeout'] = True
stats[e.name]['supports_selected_language'] = _is_selected_language_supported(e, request.preferences) stats[e.name]['supports_selected_language'] = _is_selected_language_supported(e, request.preferences)
engines_by_category[c].append(e)
# get first element [0], the engine time, # get first element [0], the engine time,
# and then the second element [1] : the time (the first one is the label) # and then the second element [1] : the time (the first one is the label)
for engine_stat in get_engines_stats()[0][1]: for engine_stat in get_engines_stats(request.preferences)[0][1]:
stats[engine_stat.get('name')]['time'] = round(engine_stat.get('avg'), 3) stats[engine_stat.get('name')]['time'] = round(engine_stat.get('avg'), 3)
if engine_stat.get('avg') > settings['outgoing']['request_timeout']: if engine_stat.get('avg') > settings['outgoing']['request_timeout']:
stats[engine_stat.get('name')]['warn_time'] = True stats[engine_stat.get('name')]['warn_time'] = True
@ -752,7 +759,7 @@ def preferences():
locales=settings['locales'], locales=settings['locales'],
current_locale=get_locale(), current_locale=get_locale(),
image_proxy=image_proxy, image_proxy=image_proxy,
engines_by_category=categories, engines_by_category=engines_by_category,
stats=stats, stats=stats,
answerers=[{'info': a.self_info(), 'keywords': a.keywords} for a in answerers], answerers=[{'info': a.self_info(), 'keywords': a.keywords} for a in answerers],
disabled_engines=disabled_engines, disabled_engines=disabled_engines,
@ -828,7 +835,7 @@ def image_proxy():
@app.route('/stats', methods=['GET']) @app.route('/stats', methods=['GET'])
def stats(): def stats():
"""Render engine statistics page.""" """Render engine statistics page."""
stats = get_engines_stats() stats = get_engines_stats(request.preferences)
return render( return render(
'stats.html', 'stats.html',
stats=stats, stats=stats,
@ -891,7 +898,7 @@ def clear_cookies():
@app.route('/config') @app.route('/config')
def config(): def config():
return jsonify({'categories': list(categories.keys()), return jsonify({'categories': list(categories.keys()),
'engines': [{'name': engine_name, 'engines': [{'name': name,
'categories': engine.categories, 'categories': engine.categories,
'shortcut': engine.shortcut, 'shortcut': engine.shortcut,
'enabled': not engine.disabled, 'enabled': not engine.disabled,
@ -904,7 +911,7 @@ def config():
'safesearch': engine.safesearch, 'safesearch': engine.safesearch,
'time_range_support': engine.time_range_support, 'time_range_support': engine.time_range_support,
'timeout': engine.timeout} 'timeout': engine.timeout}
for engine_name, engine in engines.items()], for name, engine in engines.items() if request.preferences.validate_token(engine)],
'plugins': [{'name': plugin.name, 'plugins': [{'name': plugin.name,
'enabled': plugin.default_on} 'enabled': plugin.default_on}
for plugin in plugins], for plugin in plugins],

View File

@ -1,60 +1,112 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
from searx.testing import SearxTestCase from searx.testing import SearxTestCase
from searx.preferences import Preferences
from searx.engines import engines
import searx.preferences
import searx.search import searx.search
import searx.engines
# Values passed for the safesearch and pageno arguments of SearchQuery
# in the tests below.
SAFESEARCH = 0
PAGENO = 1

PUBLIC_ENGINE_NAME = 'general dummy'
PRIVATE_ENGINE_NAME = 'general private offline'

# Engine with an empty token list.
_PUBLIC_ENGINE = {
    'name': PUBLIC_ENGINE_NAME,
    'engine': 'dummy',
    'categories': 'general',
    'shortcut': 'gd',
    'timeout': 3.0,
    'tokens': [],
}

# Offline engine that lists 'my-token' as its only token.
_PRIVATE_ENGINE = {
    'name': PRIVATE_ENGINE_NAME,
    'engine': 'dummy-offline',
    'categories': 'general',
    'shortcut': 'do',
    'timeout': 3.0,
    'offline': True,
    'tokens': ['my-token'],
}

TEST_ENGINES = [_PUBLIC_ENGINE, _PRIVATE_ENGINE]
class SearchTestCase(SearxTestCase): class SearchTestCase(SearxTestCase):
@classmethod @classmethod
def setUpClass(cls): def setUpClass(cls):
searx.engines.initialize_engines([{ searx.engines.initialize_engines(TEST_ENGINES)
'name': 'general dummy',
'engine': 'dummy',
'categories': 'general',
'shortcut': 'gd',
'timeout': 3.0
}])
def test_timeout_simple(self): def test_timeout_simple(self):
searx.search.max_request_timeout = None searx.search.max_request_timeout = None
search_query = searx.query.SearchQuery('test', [{'category': 'general', 'name': 'general dummy'}], search_query = searx.query.SearchQuery('test', [{'category': 'general', 'name': PUBLIC_ENGINE_NAME}],
['general'], 'en-US', 0, 1, None, None) ['general'], 'en-US', SAFESEARCH, PAGENO, None, None,
preferences=Preferences(['oscar'], ['general'], engines, []))
search = searx.search.Search(search_query) search = searx.search.Search(search_query)
search.search() search.search()
self.assertEquals(search.actual_timeout, 3.0) self.assertEquals(search.actual_timeout, 3.0)
def test_timeout_query_above_default_nomax(self): def test_timeout_query_above_default_nomax(self):
searx.search.max_request_timeout = None searx.search.max_request_timeout = None
search_query = searx.query.SearchQuery('test', [{'category': 'general', 'name': 'general dummy'}], search_query = searx.query.SearchQuery('test', [{'category': 'general', 'name': PUBLIC_ENGINE_NAME}],
['general'], 'en-US', 0, 1, None, 5.0) ['general'], 'en-US', SAFESEARCH, PAGENO, None, 5.0,
preferences=Preferences(['oscar'], ['general'], engines, []))
search = searx.search.Search(search_query) search = searx.search.Search(search_query)
search.search() search.search()
self.assertEquals(search.actual_timeout, 3.0) self.assertEquals(search.actual_timeout, 3.0)
def test_timeout_query_below_default_nomax(self): def test_timeout_query_below_default_nomax(self):
searx.search.max_request_timeout = None searx.search.max_request_timeout = None
search_query = searx.query.SearchQuery('test', [{'category': 'general', 'name': 'general dummy'}], search_query = searx.query.SearchQuery('test', [{'category': 'general', 'name': PUBLIC_ENGINE_NAME}],
['general'], 'en-US', 0, 1, None, 1.0) ['general'], 'en-US', SAFESEARCH, PAGENO, None, 1.0,
preferences=Preferences(['oscar'], ['general'], engines, []))
search = searx.search.Search(search_query) search = searx.search.Search(search_query)
search.search() search.search()
self.assertEquals(search.actual_timeout, 1.0) self.assertEquals(search.actual_timeout, 1.0)
def test_timeout_query_below_max(self): def test_timeout_query_below_max(self):
searx.search.max_request_timeout = 10.0 searx.search.max_request_timeout = 10.0
search_query = searx.query.SearchQuery('test', [{'category': 'general', 'name': 'general dummy'}], search_query = searx.query.SearchQuery('test', [{'category': 'general', 'name': PUBLIC_ENGINE_NAME}],
['general'], 'en-US', 0, 1, None, 5.0) ['general'], 'en-US', SAFESEARCH, PAGENO, None, 5.0,
preferences=Preferences(['oscar'], ['general'], engines, []))
search = searx.search.Search(search_query) search = searx.search.Search(search_query)
search.search() search.search()
self.assertEquals(search.actual_timeout, 5.0) self.assertEquals(search.actual_timeout, 5.0)
def test_timeout_query_above_max(self): def test_timeout_query_above_max(self):
searx.search.max_request_timeout = 10.0 searx.search.max_request_timeout = 10.0
search_query = searx.query.SearchQuery('test', [{'category': 'general', 'name': 'general dummy'}], search_query = searx.query.SearchQuery('test', [{'category': 'general', 'name': PUBLIC_ENGINE_NAME}],
['general'], 'en-US', 0, 1, None, 15.0) ['general'], 'en-US', SAFESEARCH, PAGENO, None, 15.0,
preferences=Preferences(['oscar'], ['general'], engines, []))
search = searx.search.Search(search_query) search = searx.search.Search(search_query)
search.search() search.search()
self.assertEquals(search.actual_timeout, 10.0) self.assertEquals(search.actual_timeout, 10.0)
def test_query_private_engine_without_token(self):
    """Querying the private engine with no tokens set returns no results."""
    preferences = Preferences(['oscar'], ['general'], engines, [])
    search_query = searx.query.SearchQuery(
        'test', [{'category': 'general', 'name': PRIVATE_ENGINE_NAME}],
        ['general'], 'en-US', SAFESEARCH, PAGENO, None, 2.0,
        preferences=preferences)
    search = searx.search.Search(search_query)
    results = search.search()
    # assertEqual instead of assertEquals: the latter is a deprecated alias
    # that was removed in Python 3.12
    self.assertEqual(results.results_length(), 0)
def test_query_private_engine_with_incorrect_token(self):
    """Querying the private engine with a non-matching token returns no results."""
    preferences_with_tokens = Preferences(['oscar'], ['general'], engines, [])
    # 'bad-token' is not listed in the private engine's tokens
    preferences_with_tokens.parse_dict({'tokens': 'bad-token'})
    search_query = searx.query.SearchQuery(
        'test', [{'category': 'general', 'name': PRIVATE_ENGINE_NAME}],
        ['general'], 'en-US', SAFESEARCH, PAGENO, None, 2.0,
        preferences=preferences_with_tokens)
    search = searx.search.Search(search_query)
    results = search.search()
    # assertEqual instead of assertEquals: the latter is a deprecated alias
    # that was removed in Python 3.12
    self.assertEqual(results.results_length(), 0)
def test_query_private_engine_with_correct_token(self):
    """Querying the private engine with its token returns its single result."""
    preferences_with_tokens = Preferences(['oscar'], ['general'], engines, [])
    # 'my-token' is the token configured for the private test engine
    preferences_with_tokens.parse_dict({'tokens': 'my-token'})
    search_query = searx.query.SearchQuery(
        'test', [{'category': 'general', 'name': PRIVATE_ENGINE_NAME}],
        ['general'], 'en-US', SAFESEARCH, PAGENO, None, 2.0,
        preferences=preferences_with_tokens)
    search = searx.search.Search(search_query)
    results = search.search()
    # assertEqual instead of assertEquals: the latter is a deprecated alias
    # that was removed in Python 3.12
    self.assertEqual(results.results_length(), 1)