From 15eef0ebdb15af80c026302bef250dc7f4417951 Mon Sep 17 00:00:00 2001 From: Alexandre Flament Date: Fri, 20 Jan 2017 18:52:47 +0100 Subject: [PATCH] [enh] validate input and raise an exception inside search.py. The exception message is output in json and rss format. --- searx/exceptions.py | 32 +++++++++ searx/search.py | 66 +++++++++++------- .../__common__/opensearch_response_rss.xml | 6 ++ searx/webapp.py | 67 ++++++++++++++----- 4 files changed, 133 insertions(+), 38 deletions(-) create mode 100644 searx/exceptions.py diff --git a/searx/exceptions.py b/searx/exceptions.py new file mode 100644 index 00000000..c605ddca --- /dev/null +++ b/searx/exceptions.py @@ -0,0 +1,32 @@ +''' +searx is free software: you can redistribute it and/or modify +it under the terms of the GNU Affero General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +searx is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU Affero General Public License for more details. + +You should have received a copy of the GNU Affero General Public License +along with searx. If not, see < http://www.gnu.org/licenses/ >. + +(C) 2017- by Alexandre Flament, +''' + + +class SearxException(Exception): + pass + + +class SearxParameterException(SearxException): + + def __init__(self, name, value): + if value == '' or value is None: + message = 'Empty ' + name + ' parameter' + else: + message = 'Invalid value "' + value + '" for parameter ' + name + super(SearxParameterException, self).__init__(message) + self.parameter_name = name + self.parameter_value = value diff --git a/searx/search.py b/searx/search.py index e0f0cfd6..0bb77447 100644 --- a/searx/search.py +++ b/searx/search.py @@ -31,11 +31,16 @@ from searx.query import RawTextQuery, SearchQuery from searx.results import ResultContainer from searx import logger from searx.plugins import plugins +from searx.languages import language_codes +from searx.exceptions import SearxParameterException logger = logger.getChild('search') number_of_searches = 0 +language_code_set = set(l[0].lower() for l in language_codes) +language_code_set.add('all') + def send_http_request(engine, request_params, start_time, timeout_limit): # for page_load_time stats @@ -182,33 +187,13 @@ def default_request_params(): def get_search_query_from_webapp(preferences, form): - query = None - query_engines = [] - query_categories = [] - query_pageno = 1 - query_lang = 'all' - query_time_range = None + # no text for the query ? + if not form.get('q'): + raise SearxParameterException('q', '') # set blocked engines disabled_engines = preferences.engines.get_disabled() - # set specific language if set - query_lang = preferences.get_value('language') - - # safesearch - query_safesearch = preferences.get_value('safesearch') - - # TODO better exceptions - if not form.get('q'): - raise Exception('noquery') - - # set pagenumber - pageno_param = form.get('pageno', '1') - if not pageno_param.isdigit() or int(pageno_param) < 1: - pageno_param = 1 - - query_pageno = int(pageno_param) - # parse query, if tags are set, which change # the serch engine or search-language raw_text_query = RawTextQuery(form['q'], disabled_engines) @@ -217,6 +202,13 @@ def get_search_query_from_webapp(preferences, form): # set query query = raw_text_query.getSearchQuery() + # get and check page number + pageno_param = form.get('pageno', '1') + if not pageno_param.isdigit() or int(pageno_param) < 1: + raise SearxParameterException('pageno', pageno_param) + query_pageno = int(pageno_param) + + # get language # set specific language if set on request, query or preferences # TODO support search with multible languages if len(raw_text_query.languages): @@ -226,10 +218,38 @@ def get_search_query_from_webapp(preferences, form): else: query_lang = preferences.get_value('language') + # check language + if query_lang not in language_code_set: + raise SearxParameterException('language', query_lang) + + # get safesearch + if 'safesearch' in form: + query_safesearch = form.get('safesearch') + # first check safesearch + if not query_safesearch.isdigit(): + raise SearxParameterException('safesearch', query_safesearch) + query_safesearch = int(query_safesearch) + else: + query_safesearch = preferences.get_value('safesearch') + + # safesearch : second check + if query_safesearch < 0 or query_safesearch > 2: + raise SearxParameterException('safesearch', query_safesearch) + + # get time_range query_time_range = form.get('time_range') + # check time_range + if not(query_time_range is None)\ + and not (query_time_range in ['', 'day', 'week', 'month', 'year']): + raise SearxParameterException('time_range', query_time_range) + + # query_engines query_engines = raw_text_query.engines + # query_categories + query_categories = [] + # if engines are calculated from query, # set categories by using that informations if query_engines and raw_text_query.specific: diff --git a/searx/templates/__common__/opensearch_response_rss.xml b/searx/templates/__common__/opensearch_response_rss.xml index ddb60fa5..32c42e7c 100644 --- a/searx/templates/__common__/opensearch_response_rss.xml +++ b/searx/templates/__common__/opensearch_response_rss.xml @@ -11,6 +11,12 @@ {{ number_of_results }}