make search language handling less strict

The list in languages.py can change, so users may query with a language that is
no longer on the list, even though it is still recognized by a few engines.

Also treat `no` and `nb` as the same language, because they seem to return the
same results, though most engines will only support one or the other.
This commit is contained in:
marc 2017-03-01 17:11:51 -06:00 committed by Adam Tauber
parent 805fb02ed1
commit fd65c12921
8 changed files with 17 additions and 15 deletions

File diff suppressed because one or more lines are too long

View File

@ -94,6 +94,8 @@ def _fetch_supported_languages(resp):
options = dom.xpath('//div[@id="limit-languages"]//input')
for option in options:
code = option.xpath('./@id')[0].replace('_', '-')
if code == 'nb':
code = 'no'
supported_languages.append(code)
return supported_languages

View File

@ -47,6 +47,8 @@ def request(query, params):
# add language tag if specified
if params['language'] != 'all':
if params['language'] == 'no' or params['language'].startswith('no-'):
params['language'] = params['language'].replace('no', 'nb', 1)
if params['language'].find('-') < 0:
# tries to get a country code from language
for lang in supported_languages:
@ -118,6 +120,8 @@ def _fetch_supported_languages(resp):
supported_languages = []
for lang in regions_json['languages'].values():
if lang['code'] == 'nb':
lang['code'] = 'no'
for country in lang['countries']:
supported_languages.append(lang['code'] + '-' + country)

View File

@ -120,6 +120,8 @@ def _fetch_supported_languages(resp):
options = dom.xpath('//div[@id="regions-popup"]//ul/li/a')
for option in options:
code = option.xpath('./@data-val')[0]
if code.startswith('nb-'):
code = code.replace('nb', 'no', 1)
supported_languages.append(code)
return supported_languages

View File

@ -57,6 +57,7 @@ language_codes = (
(u"nl", u"Nederlands", u"", u"Dutch"),
(u"nl-BE", u"Nederlands", u"België", u"Dutch"),
(u"nl-NL", u"Nederlands", u"Nederland", u"Dutch"),
(u"no-NO", u"Norsk", u"", u"Norwegian"),
(u"pl-PL", u"Polski", u"", u"Polish"),
(u"pt", u"Português", u"", u"Portuguese"),
(u"pt-BR", u"Português", u"Brasil", u"Portuguese"),

View File

@ -107,6 +107,8 @@ class SearchLanguageSetting(EnumStringSetting):
pass
elif lang in self.choices:
data = lang
elif data == 'nb-NO':
data = 'no-NO'
elif data == 'ar-XA':
data = 'ar-SA'
else:

View File

@ -24,7 +24,7 @@ from searx.engines import (
import string
import re
VALID_LANGUAGE_CODE = re.compile(r'^[a-z]{2,3}(\-[A-Z]{2})?$')
VALID_LANGUAGE_CODE = re.compile(r'^[a-z]{2,3}(-[a-zA-Z]{2})?$')
class RawTextQuery(object):
@ -68,7 +68,7 @@ class RawTextQuery(object):
# this force a language
if query_part[0] == ':':
lang = query_part[1:].lower()
lang = query_part[1:].lower().replace('_', '-')
# user may set a valid, yet not selectable language
if VALID_LANGUAGE_CODE.match(lang):
@ -86,7 +86,7 @@ class RawTextQuery(object):
or lang_id.startswith(lang)\
or lang == lang_name\
or lang == english_name\
or lang.replace('_', ' ') == country:
or lang.replace('-', ' ') == country:
parse_next = True
self.languages.append(lang_id)
# to ensure best match (first match is not necessarily the best one)

View File

@ -27,20 +27,16 @@ from searx.engines import (
)
from searx.answerers import ask
from searx.utils import gen_useragent
from searx.query import RawTextQuery, SearchQuery
from searx.query import RawTextQuery, SearchQuery, VALID_LANGUAGE_CODE
from searx.results import ResultContainer
from searx import logger
from searx.plugins import plugins
from searx.languages import language_codes
from searx.exceptions import SearxParameterException
logger = logger.getChild('search')
number_of_searches = 0
language_code_set = set(l[0].lower() for l in language_codes)
language_code_set.add('all')
def send_http_request(engine, request_params, start_time, timeout_limit):
# for page_load_time stats
@ -219,7 +215,7 @@ def get_search_query_from_webapp(preferences, form):
query_lang = preferences.get_value('language')
# check language
if query_lang.lower() not in language_code_set:
if not VALID_LANGUAGE_CODE.match(query_lang):
raise SearxParameterException('language', query_lang)
# get safesearch
@ -371,11 +367,6 @@ class Search(object):
if search_query.pageno > 1 and not engine.paging:
continue
# if search-language is set and engine does not
# provide language-support, skip
if search_query.lang != 'all' and not engine.language_support:
continue
# if time_range is not supported, skip
if search_query.time_range and not engine.time_range_support:
continue