2017-07-25 08:54:41 +02:00
|
|
|
# -*- coding: utf-8 -*-
|
|
|
|
# The COPYRIGHT file at the top level of this repository contains the full
|
|
|
|
# copyright notices and license terms.
|
|
|
|
import unicodedata
|
|
|
|
|
2018-10-11 15:32:09 +02:00
|
|
|
# Character substitution tables used by unaccent(): the character at a given
# index of src_chars is replaced by the character at the same index of
# dst_chars.
src_chars = '/*+?¿!$[]{}@#`^:;<>=~%\\'
dst_chars = '________________________'
|
2018-01-18 17:08:54 +01:00
|
|
|
|
2017-07-25 08:54:41 +02:00
|
|
|
|
|
|
|
def normalize(text):
    """Return *text* encoded as UTF-8 bytes when it is a ``str``.

    Any other value (bytes, None, numbers, ...) is returned unchanged.
    """
    return text.encode('utf-8') if isinstance(text, str) else text
|
|
|
|
|
2018-01-18 17:08:54 +01:00
|
|
|
|
2017-07-25 08:54:41 +02:00
|
|
|
def unaccent(text):
    """Return an ASCII-only version of *text* as bytes.

    Processing steps:

    1. ``bytes`` input is decoded as UTF-8 so the rest works on ``str``.
    2. Every character of ``src_chars`` is replaced by the character at the
       same index of ``dst_chars``.
    3. The text is decomposed with NFKD and encoded to ASCII with the
       ``'ignore'`` error handler, which drops whatever cannot be encoded
       (e.g. the combining marks left over from accented letters).
    4. Leading and trailing underscores are stripped.

    :param text: ``str`` or UTF-8 encoded ``bytes``.
    :return: ``bytes`` containing only ASCII characters (therefore also
        valid UTF-8).
    :raises UnicodeDecodeError: if *text* is bytes that are not valid UTF-8.
    """
    if isinstance(text, bytes):
        text = str(text, 'utf-8')
    output = text
    # zip() stops at the shorter of the two tables, so a dst_chars shorter
    # than src_chars simply leaves the trailing src characters untouched.
    for src_char, dst_char in zip(src_chars, dst_chars):
        output = output.replace(src_char, dst_char)
    ascii_bytes = unicodedata.normalize('NFKD', output).encode('ASCII',
        'ignore')
    # encode() produced bytes, so strip() needs a bytes argument; passing
    # the str '_' raises TypeError on Python 3. No further encode is needed
    # because ASCII bytes are already valid UTF-8.
    return ascii_bytes.strip(b'_')
|