mirror of https://github.com/NaN-tic/nanscan.git
66 lines
2.4 KiB
Python
66 lines
2.4 KiB
Python
# Copyright (C) 2008 by Albert Cervera i Areny
|
|
# albert@nan-tic.com
|
|
#
|
|
# This program is free software; you can redistribute it and/or modify
|
|
# it under the terms of the GNU General Public License as published by
|
|
# the Free Software Foundation; either version 2 of the License, or
|
|
# (at your option) any later version.
|
|
#
|
|
# This program is distributed in the hope that it will be useful,
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
# GNU General Public License for more details.
|
|
#
|
|
# You should have received a copy of the GNU General Public License
|
|
# along with this program; if not, write to the
|
|
# Free Software Foundation, Inc.,
|
|
# 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
|
|
|
import codecs
|
|
import os
|
|
|
|
## @brief This class provides a simple way of converting similar characters
|
|
# to the same one.
|
|
#
|
|
# This can prove useful to overcome OCR errors and is used in the Hamming class,
|
|
# for example. Default translation file provides families of characters. For
|
|
# example 's', 'S', '$' are in the same one because the OCR may sometimes
|
|
# recognize an 'S' as '$'. 'l', 'i' and '|' are in another family.
|
|
#
|
|
# The text 'eli a sa|de$' will be converted to 'ell a saldes'. The translator
|
|
# replaces any element of a family by the first character of the family it is in.
|
|
class Translator:
    def __init__(self):
        # None means "nothing configured yet": translated() reacts to it by
        # loading the translations.txt file located next to this module.
        self.translations = None

    ## @brief Sets the translation list.
    #
    # The list should follow the following structure: [ 'sS$', 'li|', ... ]
    # The first character of each entry is the one that every other
    # character of the same entry is replaced with.
    def setTranslations(self, translations):
        self.translations = translations

    ## @brief Loads the translation list from the given file.
    #
    # Each character family must be in a different line. See the default
    # translations.txt file if you need an example.
    #
    # The file is read as UTF-8. Line terminators are dropped and empty lines
    # are skipped, so a newline never ends up being a member of a family.
    # If the file cannot be opened, the IOError raised by codecs.open() is
    # propagated to the caller and the current translations are left as is.
    def load(self, fileName):
        # codecs.open() raises on failure (it never returns a false value),
        # so no explicit "file not found" check is needed. The with block
        # guarantees the file is closed even if reading or decoding fails.
        with codecs.open(fileName, 'r', 'utf-8') as f:
            lines = f.read().splitlines()
        self.translations = [line for line in lines if line]

    ## @brief Returns the given text replacing each character with the first
    # character of its family or itself if it's not in any character family.
    #
    # If no translation list has been set or loaded yet, the default
    # translations.txt file found in this module's directory is loaded first.
    def translated(self, text):
        if self.translations is None:
            defaultFile = os.path.join(
                os.path.abspath(os.path.dirname(__file__)),
                'translations.txt'
            )
            self.load(defaultFile)

        result = text
        for family in self.translations:
            # A family with fewer than two characters has nothing to replace.
            if len(family) < 2:
                continue
            representative = family[0]
            for member in family[1:]:
                result = result.replace(member, representative)
        return result
|
|
|