From 63a380c442480f2d7e1cb718791370b3114480a5 Mon Sep 17 00:00:00 2001 From: Albert Cervera i Areny Date: Mon, 1 Sep 2008 00:13:37 +0200 Subject: [PATCH] Added doxgen documentation. Added docs in the Hamming class. Added a Translator class. --- doc/Makefile | 9 + doc/doxygen/nanscan.doxyfile | 285 ++++++++++++++++++ doc/doxygen/stylesheets/doxygen-footer.html | 3 + doc/doxygen/stylesheets/doxygen-header.html | 6 + doc/doxygen/stylesheets/doxygen-tabs.css | 99 +++++++ doc/doxygen/stylesheets/doxygen.css | 310 ++++++++++++++++++++ hamming.py | 28 +- recognizer.py | 31 +- translations.txt | 7 + translator.py | 65 ++++ 10 files changed, 824 insertions(+), 19 deletions(-) create mode 100644 doc/Makefile create mode 100644 doc/doxygen/nanscan.doxyfile create mode 100644 doc/doxygen/stylesheets/doxygen-footer.html create mode 100644 doc/doxygen/stylesheets/doxygen-header.html create mode 100644 doc/doxygen/stylesheets/doxygen-tabs.css create mode 100644 doc/doxygen/stylesheets/doxygen.css create mode 100644 translations.txt create mode 100644 translator.py diff --git a/doc/Makefile b/doc/Makefile new file mode 100644 index 0000000..033ba27 --- /dev/null +++ b/doc/Makefile @@ -0,0 +1,9 @@ +doc: docbook doxygen + +docbook: + cd src && docbook2html -o ../html nanscan.docbook && cp *.png ../html + +doxygen: + cd doxygen && doxygen nanscan.doxyfile + +.PHONY: doxygen docbook diff --git a/doc/doxygen/nanscan.doxyfile b/doc/doxygen/nanscan.doxyfile new file mode 100644 index 0000000..f87c673 --- /dev/null +++ b/doc/doxygen/nanscan.doxyfile @@ -0,0 +1,285 @@ +# Doxyfile 1.5.4 + +#--------------------------------------------------------------------------- +# Project related configuration options +#--------------------------------------------------------------------------- +DOXYFILE_ENCODING = UTF-8 +PROJECT_NAME = NaNScaN +PROJECT_NUMBER = 1.0 +OUTPUT_DIRECTORY = . +CREATE_SUBDIRS = NO +OUTPUT_LANGUAGE = English +BRIEF_MEMBER_DESC = YES +REPEAT_BRIEF = YES +ABBREVIATE_BRIEF = "The $name class " \ + "The $name widget " \ + "The $name file " \ + is \ + provides \ + specifies \ + contains \ + represents \ + a \ + an \ + the +ALWAYS_DETAILED_SEC = NO +INLINE_INHERITED_MEMB = NO +FULL_PATH_NAMES = NO +STRIP_FROM_PATH = +STRIP_FROM_INC_PATH = +SHORT_NAMES = NO +JAVADOC_AUTOBRIEF = NO +QT_AUTOBRIEF = NO +MULTILINE_CPP_IS_BRIEF = NO +DETAILS_AT_TOP = NO +INHERIT_DOCS = YES +SEPARATE_MEMBER_PAGES = NO +TAB_SIZE = 8 +ALIASES = +OPTIMIZE_OUTPUT_FOR_C = NO +OPTIMIZE_OUTPUT_JAVA = NO +BUILTIN_STL_SUPPORT = NO +CPP_CLI_SUPPORT = NO +SIP_SUPPORT = NO +DISTRIBUTE_GROUP_DOC = NO +SUBGROUPING = YES +TYPEDEF_HIDES_STRUCT = NO +#--------------------------------------------------------------------------- +# Build related configuration options +#--------------------------------------------------------------------------- +EXTRACT_ALL = NO +EXTRACT_PRIVATE = NO +EXTRACT_STATIC = NO +EXTRACT_LOCAL_CLASSES = YES +EXTRACT_LOCAL_METHODS = NO +EXTRACT_ANON_NSPACES = NO +HIDE_UNDOC_MEMBERS = YES +HIDE_UNDOC_CLASSES = YES +HIDE_FRIEND_COMPOUNDS = NO +HIDE_IN_BODY_DOCS = NO +INTERNAL_DOCS = NO +CASE_SENSE_NAMES = YES +HIDE_SCOPE_NAMES = NO +SHOW_INCLUDE_FILES = YES +INLINE_INFO = YES +SORT_MEMBER_DOCS = YES +SORT_BRIEF_DOCS = NO +SORT_BY_SCOPE_NAME = NO +GENERATE_TODOLIST = YES +GENERATE_TESTLIST = YES +GENERATE_BUGLIST = YES +GENERATE_DEPRECATEDLIST= YES +ENABLED_SECTIONS = +MAX_INITIALIZER_LINES = 30 +SHOW_USED_FILES = YES +SHOW_DIRECTORIES = NO +FILE_VERSION_FILTER = +#--------------------------------------------------------------------------- +# configuration options related to warning and progress messages +#--------------------------------------------------------------------------- +QUIET = NO +WARNINGS = YES +WARN_IF_UNDOCUMENTED = YES +WARN_IF_DOC_ERROR = YES +WARN_NO_PARAMDOC = NO +WARN_FORMAT = "$file:$line: $text " +WARN_LOGFILE = +#--------------------------------------------------------------------------- +# configuration options related to the input files +#--------------------------------------------------------------------------- +INPUT = ../.. +INPUT_ENCODING = UTF-8 +FILE_PATTERNS = *.c \ + *.cc \ + *.cxx \ + *.cpp \ + *.c++ \ + *.d \ + *.java \ + *.ii \ + *.ixx \ + *.ipp \ + *.i++ \ + *.inl \ + *.h \ + *.hh \ + *.hxx \ + *.hpp \ + *.h++ \ + *.idl \ + *.odl \ + *.cs \ + *.php \ + *.php3 \ + *.inc \ + *.m \ + *.mm \ + *.dox \ + *.py \ + *.f90 \ + *.C \ + *.CC \ + *.C++ \ + *.II \ + *.I++ \ + *.H \ + *.HH \ + *.H++ \ + *.CS \ + *.PHP \ + *.PHP3 \ + *.M \ + *.MM \ + *.PY \ + *.F90 +RECURSIVE = YES +EXCLUDE = +EXCLUDE_SYMLINKS = NO +EXCLUDE_PATTERNS = +EXCLUDE_SYMBOLS = +EXAMPLE_PATH = +EXAMPLE_PATTERNS = * +EXAMPLE_RECURSIVE = NO +IMAGE_PATH = +INPUT_FILTER = +FILTER_PATTERNS = +FILTER_SOURCE_FILES = NO +#--------------------------------------------------------------------------- +# configuration options related to source browsing +#--------------------------------------------------------------------------- +SOURCE_BROWSER = YES +INLINE_SOURCES = NO +STRIP_CODE_COMMENTS = YES +REFERENCED_BY_RELATION = NO +REFERENCES_RELATION = NO +REFERENCES_LINK_SOURCE = NO +USE_HTAGS = NO +VERBATIM_HEADERS = NO +#--------------------------------------------------------------------------- +# configuration options related to the alphabetical class index +#--------------------------------------------------------------------------- +ALPHABETICAL_INDEX = NO +COLS_IN_ALPHA_INDEX = 5 +IGNORE_PREFIX = +#--------------------------------------------------------------------------- +# configuration options related to the HTML output +#--------------------------------------------------------------------------- +GENERATE_HTML = YES +HTML_OUTPUT = html +HTML_FILE_EXTENSION = .html +HTML_HEADER = stylesheets/doxygen-header.html +HTML_FOOTER = stylesheets/doxygen-footer.html +HTML_STYLESHEET = stylesheets/doxygen.css +HTML_ALIGN_MEMBERS = YES +GENERATE_HTMLHELP = NO +HTML_DYNAMIC_SECTIONS = NO +CHM_FILE = +HHC_LOCATION = +GENERATE_CHI = NO +BINARY_TOC = NO +TOC_EXPAND = NO +DISABLE_INDEX = NO +ENUM_VALUES_PER_LINE = 4 +GENERATE_TREEVIEW = NO +TREEVIEW_WIDTH = 250 +#--------------------------------------------------------------------------- +# configuration options related to the LaTeX output +#--------------------------------------------------------------------------- +GENERATE_LATEX = NO +LATEX_OUTPUT = latex +LATEX_CMD_NAME = latex +MAKEINDEX_CMD_NAME = makeindex +COMPACT_LATEX = NO +PAPER_TYPE = a4wide +EXTRA_PACKAGES = +LATEX_HEADER = +PDF_HYPERLINKS = YES +USE_PDFLATEX = YES +LATEX_BATCHMODE = NO +LATEX_HIDE_INDICES = NO +#--------------------------------------------------------------------------- +# configuration options related to the RTF output +#--------------------------------------------------------------------------- +GENERATE_RTF = NO +RTF_OUTPUT = rtf +COMPACT_RTF = NO +RTF_HYPERLINKS = NO +RTF_STYLESHEET_FILE = +RTF_EXTENSIONS_FILE = +#--------------------------------------------------------------------------- +# configuration options related to the man page output +#--------------------------------------------------------------------------- +GENERATE_MAN = NO +MAN_OUTPUT = man +MAN_EXTENSION = .3 +MAN_LINKS = NO +#--------------------------------------------------------------------------- +# configuration options related to the XML output +#--------------------------------------------------------------------------- +GENERATE_XML = NO +XML_OUTPUT = xml +XML_SCHEMA = +XML_DTD = +XML_PROGRAMLISTING = YES +#--------------------------------------------------------------------------- +# configuration options for the AutoGen Definitions output +#--------------------------------------------------------------------------- +GENERATE_AUTOGEN_DEF = NO +#--------------------------------------------------------------------------- +# configuration options related to the Perl module output +#--------------------------------------------------------------------------- +GENERATE_PERLMOD = NO +PERLMOD_LATEX = NO +PERLMOD_PRETTY = YES +PERLMOD_MAKEVAR_PREFIX = +#--------------------------------------------------------------------------- +# Configuration options related to the preprocessor +#--------------------------------------------------------------------------- +ENABLE_PREPROCESSING = YES +MACRO_EXPANSION = NO +EXPAND_ONLY_PREDEF = NO +SEARCH_INCLUDES = YES +INCLUDE_PATH = +INCLUDE_FILE_PATTERNS = +PREDEFINED = +EXPAND_AS_DEFINED = +SKIP_FUNCTION_MACROS = YES +#--------------------------------------------------------------------------- +# Configuration::additions related to external references +#--------------------------------------------------------------------------- +TAGFILES = +GENERATE_TAGFILE = +ALLEXTERNALS = NO +EXTERNAL_GROUPS = YES +PERL_PATH = /usr/bin/perl +#--------------------------------------------------------------------------- +# Configuration options related to the dot tool +#--------------------------------------------------------------------------- +CLASS_DIAGRAMS = NO +MSCGEN_PATH = +HIDE_UNDOC_RELATIONS = YES +HAVE_DOT = YES +CLASS_GRAPH = NO +COLLABORATION_GRAPH = NO +GROUP_GRAPHS = YES +UML_LOOK = NO +TEMPLATE_RELATIONS = NO +INCLUDE_GRAPH = NO +INCLUDED_BY_GRAPH = NO +CALL_GRAPH = NO +CALLER_GRAPH = NO +GRAPHICAL_HIERARCHY = YES +DIRECTORY_GRAPH = YES +DOT_IMAGE_FORMAT = png +DOT_PATH = +DOTFILE_DIRS = +DOT_GRAPH_MAX_NODES = 50 +MAX_DOT_GRAPH_DEPTH = 1000 +DOT_TRANSPARENT = YES +DOT_MULTI_TARGETS = NO +GENERATE_LEGEND = YES +DOT_CLEANUP = YES +#--------------------------------------------------------------------------- +# Configuration::additions related to the search engine +#--------------------------------------------------------------------------- +SEARCHENGINE = NO diff --git a/doc/doxygen/stylesheets/doxygen-footer.html b/doc/doxygen/stylesheets/doxygen-footer.html new file mode 100644 index 0000000..3c0741f --- /dev/null +++ b/doc/doxygen/stylesheets/doxygen-footer.html @@ -0,0 +1,3 @@ +
Generated by Doxygen
+ + diff --git a/doc/doxygen/stylesheets/doxygen-header.html b/doc/doxygen/stylesheets/doxygen-header.html new file mode 100644 index 0000000..8e1d10e --- /dev/null +++ b/doc/doxygen/stylesheets/doxygen-header.html @@ -0,0 +1,6 @@ + + +KTiny API documentation + + + diff --git a/doc/doxygen/stylesheets/doxygen-tabs.css b/doc/doxygen/stylesheets/doxygen-tabs.css new file mode 100644 index 0000000..dd4382a --- /dev/null +++ b/doc/doxygen/stylesheets/doxygen-tabs.css @@ -0,0 +1,99 @@ +/* tabs styles, based on http://www.alistapart.com/articles/slidingdoors */ + +DIV.tabs +{ + float : left; + width : 100%; + border-top : 1px solid #206dcd; + margin-bottom : 4px; +} + +DIV.tabs UL +{ + margin : 0px; + padding-left : 10px; + list-style : none; +} + +DIV.tabs LI, DIV.tabs FORM +{ + display : inline; + margin : 0px; + padding : 0px; +} + +DIV.tabs FORM +{ + float : right; +} + +DIV.tabs A +{ + float : left; + font-size : small; + font-weight : bold; + text-decoration : none; +} + +DIV.tabs A:hover +{ + background-position: 100% -150px; +} + +DIV.tabs A:link, DIV.tabs A:visited, +DIV.tabs A:active, DIV.tabs A:hover +{ + color: #1A419D; +} + +DIV.tabs SPAN +{ + float : left; + display : block; + padding : 5px 9px; + white-space : nowrap; +} + +DIV.tabs INPUT +{ + float : right; + display : inline; + font-size : 1em; +} + +DIV.tabs TD +{ + font-size : x-small; + font-weight : bold; + text-decoration : none; +} + + + +/* Commented Backslash Hack hides rule from IE5-Mac \*/ +DIV.tabs SPAN {float : none;} +/* End IE5-Mac hack */ + +DIV.tabs A:hover SPAN +{ + background-position: 0% -150px; +} + +DIV.tabs LI#current A +{ + background-position: 100% -150px; + border-width : 0px; +} + +DIV.tabs LI#current SPAN +{ + background-position: 0% -150px; + padding-bottom : 6px; +} + +DIV.nav +{ + background : none; + border : none; + border-bottom : 1px solid #84B0C7; +} diff --git a/doc/doxygen/stylesheets/doxygen.css b/doc/doxygen/stylesheets/doxygen.css new file mode 100644 index 0000000..9a5803e --- /dev/null +++ b/doc/doxygen/stylesheets/doxygen.css @@ -0,0 +1,310 @@ +BODY,H1,H2,H3,H4,H5,H6,P,CENTER,TD,TH,UL,DL,DIV { + font-family: Geneva, Arial, Helvetica, sans-serif; +} +BODY,TD { + font-size: 90%; +} +H1 { + text-align: center; + font-size: 160%; +} +H2 { + font-size: 120%; +} +H3 { + font-size: 100%; +} +CAPTION { font-weight: bold } +DIV.qindex { + width: 100%; + background-color: #e8eef2; + border: 1px solid #84b0c7; + text-align: center; + margin: 2px; + padding: 2px; + line-height: 140%; +} +DIV.nav { + width: 100%; + background-color: #e8eef2; + border: 1px solid #84b0c7; + text-align: center; + margin: 2px; + padding: 2px; + line-height: 140%; +} +DIV.navtab { + background-color: #e8eef2; + border: 1px solid #84b0c7; + text-align: center; + margin: 2px; + margin-right: 15px; + padding: 2px; +} +TD.navtab { + font-size: 70%; +} +A.qindex { + text-decoration: none; + font-weight: bold; + color: #1A419D; +} +A.qindex:visited { + text-decoration: none; + font-weight: bold; + color: #1A419D +} +A.qindex:hover { + text-decoration: none; + background-color: #ddddff; +} +A.qindexHL { + text-decoration: none; + font-weight: bold; + background-color: #6666cc; + color: #ffffff; + border: 1px double #9295C2; +} +A.qindexHL:hover { + text-decoration: none; + background-color: #6666cc; + color: #ffffff; +} +A.qindexHL:visited { text-decoration: none; background-color: #6666cc; color: #ffffff } +A.el { text-decoration: none; font-weight: bold } +A.elRef { font-weight: bold } +A.code:link { text-decoration: none; font-weight: normal; color: #0000FF} +A.code:visited { text-decoration: none; font-weight: normal; color: #0000FF} +A.codeRef:link { font-weight: normal; color: #0000FF} +A.codeRef:visited { font-weight: normal; color: #0000FF} +A:hover { text-decoration: none; background-color: #f2f2ff } +DL.el { margin-left: -1cm } +.fragment { + font-family: Fixed, monospace; + font-size: 95%; +} +PRE.fragment { + border: 1px solid #CCCCCC; + background-color: #f5f5f5; + margin-top: 4px; + margin-bottom: 4px; + margin-left: 2px; + margin-right: 8px; + padding-left: 6px; + padding-right: 6px; + padding-top: 4px; + padding-bottom: 4px; +} +DIV.ah { background-color: black; font-weight: bold; color: #ffffff; margin-bottom: 3px; margin-top: 3px } +TD.md { background-color: #F4F4FB; font-weight: bold; } +TD.mdPrefix { + background-color: #F4F4FB; + color: #606060; + font-size: 80%; +} +TD.mdname1 { background-color: #F4F4FB; font-weight: bold; color: #602020; } +TD.mdname { background-color: #F4F4FB; font-weight: bold; color: #602020; width: 600px; } +DIV.groupHeader { + margin-left: 16px; + margin-top: 12px; + margin-bottom: 6px; + font-weight: bold; +} +DIV.groupText { margin-left: 16px; font-style: italic; font-size: 90% } +BODY { + background: white; + color: black; + margin-right: 20px; + margin-left: 20px; +} +TD.indexkey { + background-color: #e8eef2; + font-weight: bold; + padding-right : 10px; + padding-top : 2px; + padding-left : 10px; + padding-bottom : 2px; + margin-left : 0px; + margin-right : 0px; + margin-top : 2px; + margin-bottom : 2px; + border: 1px solid #CCCCCC; +} +TD.indexvalue { + background-color: #e8eef2; + font-style: italic; + padding-right : 10px; + padding-top : 2px; + padding-left : 10px; + padding-bottom : 2px; + margin-left : 0px; + margin-right : 0px; + margin-top : 2px; + margin-bottom : 2px; + border: 1px solid #CCCCCC; +} +TR.memlist { + background-color: #f0f0f0; +} +P.formulaDsp { text-align: center; } +IMG.formulaDsp { } +IMG.formulaInl { vertical-align: middle; } +SPAN.keyword { color: #008000 } +SPAN.keywordtype { color: #604020 } +SPAN.keywordflow { color: #e08000 } +SPAN.comment { color: #800000 } +SPAN.preprocessor { color: #806020 } +SPAN.stringliteral { color: #002080 } +SPAN.charliteral { color: #008080 } +.mdTable { + border: 1px solid #868686; + background-color: #F4F4FB; +} +.mdRow { + padding: 8px 10px; +} +.mdescLeft { + padding: 0px 8px 4px 8px; + font-size: 80%; + font-style: italic; + background-color: #FAFAFA; + border-top: 1px none #E0E0E0; + border-right: 1px none #E0E0E0; + border-bottom: 1px none #E0E0E0; + border-left: 1px none #E0E0E0; + margin: 0px; +} +.mdescRight { + padding: 0px 8px 4px 8px; + font-size: 80%; + font-style: italic; + background-color: #FAFAFA; + border-top: 1px none #E0E0E0; + border-right: 1px none #E0E0E0; + border-bottom: 1px none #E0E0E0; + border-left: 1px none #E0E0E0; + margin: 0px; +} +.memItemLeft { + padding: 1px 0px 0px 8px; + margin: 4px; + border-top-width: 1px; + border-right-width: 1px; + border-bottom-width: 1px; + border-left-width: 1px; + border-top-color: #E0E0E0; + border-right-color: #E0E0E0; + border-bottom-color: #E0E0E0; + border-left-color: #E0E0E0; + border-top-style: solid; + border-right-style: none; + border-bottom-style: none; + border-left-style: none; + background-color: #FAFAFA; + font-size: 80%; +} +.memItemRight { + padding: 1px 8px 0px 8px; + margin: 4px; + border-top-width: 1px; + border-right-width: 1px; + border-bottom-width: 1px; + border-left-width: 1px; + border-top-color: #E0E0E0; + border-right-color: #E0E0E0; + border-bottom-color: #E0E0E0; + border-left-color: #E0E0E0; + border-top-style: solid; + border-right-style: none; + border-bottom-style: none; + border-left-style: none; + background-color: #FAFAFA; + font-size: 80%; +} +.memTemplItemLeft { + padding: 1px 0px 0px 8px; + margin: 4px; + border-top-width: 1px; + border-right-width: 1px; + border-bottom-width: 1px; + border-left-width: 1px; + border-top-color: #E0E0E0; + border-right-color: #E0E0E0; + border-bottom-color: #E0E0E0; + border-left-color: #E0E0E0; + border-top-style: none; + border-right-style: none; + border-bottom-style: none; + border-left-style: none; + background-color: #FAFAFA; + font-size: 80%; +} +.memTemplItemRight { + padding: 1px 8px 0px 8px; + margin: 4px; + border-top-width: 1px; + border-right-width: 1px; + border-bottom-width: 1px; + border-left-width: 1px; + border-top-color: #E0E0E0; + border-right-color: #E0E0E0; + border-bottom-color: #E0E0E0; + border-left-color: #E0E0E0; + border-top-style: none; + border-right-style: none; + border-bottom-style: none; + border-left-style: none; + background-color: #FAFAFA; + font-size: 80%; +} +.memTemplParams { + padding: 1px 0px 0px 8px; + margin: 4px; + border-top-width: 1px; + border-right-width: 1px; + border-bottom-width: 1px; + border-left-width: 1px; + border-top-color: #E0E0E0; + border-right-color: #E0E0E0; + border-bottom-color: #E0E0E0; + border-left-color: #E0E0E0; + border-top-style: solid; + border-right-style: none; + border-bottom-style: none; + border-left-style: none; + color: #606060; + background-color: #FAFAFA; + font-size: 80%; +} +.search { color: #003399; + font-weight: bold; +} +FORM.search { + margin-bottom: 0px; + margin-top: 0px; +} +INPUT.search { font-size: 75%; + color: #000080; + font-weight: normal; + background-color: #e8eef2; +} +TD.tiny { font-size: 75%; +} +a { + color: #1A41A8; +} +a:visited { + color: #2A3798; +} +.dirtab { padding: 4px; + border-collapse: collapse; + border: 1px solid #84b0c7; +} +TH.dirtab { background: #e8eef2; + font-weight: bold; +} +HR { height: 1px; + border: none; + border-top: 1px solid #206dcd; +} + diff --git a/hamming.py b/hamming.py index b899edb..cd0fd4e 100644 --- a/hamming.py +++ b/hamming.py @@ -16,20 +16,36 @@ # Free Software Foundation, Inc., # 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. +from translator import * + +## @brief This class calculates the Hamming distance between two strings. +# +# When two given characters differ completely they add 2 to the final distance +# between the strings. Two 'similar' characters (defined by the given translator +# or the default translator if none specified) will add 1 and 0 for two +# identical characters. +# +# This distinction of 'similar' and 'different' characters can be useful to +# 'correct' OCR defects. class Hamming: + ## @brief Calculates Hamming distance between two strings. Optionally a + # translator can be provieded. A default translator will be used if none + # specified. @staticmethod - def simplify( text ): - return text + def hamming( text1, text2, translator = None ): + if not translator: + translator = Translator() + + transText1 = translator.translated( text1 ) + transText2 = translator.translated( text2 ) - @staticmethod - def hamming( text1, text2 ): value = 0 size = min(len(text1), len(text2)) for i in range(size): if text1[i] == text2[i]: continue - if Hamming.simplify( text1[i] ) == Hamming.simplify( text2[i] ): + if transText1[i] == transText2[i]: value += 1 continue value += 2 @@ -37,6 +53,7 @@ class Hamming: return value if __name__ == '__main__': + print Hamming.hamming( 'si', '$l' ) print Hamming.hamming( 'abc', 'abc' ) print Hamming.hamming( 'abcabc', 'abc' ) print Hamming.hamming( 'abcdef', 'abc' ) @@ -44,3 +61,4 @@ if __name__ == '__main__': print Hamming.hamming( 'bcdef', 'abc' ) for x in range(10000): Hamming.hamming( 'text de la plantilla', 'text llarg que pot ser del document que tractem actualment' ) + diff --git a/recognizer.py b/recognizer.py index 28b8a4e..467f6d7 100644 --- a/recognizer.py +++ b/recognizer.py @@ -27,20 +27,6 @@ from trigram import * import tempfile -# Smarter processing models and functions -def translate(text): - txt = text - f=codecs.open('addons/smart_attach/translations.txt', 'r', 'utf-8') - if not f: - print "File not found" - return txt - translations = f.readlines() - f.close() - for x in translations: - for y in x[1:]: - txt = txt.replace( y, x[0] ) - return txt - class Analyze(QThread): def __init__(self, analyzer, image, parent=None): QThread.__init__(self, parent) @@ -185,3 +171,20 @@ class Recognizer(QObject): print "Template %s has score %s with offset (%s,%s)" % (template.name, score, xOffset, yOffset) return best + #def findTemplateOffset( self, template ): + #if not template.boxes: + #return QPoint( 0, 0 ) +# + #lines = self.ocr.textLines() +# + #for templateBox in template.boxes: + #if templateBox.type != 'matcher': + #continue +# + #for line in lines: + #templateBox.text +# + + # Trigram.trigram( lines[0], + + diff --git a/translations.txt b/translations.txt new file mode 100644 index 0000000..e0b4454 --- /dev/null +++ b/translations.txt @@ -0,0 +1,7 @@ +lií +aõ +e€ +s$ş +oº0ø +yv +bh diff --git a/translator.py b/translator.py new file mode 100644 index 0000000..219542a --- /dev/null +++ b/translator.py @@ -0,0 +1,65 @@ +# Copyright (C) 2008 by Albert Cervera i Areny +# albert@nan-tic.com +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the +# Free Software Foundation, Inc., +# 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + +import codecs +import os + +## @brief This class provides a simple way of converting similar characters +# to the same one. +# +# This can proof useful to overcome OCR errors and is used in Hamming class, +# for example. Default translation file provides families of characters. For +# example 's', 'S', '$' are in the same one because the OCR may sometimes +# recognize an 'S' as '$'. 'l', 'i' and '|' are in another family. +# +# The text 'eli a sa|de$' will be converted to 'ell a saldes'. The translator +# replaces any element of a family by the first character of the family it is in. +class Translator: + def __init__(self): + self.translations = None + + ## @brief Sets the translation list. + # + # The list should follow the following structure: [ 'sS$', 'li|', ... ] + def setTranslations(self, translations): + self.translations = translations + + ## @brief Loads the translation list from the given file. + # + # Each character family must be in a different line. See the default + # translations.txt file if you need an example. + def load(self, fileName): + f=codecs.open(fileName, 'r', 'utf-8') + if not f: + print "File not found" + return txt + self.translations = f.readlines() + f.close() + + ## @brief Returns the given text replacing each character with the first + # character of its family or itself if it's not in any character family. + def translated(self, text): + if self.translations == None: + self.load( os.path.join( os.path.abspath(os.path.dirname(__file__)), 'translations.txt' ) ) + + result = text + for x in self.translations: + for y in x[1:]: + result = result.replace( y, x[0] ) + return result +