diff --git a/NanScan/Generics/InvoiceRecognizer.py b/NanScan/Generics/InvoiceRecognizer.py
new file mode 100644
index 0000000..b6c44e0
--- /dev/null
+++ b/NanScan/Generics/InvoiceRecognizer.py
@@ -0,0 +1,153 @@
+# Copyright (C) 2009 by Albert Cervera i Areny
+# albert@nan-tic.com
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the
+# Free Software Foundation, Inc.,
+# 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+
+from NanScan.LevenshteinDistance import *
+from NanScan.Range import *
+from NanScan.TextPatterns import *
+
+class InvoiceRecognizer:
+ Tags = {
+ 'number': {
+ 'tag': [
+ u'factura',
+ u'numero factura',
+ u'factura numero',
+ u'num. de factura',
+ u'factura num.'
+ ],
+ 'type': 'mostly-numeric'
+ },
+ 'date': {
+ 'tag': [
+ u'fecha',
+ u'fecha factura',
+ u'fecha emision',
+ u'data:',
+ u'data',
+ u'data factura'
+ ],
+ 'type': 'date'
+ # With dates we need to be able to find a date with
+ # the format '1 Sep. 2009'. Also we need to find the
+ # date without a tag. Something like:
+ #
+ # 'fallback': functionName,
+ #
+ # might be appropriate for those cases in which the
+ # tag can't be found.
+ },
+ 'amount': {
+ 'tag': [
+ u'total',
+ u'total factura',
+ u'total a pagar (euros)'
+ ],
+ 'type': 'numeric'
+ }
+ }
+ def recognize(self, recognizer):
+ #text = recognizer.textInRegion('text')
+ analyzer = recognizer.analyzers['text']
+ self.textLines = analyzer.textLinesWithSpaces()
+ result = ''
+ for tag in InvoiceRecognizer.Tags:
+ result += 'Tag: %s, Value: %s\n' % ( tag, self.findTagValue( tag ) )
+ return result
+
+ def formatedLine(self, line):
+ text = u''
+ for c in line:
+ text += c.character
+ return text
+
+ def findText(self, textToFind):
+ ranges = Range.extractAllRangesFromDocument( self.textLines, len(textToFind) )
+ for ran in ranges:
+ text = ran.text()
+ value = Levenshtein.levenshtein( text, textToFind )
+ ran.distance = value
+ ranges.sort( rangeDistanceComparison )
+ if ranges:
+ return ranges[0]
+ else:
+ return None
+
+
+ def findTagValue(self, tag):
+ ranges = []
+ for tagData in InvoiceRecognizer.Tags[tag]['tag']:
+ ran = self.findText( tagData )
+ if ran:
+ ranges.append( ran )
+ ranges.sort( rangeDistanceComparison )
+ #ran = ranges[0]
+ distance = ranges[0].distance
+ sameDistance = [x for x in ranges if x.distance == distance]
+ sameDistance.sort( rangeLengthComparison )
+ #print "SECOND 5: ", [x.text().encode('ascii','ignore') for x in sameDistance[:5]]
+ ran = sameDistance[-1]
+
+ print "RANGE FOR TAG %s: %s" % ( tag, ran.text().encode('ascii','ignore') )
+
+ # Extract text on the right
+ line = self.formatedLine( self.textLines[ ran.line ] )
+ rightValue = line[ran.pos+ran.length+1:].strip().split(' ')[0]
+ print "R: ", line[ran.pos+ran.length+1:].strip().encode('ascii','ignore')
+ print "rightValue: ", rightValue.encode('ascii','ignore')
+ print "SAME LINE: ", line.encode('ascii','ignore')
+
+ # Extract text on the bottom
+ if ran.line < len(self.textLines)-1:
+ line = self.textLines[ran.line+1]
+ print "NEXT LINE: ", self.formatedLine( self.textLines[ran.line+1] ).encode('ascii','ignore')
+ boxBottom = ran.rect()
+ boxBottom.moveTop( line[0].box.y() )
+ bottomValue = u''
+ for c in line:
+ if c.box.intersects( boxBottom ):
+ bottomValue += c.character
+ else:
+ bottomValue = u''
+
+ # Decide which of both values match the given tag type
+ type = InvoiceRecognizer.Tags[ tag ][ 'type' ]
+ if type == 'numeric':
+ if isFloat( rightValue ):
+ return textToFloat( rightValue )
+ elif isFloat( bottomValue ):
+ return textToFloat( bottomValue )
+ else:
+ return None
+ elif type == 'date':
+ if isDate( rightValue ):
+ return textToDate( rightValue )
+ elif isDate( bottomValue ):
+ return textToDate( bottomValue )
+ else:
+ return None
+ elif type == 'mostly-numeric':
+ if isMostlyNumeric( rightValue ):
+ return rightValue
+ elif isMostlyNumeric( bottomValue ):
+ return bottomValue
+ else:
+ return rightValue
+ else:
+ return rightValue
+
diff --git a/NanScan/Generics/__init__.py b/NanScan/Generics/__init__.py
new file mode 100644
index 0000000..923d4c9
--- /dev/null
+++ b/NanScan/Generics/__init__.py
@@ -0,0 +1,19 @@
+# Copyright (C) 2009 by Albert Cervera i Areny
+# albert@nan-tic.com
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the
+# Free Software Foundation, Inc.,
+# 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+
diff --git a/NanScan/Ocr.py b/NanScan/Ocr.py
index a32e315..173fc8a 100755
--- a/NanScan/Ocr.py
+++ b/NanScan/Ocr.py
@@ -44,7 +44,7 @@ def boxComparison(x, y):
else:
return 0
-## @breif This class allows using an OCR and provides several convenient functions
+## @brief This class allows using an OCR and provides several convenient functions
# regarding text and image processing such as deskewing or obtaining formated text.
class Ocr(Analyzer):
file = ""
@@ -132,12 +132,12 @@ class Ocr(Analyzer):
## @brief Returns the text of a given region of the image.
# It's the same as calling formatedText().
- def textInRegion(self, region):
+ def textInRegion(self, region=None):
return self.formatedText( region )
## @brief Returns the bounding rectangle of the text returned by textInRegion for
# the given region.
- def featureRectInRegion(self, region):
+ def featureRectInRegion(self, region=None):
lines = self.textLinesWithSpaces( region )
rect = QRectF()
for line in lines:
@@ -242,6 +242,58 @@ class Ocr(Analyzer):
line.sort( boxComparison )
return lines
+ ## @brief This function adds spaces between words of a single line of boxes.
+ def textLineWithSpaces(self, line):
+ width = 0
+ count = 0
+ left = None
+ spacesToAdd = []
+ words = []
+ for c in line:
+ if left:
+ # If separation between previous and current char
+ # is greater than a third of the average character
+ # width we'll add a space.
+ if c.box.left() - left > ( width / count ) / 3:
+ if spacesToAdd:
+ words.append( line[spacesToAdd[-1]:count] )
+ spacesToAdd.append( count )
+
+ # c.character is already a unicode string
+ left = c.box.right()
+ width += c.box.width()
+ count += 1
+
+ # Try to find out if they are fixed sized characters
+ # We've got some problems with fixed size fonts. In some cases the 'I' letter will
+ # have the width of a pipe but the distance between characters will be fixed. In these
+ # cases it's very probable our algorithm will add incorrect spaces before and/or after
+ # the 'I' letter. This should be fixed by somehow determining if it's a fixed sized
+ # font. The commented code below tries to do just that by calculating distances within
+ # the letters of each word. We need to find out if something like this can work and
+ # use it.
+ #for x in words:
+ #dist = []
+ #for c in range( len(x)-1 ):
+ #dist.append( x[c+1].box.center().x() - x[c].box.center().x() )
+ #print 'Paraula: ', (u''.join( [i.character for i in x] )).encode( 'ascii', 'ignore')
+ #print 'Distancies: ', dist
+
+
+ # Reverse so indexes are still valid after insertions
+ spacesToAdd.reverse()
+ previousIdx = None
+ for idx in spacesToAdd:
+ c = Character()
+ c.character = u' '
+ c.box = QRectF()
+ c.box.setTop( line[idx - 1].box.top() )
+ c.box.setBottom( line[idx - 1].box.bottom() )
+ c.box.setLeft( line[idx - 1].box.right() )
+ c.box.setRight( line[idx].box.left() )
+ line.insert( idx, c )
+
+
## @brief This function is similar to textLines() but adds spaces between words.
# The result is also a list of lines each line being a list of Character objects.
def textLinesWithSpaces(self, region=None):
@@ -257,54 +309,7 @@ class Ocr(Analyzer):
# which is quite usual.
for line in lines:
- width = 0
- count = 0
- left = None
- spacesToAdd = []
- words = []
- for c in line:
- if left:
- # If separtion between previous and current char
- # is greater than a third of the average character
- # width we'll add a space.
- if c.box.left() - left > ( width / count ) / 3:
- if spacesToAdd:
- words.append( line[spacesToAdd[-1]:count] )
- spacesToAdd.append( count )
-
- # c.character is already a unicode string
- left = c.box.right()
- width += c.box.width()
- count += 1
-
- # Try to find out if they are fixed sized characters
- # We've got some problems with fixed size fonts. In some cases the 'I' letter will
- # have the width of a pipe but the distance between characters will be fixed. In these
- # cases it's very probable our algorithm will add incorrect spaces before and/or after
- # the 'I' letter. This should be fixed by somehow determining if it's a fixed sized
- # font. The commented code below tries to do just that by calculating distances within
- # the letters of each word. We need to find out if something like this can work and
- # use it.
- #for x in words:
- #dist = []
- #for c in range( len(x)-1 ):
- #dist.append( x[c+1].box.center().x() - x[c].box.center().x() )
- #print 'Paraula: ', (u''.join( [i.character for i in x] )).encode( 'ascii', 'ignore')
- #print 'Distancies: ', dist
-
-
- # Reverse so indexes are still valid after insertions
- spacesToAdd.reverse()
- previousIdx = None
- for idx in spacesToAdd:
- c = Character()
- c.character = u' '
- c.box = QRectF()
- c.box.setTop( line[idx - 1].box.top() )
- c.box.setBottom( line[idx - 1].box.bottom() )
- c.box.setLeft( line[idx - 1].box.right() )
- c.box.setRight( line[idx].box.left() )
- line.insert( idx, c )
+ self.textLineWithSpaces( line )
return lines
diff --git a/NanScan/Recognizer.py b/NanScan/Recognizer.py
index c1866aa..91627cf 100644
--- a/NanScan/Recognizer.py
+++ b/NanScan/Recognizer.py
@@ -29,6 +29,7 @@ from Trigram import *
from Hamming import *
from LevenshteinDistance import *
from Translator import *
+from Range import *
import tempfile
@@ -70,7 +71,7 @@ class Recognizer(QObject):
if type in self.analyzers:
return self.analyzers[type].boxes
else:
- return None
+ return []
def analyzersAvailable(self):
return self.analyzers.keys()
@@ -148,11 +149,9 @@ class Recognizer(QObject):
# 5 (the default) will make the template move 5 millimeter to the right,
# 5 to the left, 5 to the top and 5 to the bottom. This means 121 positions
# per template.
+ #
# Note that the image must have been scanned (using scan() or startScan())
# before using this function.
- #
- # TODO: Using offsets to find the best template is easy but highly inefficient.
- # a smarter solution should be implemented.
def findMatchingTemplateByOffset( self, templates, offset = 5 ):
max = 0
best = {
@@ -200,9 +199,6 @@ class Recognizer(QObject):
#
# Note that the image must have been scanned (using scan() or startScan())
# before using this function.
- #
- # TODO: Using offsets to find the best template is easy but highly inefficient.
- # a smarter solution should be implemented.
def findMatchingTemplateByText( self, templates ):
max = 0
best = {
@@ -224,7 +220,6 @@ class Recognizer(QObject):
# Apply template with offset found
currentDocument = self.extractWithTemplate( template, offset.x(), offset.y() )
for documentBox in currentDocument.boxes:
- print "Applying..."
if documentBox.templateBox.type != 'matcher':
continue
templateBox = documentBox.templateBox
@@ -373,65 +368,3 @@ class TemplateBoxRangeIterator:
break
return result
-def rangeDistanceComparison(x, y):
- if x.distance > y.distance:
- return 1
- elif x.distance < y.distance:
- return -1
- else:
- return 0
-
-## @brief This class represents a group of characters in a document.
-class Range:
- def __init__(self):
- self.line = 0
- self.pos = 0
- self.length = 0
- self.document = None
-
- ## @brief Returns a unicode string with the text of the current range
- def text(self):
- line = self.document[self.line]
- chars = line[self.pos:self.pos + self.length]
- return u''.join( [x.character for x in chars] )
-
- ## @brief Returns the bounding rectangle of the text in the range
- def rect(self):
- line = self.document[self.line]
- chars = line[self.pos:self.pos + self.length]
- rect = QRectF()
- for c in chars:
- rect = rect.united( c.box )
- return rect
-
- ## @brief Returns a list with all possible ranges of size length of the
- # given document
- @staticmethod
- def extractAllRangesFromDocument(lines, length, width=0):
- if length <= 0:
- return []
- ranges = []
- for line in range(len(lines)):
- if length >= len(lines[line]):
- ran = Range()
- ran.line = line
- ran.pos = 0
- ran.length = len(lines[line])
- ran.document = lines
- #if width:
- # while ran.rect().width() > width:
- # ran.length -= 1
- ranges.append( ran )
- continue
- for pos in range(len(lines[line]) - length + 1):
- ran = Range()
- ran.line = line
- ran.pos = pos
- ran.length = length
- ran.document = lines
- #if width:
- # while ran.rect().width() > width:
- # ran.length -= 1
- ranges.append( ran )
- return ranges
-
diff --git a/NanScan/test-scandialog.py b/NanScan/test-scandialog.py
index 63c6cca..975ce1a 100644
--- a/NanScan/test-scandialog.py
+++ b/NanScan/test-scandialog.py
@@ -1,5 +1,5 @@
from PyQt4.QtGui import *
-from scandialog import *
+from ScanDialog import *
import sys
import os
@@ -10,7 +10,7 @@ dialog = ScanDialog()
if os.name == 'nt':
FileSaveThreaded.directory = 'c:\\images'
else:
- FileSaveThreaded.directory = '/tmp'
+ FileSaveThreaded.directory = '/tmp/scan'
dialog.exec_()
diff --git a/Planta/MainWindow.py b/Planta/MainWindow.py
index b177af3..fb8544b 100644
--- a/Planta/MainWindow.py
+++ b/Planta/MainWindow.py
@@ -454,6 +454,7 @@ class MainWindow(QMainWindow):
self.connect( self.actionUnzoom, SIGNAL('triggered()'), self.unzoom )
self.connect( self.actionFindMatchingTemplateByOffset, SIGNAL('triggered()'), self.findMatchingTemplateByOffset )
self.connect( self.actionFindMatchingTemplateByText, SIGNAL('triggered()'), self.findMatchingTemplateByText )
+ self.connect( self.actionRecognizeInvoice, SIGNAL('triggered()'), self.recognizeInvoice )
self.toggleImageBoxes()
QTimer.singleShot( 1000, self.setup )
self.updateTitle()
@@ -486,6 +487,12 @@ class MainWindow(QMainWindow):
def findMatchingTemplateByText(self):
self.findMatchingTemplate( 'text' )
+ def recognizeInvoice(self):
+ from NanScan.Generics.InvoiceRecognizer import InvoiceRecognizer
+ p = InvoiceRecognizer()
+ result = p.recognize( self.recognizer )
+ QMessageBox.information( self, _('Invoice Recognition'), result )
+
def findMatchingTemplate(self, type):
if type == 'offset':
title = _('Template search by offset')
diff --git a/Planta/mainwindow.ui b/Planta/mainwindow.ui
index d203484..2f15d73 100644
--- a/Planta/mainwindow.ui
+++ b/Planta/mainwindow.ui
@@ -13,14 +13,6 @@
Planta
-
-
- 0
- 48
- 709
- 439
-
-
-
@@ -35,16 +27,7 @@
-
-
-
- 0
- 20
- 70
- 399
-
-
-
+
@@ -111,7 +94,7 @@
0
0
709
- 22
+ 25
@@ -157,14 +141,6 @@
-
-
- 0
- 22
- 709
- 26
-
-
toolBar
@@ -297,6 +273,11 @@
Deskew
+
+
+ Recognize Invoice
+
+
diff --git a/Planta/planta.sh b/Planta/planta.sh
index e846e3d..cdac317 100755
--- a/Planta/planta.sh
+++ b/Planta/planta.sh
@@ -4,4 +4,5 @@
#export PYTHONPATH=/home/albert/python/lib/python:../../bin:../../..
# NanScan
export PYTHONPATH=..:/home/albert/d/koo
+export LD_LIBRARY_PATH=/usr/lib
./planta.py $1
diff --git a/doc/doxygen/nanscan.doxyfile b/doc/doxygen/nanscan.doxyfile
index be6106b..455fbf0 100644
--- a/doc/doxygen/nanscan.doxyfile
+++ b/doc/doxygen/nanscan.doxyfile
@@ -4,7 +4,7 @@
# Project related configuration options
#---------------------------------------------------------------------------
DOXYFILE_ENCODING = UTF-8
-PROJECT_NAME = NaNScaN
+PROJECT_NAME = NanScan
PROJECT_NUMBER = 1.0
OUTPUT_DIRECTORY = .
CREATE_SUBDIRS = NO
@@ -87,7 +87,7 @@ WARN_LOGFILE =
#---------------------------------------------------------------------------
# configuration options related to the input files
#---------------------------------------------------------------------------
-INPUT = ../../NaNScaN
+INPUT = ../../NanScan
INPUT_ENCODING = UTF-8
FILE_PATTERNS = *.c \
*.cc \