- Fixed doxygen file.

- Added invoice recognition module. Still missing block detection.
This commit is contained in:
Albert Cervera i Areny 2009-03-14 18:10:23 +01:00
parent 2cbba682f0
commit 0eb944c512
9 changed files with 251 additions and 152 deletions

View File

@ -0,0 +1,153 @@
# Copyright (C) 2009 by Albert Cervera i Areny
# albert@nan-tic.com
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# GNU General Public License for more details.
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the
# Free Software Foundation, Inc.,
# 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
from NanScan.LevenshteinDistance import *
from NanScan.Range import *
from NanScan.TextPatterns import *
## @brief Extracts well known invoice fields (number, date and amount) from the
# text of a recognized document.
#
# For each entry of Tags the class looks for the text that best matches one of
# its labels (smallest Levenshtein distance) and then tries to read the value
# either on the right of the label or just below it.
class InvoiceRecognizer:
    # For each field: 'tag' holds the labels searched in the document and
    # 'type' selects how the candidate value is validated in findTagValue().
    Tags = {
        'number': {
            'tag': [
                u'numero factura',
                u'factura numero',
                u'num. de factura',
                u'factura num.'
            ],
            'type': 'mostly-numeric'
        },
        'date': {
            'tag': [
                u'fecha factura',
                u'fecha emision',
                u'data factura'
            ],
            'type': 'date'
            # TODO: With dates we need to be able to find a date with
            # the format '1 Sep. 2009'. Also we need to find the
            # date without a tag. Something like:
            # 'fallback': functionName,
            # might be appropriate for those cases in which the
            # tag can't be found.
        },
        'amount': {
            'tag': [
                u'total factura',
                u'total a pagar (euros)'
            ],
            'type': 'numeric'
        }
    }

    ## @brief Searches all fields declared in Tags in the given recognizer.
    #
    # Stores the lines returned by the 'text' analyzer in self.textLines and
    # returns a string with one 'Tag: <name>, Value: <value>' line per field.
    def recognize(self, recognizer):
        analyzer = recognizer.analyzers['text']
        self.textLines = analyzer.textLinesWithSpaces()
        return ''.join(
            [ 'Tag: %s, Value: %s\n' % ( tag, self.findTagValue( tag ) ) for tag in InvoiceRecognizer.Tags ]
        )

    ## @brief Returns a unicode string with the characters of the given line.
    #
    # Each item of the line must provide a 'character' attribute.
    def formatedLine(self, line):
        return u''.join( [ c.character for c in line ] )

    ## @brief Returns the range of the document that best matches textToFind
    # or None if the document does not contain any range.
    #
    # The Levenshtein distance to textToFind is stored in the 'distance'
    # attribute of every candidate range. On ties the first candidate wins,
    # which is what the previous stable sort by distance returned.
    def findText(self, textToFind):
        ranges = Range.extractAllRangesFromDocument( self.textLines, len(textToFind) )
        if not ranges:
            return None
        for ran in ranges:
            ran.distance = Levenshtein.levenshtein( ran.text(), textToFind )
        return min( ranges, key=lambda ran: ran.distance )

    ## @brief Returns the value found for the given tag (a key of Tags).
    #
    # Returns None when no label could be located at all (for example with an
    # empty document) or when a 'numeric' or 'date' value can't be parsed.
    # For 'mostly-numeric' and unknown types the text on the right of the
    # label is returned as a fallback.
    def findTagValue(self, tag):
        tagInfo = InvoiceRecognizer.Tags[ tag ]

        candidates = []
        for tagText in tagInfo['tag']:
            ran = self.findText( tagText )
            if ran:
                candidates.append( ran )
        # Without this guard an empty document raised IndexError below.
        if not candidates:
            return None

        # Keep only the best matches and, among them, pick the last one
        # according to rangeLengthComparison.
        # NOTE(review): rangeLengthComparison is not defined in this module;
        # presumably it comes from NanScan.Range and orders by length, so the
        # longest label is chosen -- confirm.
        bestDistance = min( [ x.distance for x in candidates ] )
        sameDistance = [ x for x in candidates if x.distance == bestDistance ]
        sameDistance.sort( rangeLengthComparison )
        ran = sameDistance[-1]

        # Debug output. Written as single-argument print( ... ) calls so the
        # text is byte-identical to the former 'print a, b' statements (the
        # double space is the separator those statements emitted).
        print( "RANGE FOR TAG %s: %s" % ( tag, ran.text().encode('ascii','ignore') ) )

        # Extract text on the right
        line = self.formatedLine( self.textLines[ ran.line ] )
        rightText = line[ran.pos+ran.length+1:].strip()
        rightValue = rightText.split(' ')[0]
        print( "R:  %s" % rightText.encode('ascii','ignore') )
        print( "rightValue:  %s" % rightValue.encode('ascii','ignore') )
        print( "SAME LINE:  %s" % line.encode('ascii','ignore') )

        # Extract text on the bottom: characters of the next line that
        # intersect the label rectangle once moved down to that line.
        bottomValue = u''
        if ran.line < len(self.textLines)-1:
            nextLine = self.textLines[ran.line+1]
            print( "NEXT LINE:  %s" % self.formatedLine( nextLine ).encode('ascii','ignore') )
            # An empty line has no first character to align the box with.
            if nextLine:
                boxBottom = ran.rect()
                boxBottom.moveTop( nextLine[0].box.y() )
                for c in nextLine:
                    if c.box.intersects( boxBottom ):
                        bottomValue += c.character

        # Decide which of both values match the given tag type
        # NOTE(review): isFloat, textToFloat, isDate, textToDate and
        # isMostlyNumeric are expected to come from NanScan.TextPatterns.
        valueType = tagInfo['type']
        if valueType == 'numeric':
            if isFloat( rightValue ):
                return textToFloat( rightValue )
            if isFloat( bottomValue ):
                return textToFloat( bottomValue )
            return None
        if valueType == 'date':
            if isDate( rightValue ):
                return textToDate( rightValue )
            if isDate( bottomValue ):
                return textToDate( bottomValue )
            return None
        if valueType == 'mostly-numeric':
            if isMostlyNumeric( rightValue ):
                return rightValue
            if isMostlyNumeric( bottomValue ):
                return bottomValue
            return rightValue
        return rightValue

View File

@ -0,0 +1,19 @@
# Copyright (C) 2009 by Albert Cervera i Areny
# albert@nan-tic.com
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# GNU General Public License for more details.
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the
# Free Software Foundation, Inc.,
# 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.

View File

@ -44,7 +44,7 @@ def boxComparison(x, y):
return 0
## @breif This class allows using an OCR and provides several convenient functions
## @brief This class allows using an OCR and provides several convenient functions
# regarding text and image processing such as deskewing or obtaining formated text.
class Ocr(Analyzer):
file = ""
@ -132,12 +132,12 @@ class Ocr(Analyzer):
## @brief Returns the text of a given region of the image.
# It's the same as calling formatedText().
def textInRegion(self, region):
def textInRegion(self, region=None):
return self.formatedText( region )
## @brief Returns the bounding rectangle of the text returned by textInRegion for
# the given region.
def featureRectInRegion(self, region):
def featureRectInRegion(self, region=None):
lines = self.textLinesWithSpaces( region )
rect = QRectF()
for line in lines:
@ -242,6 +242,58 @@ class Ocr(Analyzer):
line.sort( boxComparison )
return lines
## @brief This function adds spaces between words of a single line of boxes.
#
# The given list is modified in place: a Character containing u' ' is inserted
# wherever the horizontal gap between two consecutive characters is greater
# than a third of the average width of the characters seen so far in the line.
# The box of the inserted space fills the gap between its two neighbours.
# Nothing is returned.
#
# NOTE(review): the gap computation assumes 'line' is ordered from left to
# right -- confirm with the callers.
def textLineWithSpaces(self, line):
    totalWidth = 0
    count = 0
    previousRight = None
    spacesToAdd = []
    for c in line:
        # Compare against None explicitly: a previous character whose right
        # edge is at coordinate 0 must not be mistaken for "no previous
        # character".
        if previousRight is not None:
            # If the separation between previous and current char
            # is greater than a third of the average character
            # width we'll add a space.
            if c.box.left() - previousRight > ( totalWidth / count ) / 3:
                spacesToAdd.append( count )
        previousRight = c.box.right()
        totalWidth += c.box.width()
        count += 1

    # TODO: We've got some problems with fixed size fonts. In some cases the
    # 'I' letter will have the width of a pipe but the distance between
    # characters will be fixed. In these cases it's very probable our
    # algorithm will add incorrect spaces before and/or after the 'I' letter.
    # This should be fixed by somehow determining if it's a fixed sized font,
    # for example by comparing the distances between the centers of the
    # letters of each word.

    # Insert starting from the end of the line so the indexes that are still
    # pending remain valid after each insertion.
    for idx in reversed( spacesToAdd ):
        space = Character()
        space.character = u' '
        space.box = QRectF()
        space.box.setTop( line[idx - 1].box.top() )
        space.box.setBottom( line[idx - 1].box.bottom() )
        space.box.setLeft( line[idx - 1].box.right() )
        space.box.setRight( line[idx].box.left() )
        line.insert( idx, space )
## @brief This function is similar to textLines() but adds spaces between words.
# The result is also a list of lines each line being a list of Character objects.
def textLinesWithSpaces(self, region=None):
@ -257,54 +309,7 @@ class Ocr(Analyzer):
# which is quite usual.
for line in lines:
width = 0
count = 0
left = None
spacesToAdd = []
words = []
for c in line:
if left:
# If the separation between the previous and current char
# is greater than a third of the average character
# width we'll add a space.
if c.box.left() - left > ( width / count ) / 3:
if spacesToAdd:
words.append( line[spacesToAdd[-1]:count] )
spacesToAdd.append( count )
# c.character is already a unicode string
left = c.box.right()
width += c.box.width()
count += 1
# Try to find out if they are fixed sized characters
# We've got some problems with fixed size fonts. In some cases the 'I' letter will
# have the width of a pipe but the distance between characters will be fixed. In these
# cases it's very probable our algorithm will add incorrect spaces before and/or after
# the 'I' letter. This should be fixed by somehow determining if it's a fixed sized
# font. The commented code below tries to do just that by calculating distances within
# the letters of each word. We need to find out if something like this can work and
# use it.
#for x in words:
#dist = []
#for c in range( len(x)-1 ):
#dist.append( x[c+1].box.center().x() - x[c].box.center().x() )
#print 'Paraula: ', (u''.join( [i.character for i in x] )).encode( 'ascii', 'ignore')
#print 'Distancies: ', dist
# Reverse so indexes are still valid after insertions
previousIdx = None
for idx in spacesToAdd:
c = Character()
c.character = u' '
c.box = QRectF()
c.box.setTop( line[idx - 1].box.top() )
c.box.setBottom( line[idx - 1].box.bottom() )
c.box.setLeft( line[idx - 1].box.right() )
c.box.setRight( line[idx].box.left() )
line.insert( idx, c )
self.textLineWithSpaces( line )
return lines

View File

@ -29,6 +29,7 @@ from Trigram import *
from Hamming import *
from LevenshteinDistance import *
from Translator import *
from Range import *
import tempfile
@ -70,7 +71,7 @@ class Recognizer(QObject):
if type in self.analyzers:
return self.analyzers[type].boxes
return None
return []
## @brief Returns the keys of self.analyzers, that is, the analyzer types
# that can be used to index it.
def analyzersAvailable(self):
return self.analyzers.keys()
@ -148,11 +149,9 @@ class Recognizer(QObject):
# 5 (the default) will make the template move 5 millimeter to the right,
# 5 to the left, 5 to the top and 5 to the bottom. This means 121 positions
# per template.
# Note that the image must have been scanned (using scan() or startScan())
# before using this function.
# TODO: Using offsets to find the best template is easy but highly inefficient.
# a smarter solution should be implemented.
def findMatchingTemplateByOffset( self, templates, offset = 5 ):
max = 0
best = {
@ -200,9 +199,6 @@ class Recognizer(QObject):
# Note that the image must have been scanned (using scan() or startScan())
# before using this function.
# TODO: Using offsets to find the best template is easy but highly inefficient.
# a smarter solution should be implemented.
def findMatchingTemplateByText( self, templates ):
max = 0
best = {
@ -224,7 +220,6 @@ class Recognizer(QObject):
# Apply template with offset found
currentDocument = self.extractWithTemplate( template, offset.x(), offset.y() )
for documentBox in currentDocument.boxes:
print "Applying..."
if documentBox.templateBox.type != 'matcher':
templateBox = documentBox.templateBox
@ -373,65 +368,3 @@ class TemplateBoxRangeIterator:
return result
## @brief Comparison function that orders two objects by their 'distance'
# attribute.
#
# Returns 1 if x.distance is greater than y.distance, -1 if it is smaller and
# 0 otherwise.
def rangeDistanceComparison(x, y):
    # Booleans subtract as integers: (greater) - (smaller) gives 1, -1 or 0.
    return ( x.distance > y.distance ) - ( x.distance < y.distance )
## @brief This class represents a group of characters in a document.
#
# A document is a list of lines and each line is a list of objects with a
# 'character' attribute (and a 'box' attribute, needed by rect()). The range
# covers 'length' characters of line number 'line' starting at index 'pos'.
class Range:
    def __init__(self):
        self.line = 0
        self.pos = 0
        self.length = 0
        self.document = None

    # Returns the list of character objects covered by the range.
    def _characters(self):
        line = self.document[self.line]
        return line[self.pos:self.pos + self.length]

    ## @brief Returns a unicode string with the text of the current range
    def text(self):
        return u''.join( [x.character for x in self._characters()] )

    ## @brief Returns the bounding rectangle of the text in the range
    def rect(self):
        rect = QRectF()
        for c in self._characters():
            rect = rect.united( c.box )
        return rect

    ## @brief Returns a list with all possible ranges of size length of the
    # given document
    #
    # Lines that are not longer than 'length' produce a single range that
    # covers the whole line (exactly one, also when the line has exactly
    # 'length' characters). An empty list is returned if length is not
    # positive. The 'width' parameter is accepted but currently ignored.
    #
    # Declared static because it is called on the class itself:
    # Range.extractAllRangesFromDocument( lines, length )
    @staticmethod
    def extractAllRangesFromDocument(lines, length, width=0):
        if length <= 0:
            return []
        ranges = []
        for lineNumber, line in enumerate( lines ):
            lineLength = len( line )
            if length >= lineLength:
                # The whole line is the only candidate.
                size = lineLength
                positions = [0]
            else:
                size = length
                positions = range( lineLength - length + 1 )
            for pos in positions:
                ran = Range()
                ran.line = lineNumber
                ran.pos = pos
                ran.length = size
                ran.document = lines
                ranges.append( ran )
        return ranges

View File

@ -1,5 +1,5 @@
from PyQt4.QtGui import *
from scandialog import *
from ScanDialog import *
import sys
import os
@ -10,7 +10,7 @@ dialog = ScanDialog()
if os.name == 'nt':
FileSaveThreaded.directory = 'c:\\images'
FileSaveThreaded.directory = '/tmp'
FileSaveThreaded.directory = '/tmp/scan'

View File

@ -454,6 +454,7 @@ class MainWindow(QMainWindow):
self.connect( self.actionUnzoom, SIGNAL('triggered()'), self.unzoom )
self.connect( self.actionFindMatchingTemplateByOffset, SIGNAL('triggered()'), self.findMatchingTemplateByOffset )
self.connect( self.actionFindMatchingTemplateByText, SIGNAL('triggered()'), self.findMatchingTemplateByText )
self.connect( self.actionRecognizeInvoice, SIGNAL('triggered()'), self.recognizeInvoice )
QTimer.singleShot( 1000, self.setup )
@ -486,6 +487,12 @@ class MainWindow(QMainWindow):
## @brief Calls findMatchingTemplate() with the 'text' search type.
def findMatchingTemplateByText(self):
self.findMatchingTemplate( 'text' )
## @brief Runs the invoice recognizer on self.recognizer and shows the text it
# returns in an information message box.
def recognizeInvoice(self):
    # Imported inside the method, so the module is only loaded when the
    # action is actually used.
    from NanScan.Generics.InvoiceRecognizer import InvoiceRecognizer
    report = InvoiceRecognizer().recognize( self.recognizer )
    QMessageBox.information( self, _('Invoice Recognition'), report )
def findMatchingTemplate(self, type):
if type == 'offset':
title = _('Template search by offset')

View File

@ -13,14 +13,6 @@
<widget class="QWidget" name="centralwidget" >
<property name="geometry" >
<layout class="QHBoxLayout" name="horizontalLayout_2" >
<layout class="QVBoxLayout" >
@ -35,16 +27,7 @@
<property name="windowTitle" >
<widget class="QWidget" name="dockWidgetContents" >
<property name="geometry" >
<widget class="QWidget" name="dockWidgetContents" />
@ -111,7 +94,7 @@
<widget class="QMenu" name="menuFile" >
@ -150,6 +133,7 @@
<addaction name="actionFindMatchingTemplateByOffset" />
<addaction name="actionFindMatchingTemplateByText" />
<addaction name="actionDeskew" />
<addaction name="actionRecognizeInvoice" />
<addaction name="menuFile" />
<addaction name="menuEdit" />
@ -157,14 +141,6 @@
<addaction name="menuView" />
<widget class="QToolBar" name="toolBar" >
<property name="geometry" >
<property name="windowTitle" >
@ -297,6 +273,11 @@
<action name="actionRecognizeInvoice" >
<property name="text" >
<string>Recognize Invoice</string>

View File

@ -4,4 +4,5 @@
#export PYTHONPATH=/home/albert/python/lib/python:../../bin:../../..
# NanScan
export PYTHONPATH=..:/home/albert/d/koo
export LD_LIBRARY_PATH=/usr/lib
./planta.py $1

View File

@ -4,7 +4,7 @@
# Project related configuration options
@ -87,7 +87,7 @@ WARN_LOGFILE =
# configuration options related to the input files
INPUT = ../../NaNScaN
INPUT = ../../NanScan
*.cc \