mirror of https://github.com/NaN-tic/nanscan.git
- Fixed doxygen file.
- Added invoice recognition module. Still missing block detection.
This commit is contained in:
parent
2cbba682f0
commit
0eb944c512
|
@ -0,0 +1,153 @@
|
|||
# Copyright (C) 2009 by Albert Cervera i Areny
|
||||
# albert@nan-tic.com
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 2 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program; if not, write to the
|
||||
# Free Software Foundation, Inc.,
|
||||
# 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||
|
||||
|
||||
from NanScan.LevenshteinDistance import *
|
||||
from NanScan.Range import *
|
||||
from NanScan.TextPatterns import *
|
||||
|
||||
class InvoiceRecognizer:
|
||||
Tags = {
|
||||
'number': {
|
||||
'tag': [
|
||||
u'factura',
|
||||
u'numero factura',
|
||||
u'factura numero',
|
||||
u'num. de factura',
|
||||
u'factura num.'
|
||||
],
|
||||
'type': 'mostly-numeric'
|
||||
},
|
||||
'date': {
|
||||
'tag': [
|
||||
u'fecha',
|
||||
u'fecha factura',
|
||||
u'fecha emision',
|
||||
u'data:',
|
||||
u'data',
|
||||
u'data factura'
|
||||
],
|
||||
'type': 'date'
|
||||
# With dates we need to be able to find a date with
|
||||
# the format '1 Sep. 2009'. Also we need to find the
|
||||
# date without a tag. Something like:
|
||||
#
|
||||
# 'fallback': functionName,
|
||||
#
|
||||
# might be appropiate for those cases in which the
|
||||
# tag can't be found.
|
||||
},
|
||||
'amount': {
|
||||
'tag': [
|
||||
u'total',
|
||||
u'total factura',
|
||||
u'total a pagar (euros)'
|
||||
],
|
||||
'type': 'numeric'
|
||||
}
|
||||
}
|
||||
def recognize(self, recognizer):
|
||||
#text = recognizer.textInRegion('text')
|
||||
analyzer = recognizer.analyzers['text']
|
||||
self.textLines = analyzer.textLinesWithSpaces()
|
||||
result = ''
|
||||
for tag in InvoiceRecognizer.Tags:
|
||||
result += 'Tag: %s, Value: %s\n' % ( tag, self.findTagValue( tag ) )
|
||||
return result
|
||||
|
||||
def formatedLine(self, line):
|
||||
text = u''
|
||||
for c in line:
|
||||
text += c.character
|
||||
return text
|
||||
|
||||
def findText(self, textToFind):
|
||||
ranges = Range.extractAllRangesFromDocument( self.textLines, len(textToFind) )
|
||||
for ran in ranges:
|
||||
text = ran.text()
|
||||
value = Levenshtein.levenshtein( text, textToFind )
|
||||
ran.distance = value
|
||||
ranges.sort( rangeDistanceComparison )
|
||||
if ranges:
|
||||
return ranges[0]
|
||||
else:
|
||||
return None
|
||||
|
||||
|
||||
def findTagValue(self, tag):
|
||||
ranges = []
|
||||
for tagData in InvoiceRecognizer.Tags[tag]['tag']:
|
||||
ran = self.findText( tagData )
|
||||
if ran:
|
||||
ranges.append( ran )
|
||||
ranges.sort( rangeDistanceComparison )
|
||||
#ran = ranges[0]
|
||||
distance = ranges[0].distance
|
||||
sameDistance = [x for x in ranges if x.distance == distance]
|
||||
sameDistance.sort( rangeLengthComparison )
|
||||
#print "SECOND 5: ", [x.text().encode('ascii','ignore') for x in sameDistance[:5]]
|
||||
ran = sameDistance[-1]
|
||||
|
||||
print "RANGE FOR TAG %s: %s" % ( tag, ran.text().encode('ascii','ignore') )
|
||||
|
||||
# Extract text on the right
|
||||
line = self.formatedLine( self.textLines[ ran.line ] )
|
||||
rightValue = line[ran.pos+ran.length+1:].strip().split(' ')[0]
|
||||
print "R: ", line[ran.pos+ran.length+1:].strip().encode('ascii','ignore')
|
||||
print "rightValue: ", rightValue.encode('ascii','ignore')
|
||||
print "SAME LINE: ", line.encode('ascii','ignore')
|
||||
|
||||
# Extract text on the bottom
|
||||
if ran.line < len(self.textLines)-1:
|
||||
line = self.textLines[ran.line+1]
|
||||
print "NEXT LINE: ", self.formatedLine( self.textLines[ran.line+1] ).encode('ascii','ignore')
|
||||
boxBottom = ran.rect()
|
||||
boxBottom.moveTop( line[0].box.y() )
|
||||
bottomValue = u''
|
||||
for c in line:
|
||||
if c.box.intersects( boxBottom ):
|
||||
bottomValue += c.character
|
||||
else:
|
||||
bottomValue = u''
|
||||
|
||||
# Decide which of both values match the given tag type
|
||||
type = InvoiceRecognizer.Tags[ tag ][ 'type' ]
|
||||
if type == 'numeric':
|
||||
if isFloat( rightValue ):
|
||||
return textToFloat( rightValue )
|
||||
elif isFloat( bottomValue ):
|
||||
return textToFloat( bottomValue )
|
||||
else:
|
||||
return None
|
||||
elif type == 'date':
|
||||
if isDate( rightValue ):
|
||||
return textToDate( rightValue )
|
||||
elif isDate( bottomValue ):
|
||||
return textToDate( bottomValue )
|
||||
else:
|
||||
return None
|
||||
elif type == 'mostly-numeric':
|
||||
if isMostlyNumeric( rightValue ):
|
||||
return rightValue
|
||||
elif isMostlyNumeric( bottomValue ):
|
||||
return bottomValue
|
||||
else:
|
||||
return rightValue
|
||||
else:
|
||||
return rightValue
|
||||
|
|
@ -0,0 +1,19 @@
|
|||
# Copyright (C) 2009 by Albert Cervera i Areny
|
||||
# albert@nan-tic.com
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 2 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program; if not, write to the
|
||||
# Free Software Foundation, Inc.,
|
||||
# 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||
|
||||
|
107
NanScan/Ocr.py
107
NanScan/Ocr.py
|
@ -44,7 +44,7 @@ def boxComparison(x, y):
|
|||
else:
|
||||
return 0
|
||||
|
||||
## @breif This class allows using an OCR and provides several convenient functions
|
||||
## @brief This class allows using an OCR and provides several convenient functions
|
||||
# regarding text and image processing such as deskewing or obtaining formated text.
|
||||
class Ocr(Analyzer):
|
||||
file = ""
|
||||
|
@ -132,12 +132,12 @@ class Ocr(Analyzer):
|
|||
|
||||
## @brief Returns the text of a given region of the image.
|
||||
# It's the same as calling formatedText().
|
||||
def textInRegion(self, region):
|
||||
def textInRegion(self, region=None):
|
||||
return self.formatedText( region )
|
||||
|
||||
## @brief Returns the bounding rectangle of the text returned by textInRegion for
|
||||
# the given region.
|
||||
def featureRectInRegion(self, region):
|
||||
def featureRectInRegion(self, region=None):
|
||||
lines = self.textLinesWithSpaces( region )
|
||||
rect = QRectF()
|
||||
for line in lines:
|
||||
|
@ -242,6 +242,58 @@ class Ocr(Analyzer):
|
|||
line.sort( boxComparison )
|
||||
return lines
|
||||
|
||||
## @brief This function adds spaces between words of a single line of boxes.
|
||||
def textLineWithSpaces(self, line):
    # Inserts, in place, a space Character between the words of 'line'
    # (a list of objects with 'box' and 'character' attributes). Nothing
    # is returned: the given list itself is modified.
    width = 0
    count = 0
    left = None
    spacesToAdd = []
    for c in line:
        # 'left' is the right edge of the previous character. It is compared
        # with None explicitly so that an edge at coordinate 0 is not
        # mistaken for "there is no previous character".
        if left is not None:
            # If separation between previous and current char
            # is greater than a third of the average character
            # width (of the characters seen so far) we'll add a space.
            if c.box.left() - left > ( width / count ) / 3:
                spacesToAdd.append( count )

        left = c.box.right()
        width += c.box.width()
        count += 1

    # TODO: We've got some problems with fixed size fonts. In some cases the
    # 'I' letter will have the width of a pipe but the distance between
    # characters will be fixed. In these cases it's very probable our
    # algorithm will add incorrect spaces before and/or after the 'I' letter.
    # This should be fixed by somehow determining if it's a fixed sized font,
    # for example by comparing the distances between the centers of
    # consecutive characters of each word.

    # Reverse so indexes are still valid after insertions
    spacesToAdd.reverse()
    for idx in spacesToAdd:
        previous = line[idx - 1].box
        c = Character()
        c.character = u' '
        # The space takes the vertical extent of the previous character and
        # fills the horizontal gap between both neighbours.
        c.box = QRectF()
        c.box.setTop( previous.top() )
        c.box.setBottom( previous.bottom() )
        c.box.setLeft( previous.right() )
        c.box.setRight( line[idx].box.left() )
        line.insert( idx, c )
|
||||
|
||||
|
||||
## @brief This function is similar to textLines() but adds spaces between words.
|
||||
# The result is also a list of lines each line being a list of Character objects.
|
||||
def textLinesWithSpaces(self, region=None):
|
||||
|
@ -257,54 +309,7 @@ class Ocr(Analyzer):
|
|||
# which is quite usual.
|
||||
|
||||
for line in lines:
|
||||
width = 0
|
||||
count = 0
|
||||
left = None
|
||||
spacesToAdd = []
|
||||
words = []
|
||||
for c in line:
|
||||
if left:
|
||||
# If separation between previous and current char
|
||||
# is greater than a third of the average character
|
||||
# width we'll add a space.
|
||||
if c.box.left() - left > ( width / count ) / 3:
|
||||
if spacesToAdd:
|
||||
words.append( line[spacesToAdd[-1]:count] )
|
||||
spacesToAdd.append( count )
|
||||
|
||||
# c.character is already a unicode string
|
||||
left = c.box.right()
|
||||
width += c.box.width()
|
||||
count += 1
|
||||
|
||||
# Try to find out if they are fixed sized characters
|
||||
# We've got some problems with fixed size fonts. In some cases the 'I' letter will
|
||||
# have the width of a pipe but the distance between characters will be fixed. In these
|
||||
# cases it's very probable our algorithm will add incorrect spaces before and/or after
|
||||
# the 'I' letter. This should be fixed by somehow determining if it's a fixed sized
|
||||
# font. The commented code below tries to do just that by calculating distances within
|
||||
# the letters of each word. We need to find out if something like this can work and
|
||||
# use it.
|
||||
#for x in words:
|
||||
#dist = []
|
||||
#for c in range( len(x)-1 ):
|
||||
#dist.append( x[c+1].box.center().x() - x[c].box.center().x() )
|
||||
#print 'Paraula: ', (u''.join( [i.character for i in x] )).encode( 'ascii', 'ignore')
|
||||
#print 'Distancies: ', dist
|
||||
|
||||
|
||||
# Reverse so indexes are still valid after insertions
|
||||
spacesToAdd.reverse()
|
||||
previousIdx = None
|
||||
for idx in spacesToAdd:
|
||||
c = Character()
|
||||
c.character = u' '
|
||||
c.box = QRectF()
|
||||
c.box.setTop( line[idx - 1].box.top() )
|
||||
c.box.setBottom( line[idx - 1].box.bottom() )
|
||||
c.box.setLeft( line[idx - 1].box.right() )
|
||||
c.box.setRight( line[idx].box.left() )
|
||||
line.insert( idx, c )
|
||||
self.textLineWithSpaces( line )
|
||||
return lines
|
||||
|
||||
|
||||
|
|
|
@ -29,6 +29,7 @@ from Trigram import *
|
|||
from Hamming import *
|
||||
from LevenshteinDistance import *
|
||||
from Translator import *
|
||||
from Range import *
|
||||
|
||||
import tempfile
|
||||
|
||||
|
@ -70,7 +71,7 @@ class Recognizer(QObject):
|
|||
if type in self.analyzers:
|
||||
return self.analyzers[type].boxes
|
||||
else:
|
||||
return None
|
||||
return []
|
||||
|
||||
def analyzersAvailable(self):
|
||||
return self.analyzers.keys()
|
||||
|
@ -148,11 +149,9 @@ class Recognizer(QObject):
|
|||
# 5 (the default) will make the template move 5 millimeter to the right,
|
||||
# 5 to the left, 5 to the top and 5 to the bottom. This means 121 positions
|
||||
# per template.
|
||||
#
|
||||
# Note that the image must have been scanned (using scan() or startScan())
|
||||
# before using this function.
|
||||
#
|
||||
# TODO: Using offsets to find the best template is easy but highly inefficient.
|
||||
# a smarter solution should be implemented.
|
||||
def findMatchingTemplateByOffset( self, templates, offset = 5 ):
|
||||
max = 0
|
||||
best = {
|
||||
|
@ -200,9 +199,6 @@ class Recognizer(QObject):
|
|||
#
|
||||
# Note that the image must have been scanned (using scan() or startScan())
|
||||
# before using this function.
|
||||
#
|
||||
# TODO: Using offsets to find the best template is easy but highly inefficient.
|
||||
# a smarter solution should be implemented.
|
||||
def findMatchingTemplateByText( self, templates ):
|
||||
max = 0
|
||||
best = {
|
||||
|
@ -224,7 +220,6 @@ class Recognizer(QObject):
|
|||
# Apply template with offset found
|
||||
currentDocument = self.extractWithTemplate( template, offset.x(), offset.y() )
|
||||
for documentBox in currentDocument.boxes:
|
||||
print "Applying..."
|
||||
if documentBox.templateBox.type != 'matcher':
|
||||
continue
|
||||
templateBox = documentBox.templateBox
|
||||
|
@ -373,65 +368,3 @@ class TemplateBoxRangeIterator:
|
|||
break
|
||||
return result
|
||||
|
||||
def rangeDistanceComparison(x, y):
    # cmp-style comparator on the 'distance' attribute: returns 1 if x is
    # farther than y, -1 if it is closer and 0 otherwise.
    farther = x.distance > y.distance
    closer = x.distance < y.distance
    return int( farther ) - int( closer )
|
||||
|
||||
## @brief This class represents a group of characters in a document.
|
||||
class Range:
    def __init__(self):
        # Index of the line inside 'document', index of the first character
        # inside that line, number of characters, and the document itself
        # (a list of lines, each line being a list of characters).
        self.line = 0
        self.pos = 0
        self.length = 0
        self.document = None

    ## @brief Returns a unicode string with the text of the current range
    def text(self):
        first = self.pos
        last = first + self.length
        selected = self.document[self.line][first:last]
        return u''.join( [char.character for char in selected] )

    ## @brief Returns the bounding rectangle of the text in the range
    def rect(self):
        first = self.pos
        last = first + self.length
        bounding = QRectF()
        for char in self.document[self.line][first:last]:
            bounding = bounding.united( char.box )
        return bounding

    ## @brief Returns a list with all possible ranges of size length of the
    # given document
    #
    # Lines with 'length' characters or fewer produce a single range that
    # covers the whole line. The 'width' parameter is currently not used.
    @staticmethod
    def extractAllRangesFromDocument(lines, length, width=0):
        if length <= 0:
            return []
        found = []
        for number, content in enumerate( lines ):
            size = len( content )
            if length >= size:
                # Line too short for a full window: take all of it.
                spans = [ ( 0, size ) ]
            else:
                # Sliding window of 'length' characters over the line.
                spans = [ ( start, length ) for start in range( size - length + 1 ) ]
            for start, span in spans:
                ran = Range()
                ran.line = number
                ran.pos = start
                ran.length = span
                ran.document = lines
                found.append( ran )
        return found
|
||||
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
from PyQt4.QtGui import *
|
||||
from scandialog import *
|
||||
from ScanDialog import *
|
||||
import sys
|
||||
import os
|
||||
|
||||
|
@ -10,7 +10,7 @@ dialog = ScanDialog()
|
|||
if os.name == 'nt':
|
||||
FileSaveThreaded.directory = 'c:\\images'
|
||||
else:
|
||||
FileSaveThreaded.directory = '/tmp'
|
||||
FileSaveThreaded.directory = '/tmp/scan'
|
||||
|
||||
dialog.exec_()
|
||||
|
||||
|
|
|
@ -454,6 +454,7 @@ class MainWindow(QMainWindow):
|
|||
self.connect( self.actionUnzoom, SIGNAL('triggered()'), self.unzoom )
|
||||
self.connect( self.actionFindMatchingTemplateByOffset, SIGNAL('triggered()'), self.findMatchingTemplateByOffset )
|
||||
self.connect( self.actionFindMatchingTemplateByText, SIGNAL('triggered()'), self.findMatchingTemplateByText )
|
||||
self.connect( self.actionRecognizeInvoice, SIGNAL('triggered()'), self.recognizeInvoice )
|
||||
self.toggleImageBoxes()
|
||||
QTimer.singleShot( 1000, self.setup )
|
||||
self.updateTitle()
|
||||
|
@ -486,6 +487,12 @@ class MainWindow(QMainWindow):
|
|||
def findMatchingTemplateByText(self):
|
||||
self.findMatchingTemplate( 'text' )
|
||||
|
||||
def recognizeInvoice(self):
    # Runs the invoice recognizer on the current recognizer's data and
    # shows the resulting report in an information dialog.
    # The recognizer module is imported only when the action is triggered.
    from NanScan.Generics.InvoiceRecognizer import InvoiceRecognizer
    report = InvoiceRecognizer().recognize( self.recognizer )
    QMessageBox.information( self, _('Invoice Recognition'), report )
|
||||
|
||||
def findMatchingTemplate(self, type):
|
||||
if type == 'offset':
|
||||
title = _('Template search by offset')
|
||||
|
|
|
@ -13,14 +13,6 @@
|
|||
<string>Planta</string>
|
||||
</property>
|
||||
<widget class="QWidget" name="centralwidget" >
|
||||
<property name="geometry" >
|
||||
<rect>
|
||||
<x>0</x>
|
||||
<y>48</y>
|
||||
<width>709</width>
|
||||
<height>439</height>
|
||||
</rect>
|
||||
</property>
|
||||
<layout class="QHBoxLayout" name="horizontalLayout_2" >
|
||||
<item>
|
||||
<layout class="QVBoxLayout" >
|
||||
|
@ -35,16 +27,7 @@
|
|||
<property name="windowTitle" >
|
||||
<string/>
|
||||
</property>
|
||||
<widget class="QWidget" name="dockWidgetContents" >
|
||||
<property name="geometry" >
|
||||
<rect>
|
||||
<x>0</x>
|
||||
<y>20</y>
|
||||
<width>70</width>
|
||||
<height>399</height>
|
||||
</rect>
|
||||
</property>
|
||||
</widget>
|
||||
<widget class="QWidget" name="dockWidgetContents" />
|
||||
</widget>
|
||||
</item>
|
||||
</layout>
|
||||
|
@ -111,7 +94,7 @@
|
|||
<x>0</x>
|
||||
<y>0</y>
|
||||
<width>709</width>
|
||||
<height>22</height>
|
||||
<height>25</height>
|
||||
</rect>
|
||||
</property>
|
||||
<widget class="QMenu" name="menuFile" >
|
||||
|
@ -150,6 +133,7 @@
|
|||
<addaction name="actionFindMatchingTemplateByOffset" />
|
||||
<addaction name="actionFindMatchingTemplateByText" />
|
||||
<addaction name="actionDeskew" />
|
||||
<addaction name="actionRecognizeInvoice" />
|
||||
</widget>
|
||||
<addaction name="menuFile" />
|
||||
<addaction name="menuEdit" />
|
||||
|
@ -157,14 +141,6 @@
|
|||
<addaction name="menuView" />
|
||||
</widget>
|
||||
<widget class="QToolBar" name="toolBar" >
|
||||
<property name="geometry" >
|
||||
<rect>
|
||||
<x>0</x>
|
||||
<y>22</y>
|
||||
<width>709</width>
|
||||
<height>26</height>
|
||||
</rect>
|
||||
</property>
|
||||
<property name="windowTitle" >
|
||||
<string>toolBar</string>
|
||||
</property>
|
||||
|
@ -297,6 +273,11 @@
|
|||
<string>Deskew</string>
|
||||
</property>
|
||||
</action>
|
||||
<action name="actionRecognizeInvoice" >
|
||||
<property name="text" >
|
||||
<string>Recognize Invoice</string>
|
||||
</property>
|
||||
</action>
|
||||
</widget>
|
||||
<resources/>
|
||||
<connections/>
|
||||
|
|
|
@ -4,4 +4,5 @@
|
|||
#export PYTHONPATH=/home/albert/python/lib/python:../../bin:../../..
|
||||
# NanScan
|
||||
export PYTHONPATH=..:/home/albert/d/koo
|
||||
export LD_LIBRARY_PATH=/usr/lib
|
||||
./planta.py $1
|
||||
|
|
|
@ -4,7 +4,7 @@
|
|||
# Project related configuration options
|
||||
#---------------------------------------------------------------------------
|
||||
DOXYFILE_ENCODING = UTF-8
|
||||
PROJECT_NAME = NaNScaN
|
||||
PROJECT_NAME = NanScan
|
||||
PROJECT_NUMBER = 1.0
|
||||
OUTPUT_DIRECTORY = .
|
||||
CREATE_SUBDIRS = NO
|
||||
|
@ -87,7 +87,7 @@ WARN_LOGFILE =
|
|||
#---------------------------------------------------------------------------
|
||||
# configuration options related to the input files
|
||||
#---------------------------------------------------------------------------
|
||||
INPUT = ../../NaNScaN
|
||||
INPUT = ../../NanScan
|
||||
INPUT_ENCODING = UTF-8
|
||||
FILE_PATTERNS = *.c \
|
||||
*.cc \
|
||||
|
|
Loading…
Reference in New Issue