- Unwritable NaNScaN To NanScan.
- Capitalized file names.
This commit is contained in:
Albert Cervera i Areny 2008-12-29 01:53:29 +01:00
parent 2906e6392d
commit c2fb42ebbd
29 changed files with 2357 additions and 0 deletions

59
NanScan/Analyzer.py Normal file
View File

@ -0,0 +1,59 @@
# coding=iso-8859-1
# Copyright (C) 2008 by Albert Cervera i Areny
# albert@nan-tic.com
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the
# Free Software Foundation, Inc.,
# 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
import subprocess
import tempfile
import codecs
import os
class Analyzer:
    # Base class for image analyzers and registry of the available ones.
    #
    # Analyzer classes (or any callable taking no arguments) are stored by
    # name in 'analyzers' and instantiated on demand through create().

    # name -> callable returning an analyzer instance
    analyzers = {}

    def __init__(self):
        self.boxes = []

    @staticmethod
    def registerAnalyzer(name, analyzer):
        # Store (or replace) the factory registered under 'name'.
        Analyzer.analyzers[name] = analyzer

    @staticmethod
    def unregisterAnalyzer(name):
        # Raises KeyError if nothing is registered under 'name'.
        del Analyzer.analyzers[name]

    @staticmethod
    def create(name):
        # Look the factory up and call it with no arguments.
        factory = Analyzer.analyzers[name]
        return factory()

    # The three methods below do nothing in the base class.
    def scan(self, image):
        pass

    def textInRegion(self, region):
        pass

    def featureRectInRegion(self, region):
        pass

    # Run 'command' with the given arguments, wait for it to finish and
    # return whatever it wrote to STDOUT.
    def spawn(self, command, *args):
        print("%s %s" % ("Started: ", command))
        argv = [command] + list(args)
        child = subprocess.Popen(argv, stdout=subprocess.PIPE)
        output = child.communicate()[0]
        print("%s %s" % ("Finished: ", argv))
        return output

View File

@ -0,0 +1,78 @@
# Copyright (C) 2008 by Albert Cervera i Areny
# albert@nan-tic.com
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the
# Free Software Foundation, Inc.,
# 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
from PyQt4.QtCore import *
from PyQt4.QtGui import *
# The backend module in use must set ScannerBackend to its class
# implementing the synchronous scanner interface. It stays None until a
# backend module has assigned it.
ScannerBackend = None
class ScannerError:
    # Integer codes carried by the 'error(int)' signal.
    (
        NoDeviceFound,
        CouldNotOpenDevice,
        AcquisitionError,
        UnknownError,
    ) = range(1, 5)
class Scanner(QObject):
    # Asynchronous front end to the configured ScannerBackend.
    #
    # startScan() runs the scan in a ThreadedScan and re-emits its
    # 'scanned(QImage)', 'error(int)' and 'finished()' signals.

    def __init__(self, parent=None):
        QObject.__init__(self, parent)
        self.resolution = 300
        self.duplex = False

    # Returns the device list reported by the backend.
    def listDevices(self):
        # The original instantiated 'BlockingScanner', a name that is not
        # defined anywhere in this module; the class published in
        # ScannerBackend is the one providing listDevices() and close().
        backend = ScannerBackend(self)
        try:
            return backend.listDevices()
        finally:
            # Release the backend even if listing fails.
            backend.close()

    def setResolution(self, value):
        self.resolution = value

    def setDuplex(self, value):
        self.duplex = value

    # Starts a scan in a background thread using the current settings.
    def startScan(self):
        # Keep a reference on self so the thread object outlives this call.
        self.thread = ThreadedScan(self)
        self.thread.resolution = self.resolution
        self.thread.duplex = self.duplex
        # Forward the thread's signals unchanged.
        self.connect(self.thread, SIGNAL('finished()'), self, SIGNAL('finished()'))
        self.connect(self.thread, SIGNAL('scanned(QImage)'), self, SIGNAL('scanned(QImage)'))
        self.connect(self.thread, SIGNAL('error(int)'), self, SIGNAL('error(int)'))
        self.thread.start()
class ThreadedScan(QThread):
    # Thread that runs one blocking scan with the configured backend and
    # relays its results as signals.

    def __init__(self, parent=None):
        QThread.__init__(self, parent)
        self.duplex = False
        self.resolution = 300

    def run(self):
        backend = ScannerBackend()
        backend.setResolution(self.resolution)
        backend.setDuplex(self.duplex)
        queued = Qt.QueuedConnection
        # Images go through self.scanned(); errors are forwarded as is.
        self.connect(backend, SIGNAL('scanned(QImage)'), self.scanned, queued)
        self.connect(backend, SIGNAL('error(int)'), self, SIGNAL('error(int)'), queued)
        backend.scan()
        backend.close()

    def scanned(self, image):
        # As we're now out of the thread, we create a new QImage
        # object, otherwise the application will crash
        copy = QImage(image)
        self.emit(SIGNAL('scanned(QImage)'), copy)

View File

@ -0,0 +1,94 @@
# Copyright (C) 2008 by Albert Cervera i Areny
# albert@nan-tic.com
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the
# Free Software Foundation, Inc.,
# 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
from PyQt4.QtCore import *
from PyQt4.QtGui import *
import sane
# PIL Module to convert PIL Image Object to QImage
import ImageQt
import common
class SynchronousScanner(QObject):
    # SANE implementation of the synchronous scanner interface.
    #
    # Results are reported through the 'scanned(QImage)' and 'error(int)'
    # signals.

    def __init__(self, parent=None):
        QObject.__init__(self, parent)
        sane.init()
        self.resolution = 300
        self.duplex = False

    # Member of SynchronousScanner Interface
    def listDevices(self):
        # sane.get_devices() returns a structure like the following
        # [('epson:libusb:001:004', 'Epson', 'GT-8300', 'flatbed scanner')]
        return [device[0] for device in sane.get_devices()]

    # Member of SynchronousScanner Interface
    def setResolution(self, value):
        self.resolution = value

    # Member of SynchronousScanner Interface
    def setDuplex(self, value):
        self.duplex = value

    # Member of SynchronousScanner Interface
    #
    # Scans every available page from device 'name' (the first listed
    # device when omitted), emitting 'scanned(QImage)' once per page.
    # Failures are reported with 'error(int)' and end the scan.
    def scan(self, name=None):
        if not name:
            devices = self.listDevices()
            if not devices:
                self.emit(SIGNAL('error(int)'), common.ScannerError.NoDeviceFound)
                return
            name = devices[0]

        try:
            print("%s %s" % ("Trying to open device: ", name))
            source = sane.open(name)
            print("%s %s" % ("opened ", name))
        except Exception:
            print("%s %s" % ("error", name))
            self.emit(SIGNAL('error(int)'), common.ScannerError.CouldNotOpenDevice)
            return

        try:
            source.mode = 'color'
            source.resolution = self.resolution
            source.depth = 32

            print("Multi scan")
            iterator = source.multi_scan()
            print("yea scan")
            # QImage stores the resolution as integer dots per meter.
            res = int(round(float(self.resolution) * 1000 / 25.4))
            while True:
                try:
                    print("Obtaining image...")
                    image = ImageQt.ImageQt(iterator.next())
                    print("Obtain")
                    image.setDotsPerMeterX(res)
                    image.setDotsPerMeterY(res)
                    self.emit(SIGNAL('scanned(QImage)'), image)
                except StopIteration:
                    # No more images in the scanner. The original only
                    # passed here and therefore looped forever.
                    break
                except Exception:
                    self.emit(SIGNAL('error(int)'), common.ScannerError.AcquisitionError)
                    # Stop instead of retrying the failing acquisition
                    # endlessly.
                    break
        finally:
            # Always release the device opened above.
            source.close()

    # Member of SynchronousScanner Interface
    def close(self):
        pass


common.ScannerBackend = SynchronousScanner

View File

@ -0,0 +1,122 @@
import twain, struct, string
from PyQt4.QtCore import *
from PyQt4.QtGui import *
#from common import *
import common
class SynchronousScanner(QObject):
    # TWAIN implementation of the synchronous scanner interface.
    #
    # scan() emits 'scanned(QImage)' per acquired image. Failures are both
    # returned as a common.ScannerError code (as before) and emitted as
    # 'error(int)', matching what the SANE backend does.

    def __init__(self, parent=None):
        QObject.__init__(self, parent)
        self.manager = None
        self.source = None
        self.resolution = 300
        self.duplex = False

    # Returns 's' truncated at its first NUL character, or unchanged when
    # it contains none.
    def stripNull(self, s):
        offset = s.find('\0')
        # The original compared 's' (not 'offset') with -1, which is always
        # true and dropped the last character of NUL-free strings.
        if offset != -1:
            s = s[:offset]
        return s

    # Emits 'error(int)' with 'code' and returns it so callers can write
    # 'return self._fail(code)'.
    def _fail(self, code):
        self.emit(SIGNAL('error(int)'), code)
        return code

    # Member of SynchronousScanner Interface
    def setResolution(self, value):
        self.resolution = value

    # Member of SynchronousScanner Interface
    def setDuplex(self, value):
        self.duplex = value

    # Member of SynchronousScanner Interface
    #
    # Walks the source manager's identity list and returns the name found
    # at offset 122 of each identity record.
    def listDevices(self):
        manager = twain.SourceManager(0)
        fmtString = "L42sHH4s34s34s34s"
        slen = struct.calcsize(fmtString)
        # Buffer of the identity record's size, filled in by DSM_Entry.
        self.identity = struct.pack("%ds" % slen, "")
        names = []
        rv = manager.DSM_Entry(twain.DG_CONTROL, twain.DAT_IDENTITY, twain.MSG_GETFIRST, self.identity)
        while rv == twain.TWRC_SUCCESS:
            names.append(self.stripNull(self.identity[122:]))
            rv = manager.DSM_Entry(twain.DG_CONTROL, twain.DAT_IDENTITY, twain.MSG_GETNEXT, self.identity)
        return names

    # Opens the source called 'name', destroying any previously opened one.
    def open(self, name):
        self.manager = twain.SourceManager(0, ProductName=name)
        if not self.manager:
            return
        if self.source:
            self.source.destroy()
            self.source = None
        self.source = self.manager.OpenSource()
        if self.source:
            print("%s: %s" % (name, self.source.GetSourceName()))

    # Member of SynchronousScanner Interface
    #
    # Returns None on success or a common.ScannerError code on failure.
    def scan(self, name=None):
        if not name:
            devices = self.listDevices()
            if not devices:
                print("No device found")
                return self._fail(common.ScannerError.NoDeviceFound)
            name = devices[0]

        try:
            self.open(name)
        except Exception:
            return self._fail(common.ScannerError.CouldNotOpenDevice)
        if not self.source:
            return self._fail(common.ScannerError.CouldNotOpenDevice)

        # Resolution and duplex are best effort: a source rejecting them
        # does not abort the scan.
        try:
            self.source.SetCapability(twain.ICAP_YRESOLUTION, twain.TWTY_FIX32, float(self.resolution))
            self.source.SetCapability(twain.ICAP_XRESOLUTION, twain.TWTY_FIX32, float(self.resolution))
        except Exception:
            print("Could not set resolution to '%s'" % self.resolution)
        try:
            self.source.SetCapability(twain.CAP_DUPLEXENABLED, twain.TWTY_BOOL, bool(self.duplex))
        except Exception:
            print("Could not set duplex to '%s'" % self.duplex)

        try:
            self.source.RequestAcquire(0, 0)
        except Exception:
            return self._fail(common.ScannerError.AcquisitionError)

        while self.next():
            image = self.capture()
            if not image:
                return self._fail(common.ScannerError.AcquisitionError)
            self.emit(SIGNAL('scanned(QImage)'), image)
        self.source = None

    # True while the source can report information about a pending image.
    def next(self):
        try:
            self.source.GetImageInfo()
            return True
        except Exception:
            return False

    # Transfers one image through a temporary BMP file and returns it as a
    # QImage tagged with the configured resolution, or None if the
    # transfer fails.
    def capture(self):
        fileName = "tmp.tmp"
        try:
            (handle, more_to_come) = self.source.XferImageNatively()
        except Exception:
            return None
        twain.DIBToBMFile(handle, fileName)
        twain.GlobalHandleFree(handle)
        image = QImage(fileName)
        # Dots per inch -> integer dots per meter, as QImage expects.
        res = int(round(float(self.resolution) * 1000 / 25.4))
        image.setDotsPerMeterX(res)
        image.setDotsPerMeterY(res)
        return image

    # Member of SynchronousScanner Interface
    def close(self):
        # Drop the manager reference but keep the attribute, so close()
        # can be called more than once.
        self.manager = None


common.ScannerBackend = SynchronousScanner

View File

@ -0,0 +1,18 @@
# Copyright (C) 2008 by Albert Cervera i Areny
# albert@nan-tic.com
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the
# Free Software Foundation, Inc.,
# 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.

BIN
NanScan/Backends/twain.pyd Normal file

Binary file not shown.

90
NanScan/Barcode.py Normal file
View File

@ -0,0 +1,90 @@
# Copyright (C) 2008 by Albert Cervera i Areny
# albert@nan-tic.com
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the
# Free Software Foundation, Inc.,
# 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
from PyQt4.QtCore import *
from PyQt4.QtGui import *
# Do not import everything as Template is defined in string too
from string import lower
import os
import tempfile
from TemporaryFile import *
from Analyzer import *
class Box:
    # One decoded barcode: its text, its symbology type and its position.
    def __init__(self):
        self.text = self.type = self.position = None
class Barcode(Analyzer):
    # Path of the 'bardecode' executable run by scan(). Kept as a class
    # attribute so it can be overridden without editing scan().
    bardecodeCommand = '/home/albert/d/git/exact-image-0.5.0/objdir/frontends/bardecode'

    def __init__(self):
        self.boxes = []

    ## @brief Parses bardecode output, appending one Box per barcode to self.boxes.
    # Positions are converted from pixels to millimeters.
    def parseBardecodeOutput(self, content):
        # Sample output "818043376500 [type: ean13 at: (1798,936)]"
        for line in content.splitlines():
            pieces = line.split(' ')
            if len(pieces) < 5:
                # Blank or unexpected line: nothing to parse.
                continue
            box = Box()
            box.text = pieces[0].lower()
            box.type = pieces[2]
            pos = pieces[4].strip('()]').split(',')
            x = float(pos[0]) / self.dotsPerMillimeterX
            y = float(pos[1]) / self.dotsPerMillimeterY
            box.position = QPointF(x, y)
            self.boxes.append(box)

    def printBoxes(self):
        for x in self.boxes:
            print("Text: %s, Type: %s, Position: %f, %f" % (x.text, x.type, x.position.x(), x.position.y()))

    ## @brief Returns all barcode values concatenated for a given region of the image.
    def textInRegion(self, region):
        # The original returned only the first barcode found, contradicting
        # the documented contract and DataMatrix.textInRegion().
        texts = [unicode(x.text) for x in self.boxes if region.contains(x.position)]
        # Always return unicode strings
        return u''.join(texts)

    ## @brief Returns the bounding rectangle of the text returned by textInRegion for
    # the given region.
    def featureRectInRegion(self, region):
        rect = QRectF()
        for x in self.boxes:
            if region.contains(x.position):
                # NOTE(review): each barcode contributes a zero-sized rect;
                # verify that QRectF.united() does not discard those.
                rect = rect.united(QRectF(x.position, x.position))
        return rect

    ## @brief Scans the given image (QImage) looking for barcodes.
    def scan(self, image):
        # Clean boxes so scan() can be called more than once
        self.boxes = []
        # Obtain image resolution
        image = QImage(image)
        self.dotsPerMillimeterX = float(image.dotsPerMeterX()) / 1000.0
        self.dotsPerMillimeterY = float(image.dotsPerMeterY()) / 1000.0

        # bardecode reads from a file, so save the image first.
        fileName = TemporaryFile.create()
        image.save(fileName, 'PNG')
        content = self.spawn(self.bardecodeCommand, fileName)
        self.parseBardecodeOutput(content)
        self.printBoxes()


Analyzer.registerAnalyzer('barcode', Barcode)

146
NanScan/DataMatrix.py Normal file
View File

@ -0,0 +1,146 @@
# Copyright (C) 2008 by Albert Cervera i Areny
# albert@nan-tic.com
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the
# Free Software Foundation, Inc.,
# 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
from PyQt4.QtCore import *
from PyQt4.QtGui import *
# Do not import everything as Template is defined in string too
from string import lower
import os
import tempfile
from TemporaryFile import *
from Analyzer import *
class Box:
    # One decoded data matrix symbol and the details reported about it.
    def __init__(self):
        self.text = None
        self.position = None
        self.size = None
        self.dataCodewords = None
        # The parser stores this value as 'errorCodewords'; the original
        # only initialised the misspelled name below.
        self.errorCodewords = None
        # Misspelled original attribute, kept for backward compatibility.
        self.errorCodewordsd = None
        self.dataRegions = None
        self.interleavedBlocks = None
        self.rotationAngle = None
        # Bounding rectangle of the symbol.
        self.box = None
class DataMatrix(Analyzer):
    def __init__(self):
        self.boxes = []

    ## @brief Converts a "(x,y)" pixel position into a QPointF in millimeters.
    def outputTextToPoint(self, text):
        pos = text.strip('(').strip(')').split(',')
        x = float(pos[0]) / self.dotsPerMillimeterX
        y = float(pos[1]) / self.dotsPerMillimeterY
        return QPointF(x, y)

    ## @brief Parses the verbose output of dmtxread, appending one Box per
    # data matrix to self.boxes.
    #
    # Expected shape per symbol: an optional line of 50 dashes, several
    # "Key: value" lines, another line of 50 dashes and finally one line
    # with the decoded text.
    def parseOutput(self, content):
        separator = '-' * 50
        nextText = False
        box = None
        for line in content.splitlines():
            if box is None:
                # Between symbols: skip the opening separator and blank
                # lines rather than creating a box for them.
                if line == separator or not line.strip():
                    continue
                box = Box()
                self.boxes.append(box)

            if nextText:
                # The line after the closing separator is the decoded text.
                box.text = line
                nextText = False
                box = None
                continue

            if line == separator:
                nextText = True
                continue

            if ':' not in line:
                # Not a "Key: value" line; ignore it.
                continue
            # Split on the first colon only so values may contain ':'.
            key, value = line.split(':', 1)
            value = value.strip()
            if 'Matrix Size' in key:
                box.size = value
            elif 'Data Codewords' in key:
                box.dataCodewords = value
            elif 'Error Codewords' in key:
                box.errorCodewords = value
            elif 'Data Regions' in key:
                box.dataRegions = value
            elif 'Interleaved Blocks' in key:
                box.interleavedBlocks = value
            elif 'Rotation Angle' in key:
                box.rotationAngle = value
            elif 'Corner 0' in key:
                box.corner0 = self.outputTextToPoint(value)
            elif 'Corner 1' in key:
                box.corner1 = self.outputTextToPoint(value)
            elif 'Corner 2' in key:
                box.corner2 = self.outputTextToPoint(value)
            elif 'Corner 3' in key:
                box.corner3 = self.outputTextToPoint(value)
                # All corners are known now: build the bounding rectangle.
                r1 = QRectF(box.corner0, box.corner1)
                r2 = QRectF(box.corner2, box.corner3)
                box.box = r1.united(r2)

    def printBoxes(self):
        for x in self.boxes:
            print("Text: '%s'; Position: %f, %f; Size: %f, %f;" % (x.text, x.box.x(), x.box.y(), x.box.width(), x.box.height()))

    ## @brief Returns all data matrix values concatenated for a given region of the image.
    def textInRegion(self, region):
        texts = [unicode(x.text) for x in self.boxes if region.intersects(x.box)]
        # Always return unicode strings
        return u''.join(texts)

    ## @brief Returns the bounding rectangle of the text returned by textInRegion for
    # the given region.
    def featureRectInRegion(self, region):
        rect = QRectF()
        for x in self.boxes:
            if region.intersects(x.box):
                rect = rect.united(x.box)
        return rect

    ## @brief Scans the given image (QImage) looking for data matrices.
    def scan(self, image):
        # Clean boxes so scan() can be called more than once
        self.boxes = []
        # Obtain image resolution
        image = QImage(image)
        self.dotsPerMillimeterX = float(image.dotsPerMeterX()) / 1000.0
        self.dotsPerMillimeterY = float(image.dotsPerMeterY()) / 1000.0

        # dmtxread reads from a file, so save the image first.
        fileName = TemporaryFile.create()
        image.save(fileName, 'PNG')
        command = 'dmtxread'
        content = self.spawn(command, '-n', '-v', fileName)
        self.parseOutput(content)
        self.printBoxes()


Analyzer.registerAnalyzer('dataMatrix', DataMatrix)
if __name__ == '__main__':
    # Manual check: scan a sample image when run as a script.
    matrix = DataMatrix()
    matrix.scan('/tmp/ex3.png')

33
NanScan/Document.py Normal file
View File

@ -0,0 +1,33 @@
# Copyright (C) 2008 by Albert Cervera i Areny
# albert@nan-tic.com
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the
# Free Software Foundation, Inc.,
# 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
class Document:
    # A named document: the template it is associated with, its boxes and
    # its formatted text.
    def __init__(self):
        self.boxes = []
        self.name, self.template, self.formatedText = '', None, None

    # Appends 'box' to the document's list of boxes.
    def addBox(self, box):
        self.boxes.append(box)
class DocumentBox:
    # Text of one box of a document plus the template box it refers to.
    def __init__(self):
        self.templateBox = None
        self.text = ''

66
NanScan/Hamming.py Normal file
View File

@ -0,0 +1,66 @@
# Copyright (C) 2008 by Albert Cervera i Areny
# albert@nan-tic.com
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the
# Free Software Foundation, Inc.,
# 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
from Translator import *
## @brief This class calculates the Hamming distance between two strings.
#
# When two given characters differ completely they add 2 to the final distance
# between the strings. Two 'similar' characters (defined by the given translator
# or the default translator if none specified) will add 1 and 0 for two
# identical characters.
#
# This distinction of 'similar' and 'different' characters can be useful to
# 'correct' OCR defects.
class Hamming:
    ## @brief Calculates Hamming distance between two strings. Optionally a
    # translator can be provided. A default translator will be used if none
    # specified.
    @staticmethod
    def hamming(text1, text2, translator=None):
        if not translator:
            translator = Translator()
        similar1 = translator.translated(text1)
        similar2 = translator.translated(text2)

        # Every character one string has in excess is a full error. Note
        # that we multiply by 2 because 'errors' weight 2 and 'semi-errors'
        # weight 1.
        distance = 2 * abs(len(text1) - len(text2))

        for i, (a, b) in enumerate(zip(text1, text2)):
            if a == b:
                # Identical characters add nothing.
                continue
            if similar1[i] == similar2[i]:
                # 'Similar' characters: semi-error.
                distance += 1
            else:
                distance += 2
        return distance
if __name__ == '__main__':
    # Print a few sample distances...
    samples = [
        ('si', '$l'),
        ('abc', 'abc'),
        ('abcabc', 'abc'),
        ('abcdef', 'abc'),
        ('abcdef', 'bcd'),
        ('bcdef', 'abc'),
    ]
    for first, second in samples:
        print(Hamming.hamming(first, second))

    # ...then repeat one computation many times as a rough speed check.
    for x in range(10000):
        Hamming.hamming('text de la plantilla', 'text llarg que pot ser del document que tractem actualment')

65
NanScan/Levenshtein.py Normal file
View File

@ -0,0 +1,65 @@
# Copyright (C) 2008 by Albert Cervera i Areny
# albert@nan-tic.com
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the
# Free Software Foundation, Inc.,
# 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
# TODO: If available, wrap levenshtein C implementation
class Levenshtein:
    ## @brief Calculates the Levenshtein (edit) distance between two strings:
    # the minimum number of single character insertions, deletions and
    # substitutions needed to turn text1 into text2.
    @staticmethod
    def levenshtein(text1, text2):
        # Levenshtein distance if one string is empty, is the
        # length of the other string, len(text) inserts.
        if len(text1) == 0:
            return len(text2)
        if len(text2) == 0:
            return len(text1)

        # The original built the matrix with '[[0] * n] * m', which makes
        # every row the same list object, and left out the row and column
        # for the empty prefix, so the first characters were never
        # compared. Only two rows are needed at a time, each with
        # len(text2) + 1 entries; 'previous' starts as the distances from
        # the empty prefix of text1.
        previous = list(range(len(text2) + 1))
        for i, char1 in enumerate(text1):
            # Distance from text1[:i + 1] to the empty prefix of text2.
            current = [i + 1]
            for j, char2 in enumerate(text2):
                if char1 == char2:
                    cost = 0
                else:
                    cost = 1
                current.append(min(
                    previous[j + 1] + 1,   # deletion
                    current[j] + 1,        # insertion
                    previous[j] + cost     # substitution
                ))
            previous = current
        return previous[-1]
if __name__ == '__main__':
    # Print a few sample distances...
    samples = [
        ('abc', 'abc'),
        ('abcabc', 'abc'),
        ('abcdef', 'abc'),
        ('abcdef', 'bcd'),
        ('bcdef', 'abc'),
    ]
    for first, second in samples:
        print(Levenshtein.levenshtein(first, second))

    # ...then repeat one computation many times as a rough speed check.
    for x in range(10000):
        Levenshtein.levenshtein('text de la plantilla', 'text llarg que pot ser del document que tractem actualment')

373
NanScan/Ocr.py Executable file
View File

@ -0,0 +1,373 @@
# Copyright (C) 2008 by Albert Cervera i Areny
# albert@nan-tic.com
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the
# Free Software Foundation, Inc.,
# 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
import os
# Do not import everything as Template is defined in string too
from string import lower
import codecs
import tempfile
import shutil
import math
from TemporaryFile import *
from Analyzer import *
from gamera.core import *
from PyQt4.QtCore import *
from PyQt4.QtGui import *
class Character:
    # One recognized character and the rectangle it occupies.
    def __init__(self):
        self.character = self.box = None
def boxComparison(x, y):
    # Comparison function ordering characters by the x coordinate of their
    # boxes: returns 1, -1 or 0.
    first = x.box.x()
    second = y.box.x()
    if first > second:
        return 1
    if first < second:
        return -1
    return 0
## @breif This class allows using an OCR and provides several convenient functions
# regarding text and image processing such as deskewing or obtaining formated text.
class Ocr(Analyzer):
file = ""
## @brief Uses tesseract to recognize text of the current image.
def tesseract(self):
directory = tempfile.mkdtemp()
path = os.path.join( directory, 'tesseract' )
self.spawn( 'tesseract', self.file, path, '-l', 'spa', 'batch.nochop', 'makebox' )
f=codecs.open(path + '.txt', 'r', 'utf-8')
content = f.read()
f.close()
shutil.rmtree(directory, True)
return content
## @brief Uses cuneiform to recognize text of the current image.
def cuneiform(self):
directory = tempfile.mkdtemp()
path = os.path.join( directory, 'cuneiform' )
os.spawnlpe(os.P_WAIT, '/home/albert/d/git/cuneiform/bin/cuneiform', '/home/albert/d/git/cuneiform/bin/cuneiform', self.file, path, '-l', 'spa', 'batch.nochop', {'LD_LIBRARY_PATH': '/home/albert/d/git/cuneiform/lib'} )
f=codecs.open(path + '.txt', 'r', 'utf-8')
content = f.read()
f.close()
shutil.rmtree(directory, True)
return content
## @brief Parses tesseract output creating a list of Character objects.
def parseTesseractOutput(self, input):
output = []
# Output example line: "w 116 1724 133 1736"
# Coordinates start at bottom left corner but we convert this into top left.
# Convert pixel coordinates into millimeters too.
for x in input.split('\n'):
if not x:
continue
line = x.split(' ')
x1 = int(line[1])
x2 = int(line[3])
y1 = self.height - int(line[2])
y2 = self.height - int(line[4])
width = x2 - x1
height = y1 - y2
c = Character()
c.character = line[0]
x1 = float(x1) / self.dotsPerMillimeterX
width = float(width) / self.dotsPerMillimeterX
y2 = float(y2) / self.dotsPerMillimeterY
height = float(height) / self.dotsPerMillimeterY
c.box = QRectF( x1, y2, width, height )
output.append( c )
return output
## @brief Returns the text of a given region of the image.
# It's the same as calling formatedText().
def textInRegion(self, region):
return self.formatedText( region )
## @brief Returns the bounding rectangle of the text returned by textInRegion for
# the given region.
def featureRectInRegion(self, region):
lines = self.textLinesWithSpaces( region )
rect = QRectF()
for line in lines:
for c in line:
rect = rect.united( c.box )
return rect
## @brief Uses ImageMagick's 'convert' application to convert the given image
# (QImage) into gray scale
def convertToGrayScale(self, image, output):
input = TemporaryFile.create( '.tif' )
image.save( input, 'TIFF' )
os.spawnlp(os.P_WAIT, 'convert', 'convert', '-type', 'grayscale', '-depth', '8', input, output)
## @brief Uses Gamera OTSU threashold algorithm to convert into binary
def convertToBinary(self, input, output):
image = load_image(input)
# Converting
img = image.to_greyscale()
# Thresholding
onebit = img.otsu_threshold()
# Saving for tesseract processing
onebit.save_tiff(output)
## @brief Scans the given image (QImage) with the OCR.
def scan(self, image):
self.image = image
self.width = self.image.width()
self.height = self.image.height()
self.dotsPerMillimeterX = float( self.image.dotsPerMeterX() ) / 1000.0
self.dotsPerMillimeterY = float( self.image.dotsPerMeterY() ) / 1000.0
self.file = TemporaryFile.create('.tif')
self.convertToGrayScale(image, self.file)
txt = lower( self.tesseract() )
self.boxes = self.parseTesseractOutput(txt)
## @brief Obtains top most box of the given list
def topMostBox(self, boxes):
top = None
for x in boxes:
if not top or x.box.y() < top.box.y():
top = x
return top
## @brief Obtain text lines in a list of lines where each line is a list
# of ordered characters.
# Note that no spaces are added in this function and each character is a
# Character class instance.
# The algorithm used is pretty simple:
# 1- Put all boxes in a list ('boxes')
# 2- Search top most box, remove from pending 'boxes' and add in a new line
# 3- Search all boxes that vertically intersect with current box, remove from
# pending and add in the current line
# 4- Go to number 2 until all boxes have been processed.
# 5- Sort the characters of each line by the y coordinate.
def textLines(self, region=None):
# If we use 'if region:' instead of comparing with None
# rects with top (or left) >= bottom (or right), will return
# False and thus return _all_ boxes instead of _none_.
# Indeed, 'if region:' is equivalent to 'if region.isValid():'
if region != None:
# Filter out boxes not in the given region
boxes = []
for x in self.boxes:
if region.intersects(x.box):
boxes.append(x)
else:
# Copy as we'll remove items from the list
boxes = self.boxes[:]
lines = []
while boxes:
box = self.topMostBox( boxes )
boxes.remove( box )
line = []
line.append( box )
toRemove = []
for x in boxes:
if x.box.top() > box.box.bottom():
continue
elif x.box.bottom() < box.box.top():
continue
line.append( x )
toRemove.append( x )
for x in toRemove:
boxes.remove( x )
lines.append( line )
# Now that we have all boxes in its line. Sort each of
# them
for line in lines:
line.sort( boxComparison )
return lines
## @brief This function is similar to textLines() but adds spaces between words.
# The result is also a list of lines each line being a list of Character objects.
def textLinesWithSpaces(self, region=None):
lines = self.textLines( region )
# Now we have all lines with their characters in their positions.
# Here we write and add spaces appropiately.
# In order not to be distracted with character widths of letters
# like 'm' or 'i' (which are very wide and narrow), we average
# width of the letters on a per line basis. This shows good
# results, by now, on text with the same char size in the line,
# which is quite usual.
for line in lines:
width = 0
count = 0
left = None
spacesToAdd = []
words = []
for c in line:
if left:
# If separtion between previous and current char
# is greater than a third of the average character
# width we'll add a space.
if c.box.left() - left > ( width / count ) / 3:
if spacesToAdd:
words.append( line[spacesToAdd[-1]:count] )
spacesToAdd.append( count )
# c.character is already a unicode string
left = c.box.right()
width += c.box.width()
count += 1
# Try to find out if they are fixed sized characters
# We've got some problems with fixed size fonts. In some cases the 'I' letter will
# have the width of a pipe but the distance between characters will be fixed. In these
# cases it's very probable our algorithm will add incorrect spaces before and/or after
# the 'I' letter. This should be fixed by somehow determining if it's a fixed sized
# font. The commented code below tries to do just that by calculating distances within
# the letters of each word. We need to find out if something like this can work and
# use it.
#for x in words:
#dist = []
#for c in range( len(x)-1 ):
#dist.append( x[c+1].box.center().x() - x[c].box.center().x() )
#print 'Paraula: ', (u''.join( [i.character for i in x] )).encode( 'ascii', 'ignore')
#print 'Distancies: ', dist
# Reverse so indexes are still valid after insertions
spacesToAdd.reverse()
previousIdx = None
for idx in spacesToAdd:
c = Character()
c.character = u' '
c.box = QRectF()
c.box.setTop( line[idx - 1].box.top() )
c.box.setBottom( line[idx - 1].box.bottom() )
c.box.setLeft( line[idx - 1].box.right() )
c.box.setRight( line[idx].box.left() )
line.insert( idx, c )
return lines
## @brief Returns the text in the given region as a string. Spaces included.
def formatedText(self, region=None):
lines = self.textLinesWithSpaces( region )
texts = []
text = u''
for line in lines:
for c in line:
text += c.character
texts.append(text)
return u'\n'.join( texts )
## @brief Calculates the slope of the text lines.
# This value is used by the deskew() function to rotate the image and
# align text horizontally. Note that the slope can be calculated
# from the text of only a region of the image.
#
# Algorithm:
#   1- Calculate textLines()
#   2- For each line with three or more characters calculate the linear
#      regression (pick up the slope) using the x coordinate of each box
#      and, as y, the vertical middle point of the box.
#   3- Calculate the average of all slopes.
#
# @return average slope, or 0 if no line has enough characters.
def slope(self, region=None):
    # TODO: We should probably discard values that highly differ
    # from the average for the final value to be used to rotate.
    slopes = []
    for textLine in self.textLines( region ):
        if len(textLine) < 3:
            # linearRegression() needs at least three points
            continue
        xs = [char.box.x() for char in textLine]
        ys = [char.box.y()+ (char.box.height()/2) for char in textLine]
        # Only the first value returned (the slope) is of interest here
        slopes.append( linearRegression(xs, ys)[0] )
    if not slopes:
        return 0
    total = 0
    for value in slopes:
        total += value
    return total / len(slopes)
## @brief Rotates the image once to compensate for the slope of its text.
#
# The slope is obtained with slope() for the given region and the image
# stored in self.image is replaced by the rotated one.
def deskewOnce(self, region=None):
    angle = -math.atan( self.slope( region ) )
    rotation = QTransform()
    rotation.rotateRadians( angle )
    self.image = self.image.transformed( rotation, Qt.SmoothTransformation )
## @brief Rotates the image until its text lines are (almost) horizontal.
#
# Up to two rotation passes are applied. A pass is only executed while the
# absolute value of the slope returned by slope() is greater than 0.001, so
# text leaning in either direction gets corrected.
#
# @param region region used to calculate the slope (None by default).
def deskew(self, region=None):
    for attempt in range(2):
        if abs( self.slope( region ) ) <= 0.001:
            return
        # deskewOnce() is a bound method: 'self' must not be passed again.
        self.deskewOnce( region )
# Register the Ocr class under the name 'text' so that Analyzer.create('text')
# returns a new Ocr instance.
Analyzer.registerAnalyzer( 'text', Ocr )
## @brief Initializes the OCR functions that must be executed once before
# the library can work.
#
# Currently it only initializes Gamera, which is not being used for now.
def initOcrSystem():
    init_gamera()
## @brief Calculates the least squares linear regression of a list of points.
#
# Fits the line "y = ax + b" to the points given by X[] and Y[].
#
# Usage:
#   a, b, RR = linearRegression(X, Y)
#
# @param X sequence with the x coordinate of each point.
# @param Y sequence with the y coordinate of each point. Same length as X.
# @return tuple (a, b, RR) with the slope, the intercept and the R^2 value
#         of the regression line. If all y values are exactly equal, R^2 is
#         mathematically undefined (0/0) and 1.0 is returned, given that the
#         horizontal line found fits the points.
#
# Raises ValueError if X and Y have different lengths or if there are fewer
# than three points. Raises ZeroDivisionError if the determinant is zero,
# which happens when all x values are equal (vertical line).
def linearRegression(X, Y):
    if len(X) != len(Y):
        raise ValueError('unequal length')
    N = len(X)
    if N <= 2:
        raise ValueError('three or more values needed')
    Sx = Sy = Sxx = Sxy = 0.0
    for x, y in zip(X, Y):
        Sx = Sx + x
        Sy = Sy + y
        Sxx = Sxx + x*x
        Sxy = Sxy + x*y
    det = Sxx * N - Sx * Sx
    a, b = (Sxy * N - Sy * Sx)/det, (Sxx * Sy - Sx * Sxy)/det
    meanY = Sy / N
    meanerror = residual = 0.0
    for x, y in zip(X, Y):
        meanerror = meanerror + (y - meanY)**2
        residual = residual + (y - a * x - b)**2
    if meanerror == 0.0:
        # All y are equal: avoid dividing by zero on perfectly horizontal data
        RR = 1.0
    else:
        RR = 1 - residual/meanerror
    return a, b, RR

437
NanScan/Recognizer.py Normal file
View File

@ -0,0 +1,437 @@
# coding=iso-8859-1
# Copyright (C) 2008 by Albert Cervera i Areny
# albert@nan-tic.com
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the
# Free Software Foundation, Inc.,
# 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
from PyQt4.QtCore import *
from Barcode import *
from Ocr import *
from DataMatrix import *
from Analyzer import *
from Template import *
from Document import *
from Trigram import *
from Hamming import *
from Levenshtein import *
from Translator import *
import tempfile
## @brief Thread that executes the scan() function of one analyzer on an image.
class Analyze(QThread):
    ## @brief Stores the analyzer and the image it will scan when the
    # thread is started.
    def __init__(self, analyzer, image, parent=None):
        QThread.__init__(self, parent)
        self.image = image
        self.analyzer = analyzer

    ## @brief Thread body: scans the stored image with the stored analyzer.
    def run(self):
        analyzer, image = self.analyzer, self.image
        analyzer.scan( image )
class Recognizer(QObject):
def __init__(self, parent=None):
QObject.__init__(self, parent)
self.analyzers = {}
for x in Analyzer.analyzers:
self.analyzers[x] = Analyzer.create(x)
self.ocr = self.analyzers['text']
self.image = None
self.threads = []
## @brief Returns the text of a given region of the image.
def textInRegion(self, region, type=None):
if type in self.analyzers.keys():
return self.analyzers[type].textInRegion( region )
else:
return None
## @brief Returns the bounding rectangle of the text returned by textInRegion for
# the given region.
def featureRectInRegion(self, region, type=None):
if type in self.analyzers:
return self.analyzers[type].featureRectInRegion( region )
else:
return None
def boxes(self, type):
if type in self.analyzers:
return self.analyzers[type].boxes
else:
return None
def analyzersAvailable(self):
return self.analyzers.keys()
# Synchronous
def recognize(self, image):
self.image = image
for analyzer in self.analyzers.values():
analyzer.scan( image )
#self.barcode.scan( image )
#self.ocr.scan( image )
## @brief Asynchronous: Starts analyzers in background threads. Emits finished() at the end
def startRecognition(self, image):
self.image = image
self.threads = []
for analyzer in self.analyzers.values():
thread = Analyze( analyzer, image, self )
self.connect( thread, SIGNAL('finished()'), self.recognitionFinished )
self.threads.append( thread )
thread.start()
def recognitionFinished(self):
print "THREAD FINISHED"
for thread in self.threads:
if thread.isRunning():
return
self.emit( SIGNAL('finished()') )
self.threads = []
def filter(self, value, filterType):
numeric = '0123456789'
alphabetic = 'abcçdefghijklmnñopqrstuvwxyz'
if filterType == 'numeric':
return u''.join( [x for x in value if x in numeric] )
elif filterType == 'alphabetic':
return u''.join( [x for x in value if x in alphabetic] )
elif filterType == 'alphanumeric':
return u''.join( [x for x in value if x in numeric+alphabetic] )
elif filterType == 'none':
return value
else:
print "Filter type '%s' not implemented" % filterType
return value
## @brief Extracts the information of the recognized image using the
# given template.
# Optionally an x and y offset can be applied to the template before
# extracting data.
# Note that the image must have been scanned (using scan() or startScan())
# before using this function.
def extractWithTemplate(self, template, xOffset = 0, yOffset = 0):
if not template:
return None
document = Document()
for templateBox in template.boxes:
if not templateBox.text:
continue
rect = QRectF( templateBox.rect )
rect.translate( xOffset, yOffset )
text = self.textInRegion( rect, templateBox.recognizer )
text = self.filter( text, templateBox.filter )
documentBox = DocumentBox()
documentBox.text = text
documentBox.templateBox = templateBox
document.addBox( documentBox )
return document
## @brief Tries to find out the best template in 'templates' for the current
# image.
# Use the optional parameter 'offset' to specify up to how many millimeters
# the template should be translated to find the best match. Setting this to
# 5 (the default) will make the template move 5 millimeter to the right,
# 5 to the left, 5 to the top and 5 to the bottom. This means 121 positions
# per template.
# Note that the image must have been scanned (using scan() or startScan())
# before using this function.
#
# TODO: Using offsets to find the best template is easy but highly inefficient.
# a smarter solution should be implemented.
def findMatchingTemplateByOffset( self, templates, offset = 5 ):
max = 0
best = {
'template': None,
'document': Document(),
'xOffset' : 0,
'yOffset' : 0
}
for template in templates:
if not template.boxes:
continue
# Consider up to 5 millimeter offset
for xOffset in range(-5,6):
for yOffset in range(-5,6):
score = 0
matcherBoxes = 0
currentDocument = self.extractWithTemplate( template, xOffset, yOffset )
for documentBox in currentDocument.boxes:
templateBox = documentBox.templateBox
if documentBox.templateBox.type != 'matcher':
print "Jumping %s due to type %s" % ( templateBox.name, templateBox.type )
continue
matcherBoxes += 1
similarity = Trigram.trigram( documentBox.text, templateBox.text )
score += similarity
score = score / matcherBoxes
if score > max:
max = score
best = {
'template': template,
'document': currentDocument,
'xOffset' : xOffset,
'yOffset' : yOffset
}
print "Template %s has score %s with offset (%s,%s)" % (template.name, score, xOffset, yOffset)
return best
## @brief Tries to find out the best template in 'templates' for the current
# image.
# This algorithm starts by looking for template boxes of type 'matching' in the
# text and then looks if the relative positions of the new document and template
# boxes are similar. This is intended to be faster than exhaustive algorithm used
# in findMatchingTemplateByOffset().
#
# Note that the image must have been scanned (using scan() or startScan())
# before using this function.
#
# TODO: Using offsets to find the best template is easy but highly inefficient.
# a smarter solution should be implemented.
def findMatchingTemplateByText( self, templates ):
max = 0
best = {
'template': None,
'document': Document(),
'xOffset' : 0,
'yOffset' : 0
}
for template in templates:
if not template.boxes:
continue
# Find out template's offset
offset = self.findTemplateOffset( template )
if not offset:
continue
score = 0
matcherBoxes = 0
# Apply template with offset found
currentDocument = self.extractWithTemplate( template, offset.x(), offset.y() )
for documentBox in currentDocument.boxes:
print "Applying..."
if documentBox.templateBox.type != 'matcher':
continue
templateBox = documentBox.templateBox
matcherBoxes += 1
similarity = Trigram.trigram( documentBox.text, templateBox.text )
score += similarity
score = score / matcherBoxes
print "Score: ", score
if score > max:
max = score
best = {
'template': template,
'document': currentDocument,
'xOffset' : offset.x(),
'yOffset' : offset.y()
}
return best
## @brief Returns a QPoint with the offset that needs to be applied to the given
# template to best fit the current image.
def findTemplateOffset( self, template ):
if not template.boxes:
return QPoint( 0, 0 )
lines = self.ocr.textLinesWithSpaces()
print "FORMATED: ", self.ocr.formatedText().encode( 'ascii', 'replace' )
# Create a default translator only once
translator = Translator()
# This list will keep a pointer to each template box of type 'matcher'
matchers = []
for templateBox in template.boxes:
if templateBox.type != 'matcher':
continue
templateBoxText = templateBox.text.strip()
templateBox.ranges = Range.extractAllRangesFromDocument( lines, len(templateBoxText), templateBox.featureRect.width() + 2 )
for ran in templateBox.ranges:
text = ran.text()
#value = Hamming.hamming( text, templateBoxText, translator )
#value = 1.0 - Trigram.trigram( text, templateBoxText )
value = Levenshtein.levenshtein( text, templateBoxText )
ran.distance = value
#print "Comparison: '%s', '%s', '%f'" % (text.encode('ascii','ignore'), templateBoxText, value)
#five = u'|'.join( [ x.text().encode('ascii','ignore') for x in templateBox.ranges[0:200] ])
#print 'First five ranges: ', five
templateBox.ranges.sort( rangeDistanceComparison )
for x in templateBox.ranges[0:20]:
print "Comparison: '%s', '%s', '%f'" % (x.text().encode('ascii','replace'), templateBoxText, x.distance)
#five = u'|'.join( [ x.text().encode('ascii','ignore') for x in templateBox.ranges[0:10] ])
#print 'First five ranges: ', five
if templateBox.ranges:
bestRange = templateBox.ranges[0]
print "The best match for template box '%s' is '%s' with distance %d" % (templateBoxText, bestRange.text().encode('ascii','replace'), bestRange.distance )
matchers.append( templateBox )
# Once we have all ranges sorted for each template box we search which
# range combination matches the template.
iterator = TemplateBoxRangeIterator( matchers )
i = 0
for ranges in iterator:
documentBoxCenter = ranges[0].rect().center()
templateBoxCenter = matchers[0].featureRect.center()
diff = documentBoxCenter - templateBoxCenter
#print "Difference: ", diff
#print "Document: ", documentBoxCenter
#print "Template: ", templateBoxCenter
found = True
for pos in range(1,len(ranges)):
documentBoxCenter = ranges[pos].rect().center()
templateBoxCenter = matchers[pos].featureRect.center()
d = documentBoxCenter - templateBoxCenter
# If difference of relative positions of boxes between
# template and document are bigger than 5mm we discard
# the ranges
#print "Difference in loop: ", d
#print "Document: %s, %s" % ( documentBoxCenter, ranges[pos].rect() )
#print "Template: ", templateBoxCenter
#print "Comparison: %s --- %s" % (abs(d.x()) + 5.0, abs(diff.x() ) )
if abs(d.x() - diff.x()) > 5:
found = False
break
if abs(d.y() - diff.y()) > 5:
found = False
break
if found:
break
i += 1
if i > 1000:
break
if found:
return diff
else:
return None
## @brief Iterates over combinations of ranges, taking one range from the
# 'ranges' list of each of the given boxes.
#
# Each iteration returns a list with one range per box. 'pos' holds, for each
# box, the index of the last range made available and 'loopPos' the indexes
# used to build the next combination. Once 'loopPos' reaches 'pos', the box
# whose next range has the smallest 'distance' gets that range made available.
#
# Every box must have at least one range. An empty list of boxes produces
# no combinations.
class TemplateBoxRangeIterator:
    def __init__(self, boxes):
        self.boxes = boxes
        self.pos = [0] * len(self.boxes)
        self.loopPos = [0] * len(self.boxes)
        # Index of the box whose position was advanced last
        self.added = None
        # True once the last combination has been returned
        self.finished = not self.boxes

    def __iter__(self):
        return self

    ## @brief Returns the next combination of ranges. Raises StopIteration
    # after the last one has been returned.
    def next(self):
        if self.finished:
            raise StopIteration

        result = [ box.ranges[index] for box, index in zip(self.boxes, self.loopPos) ]

        if self.loopPos == self.pos:
            # Search next value to add
            value = float('infinity')
            for x in range(len(self.pos)):
                candidates = self.boxes[x].ranges
                if self.pos[x] >= len(candidates) - 1:
                    continue
                if candidates[ self.pos[x] + 1 ].distance < value:
                    value = candidates[ self.pos[x] + 1 ].distance
                    self.added = x
            if value == float('infinity'):
                # No box has more ranges to offer. The combination just
                # built is the last one: return it now and stop on the next
                # call, instead of discarding it.
                self.finished = True
            else:
                self.pos[self.added] += 1
                self.loopPos = [0] * len(self.boxes)
                self.loopPos[self.added] = self.pos[self.added]
        else:
            for x in range(len(self.loopPos)):
                if x == self.added:
                    continue
                if self.loopPos[x] < self.pos[x]:
                    self.loopPos[x] += 1
                    break
        return result

    # Name of the same function in the Python 3 iterator protocol
    __next__ = next
## @brief Comparison function that orders ranges by their 'distance'.
#
# Returns 1 if x.distance is greater than y.distance, -1 if it is smaller
# and 0 otherwise.
def rangeDistanceComparison(x, y):
    greater = x.distance > y.distance
    smaller = x.distance < y.distance
    return int(greater) - int(smaller)
## @brief This class represents a group of consecutive characters in one
# line of a document.
#
# 'document' is a list of lines and each line a list of characters. The
# range covers 'length' characters of line number 'line', starting at 'pos'.
class Range:
    def __init__(self):
        self.line = 0
        self.pos = 0
        self.length = 0
        self.document = None

    ## @brief Returns the list of characters covered by the range
    def _characters(self):
        start = self.pos
        return self.document[self.line][start:start + self.length]

    ## @brief Returns a unicode string with the text of the current range
    def text(self):
        return u''.join( [x.character for x in self._characters()] )

    ## @brief Returns the bounding rectangle of the text in the range
    def rect(self):
        rect = QRectF()
        for c in self._characters():
            rect = rect.united( c.box )
        return rect

    ## @brief Returns a list with all possible ranges of size length of the
    # given document.
    #
    # Lines with 'length' characters or fewer produce a single range with the
    # whole line. An empty list is returned if length is zero or negative.
    # The 'width' parameter is currently not used.
    @staticmethod
    def extractAllRangesFromDocument(lines, length, width=0):
        if length <= 0:
            return []
        ranges = []
        for index, line in enumerate(lines):
            size = len(line)
            if length >= size:
                # The line is too short: use all of it as the only range
                starts, span = [0], size
            else:
                starts, span = range(size - length + 1), length
            for start in starts:
                ran = Range()
                ran.line = index
                ran.pos = start
                ran.length = span
                ran.document = lines
                ranges.append( ran )
        return ranges

192
NanScan/ScanDialog.py Normal file
View File

@ -0,0 +1,192 @@
from Scanner import *
from PyQt4.QtGui import *
from PyQt4.QtCore import *
from PyQt4.uic import *
import gettext
import locale
import gc
# Use the locale configured in the user's environment
locale.setlocale(locale.LC_ALL, '')
# Use 'nanscan' as the gettext domain, looking for its catalogs in the
# current directory
gettext.bindtextdomain('nanscan', '.')
gettext.textdomain('nanscan')
# Install the _() translation function (returning unicode strings) so it
# can be used in this module without importing it
gettext.install('nanscan', '.', unicode=1)
## @brief List item that holds a scanned image and shows it as an icon with
# a small mark painted on it.
class ImageItem(QListWidgetItem):
    def __init__(self, parent=None):
        QListWidgetItem.__init__(self, parent)
        self.image = None
        self.name = None
        self.mark = None

    ## @brief Sets the name of the item
    def setName(self, name):
        self.name = name

    ## @brief Sets the image and updates the icon. Image should be a QImage
    def setImage(self, image):
        self.image = image
        self.updateIcon()

    ## @brief Sets the mark and updates the icon. Mark should be a QImage
    def setMark(self, mark):
        self.mark = mark
        self.updateIcon()

    ## @brief Sets as icon the image, scaled to the icon size of the list
    # widget, with the mark drawn at position (5, 5).
    #
    # Nothing is done until both the image and the mark have been set.
    def updateIcon(self):
        if not (self.image and self.mark):
            return
        iconSize = self.listWidget().iconSize()
        scaled = self.image.scaled( iconSize, Qt.KeepAspectRatio, Qt.SmoothTransformation )
        painter = QPainter( scaled )
        painter.drawImage( 5, 5, self.mark )
        painter.end()
        pixmap = QPixmap.fromImage( scaled )
        self.setIcon( QIcon( pixmap ) )
## @brief Dialog that scans images and stores each of them in a background
# thread, showing a thumbnail with the status of each image.
class ScanDialog(QDialog):
    def __init__(self, parent=None):
        QDialog.__init__(self, parent)

        baseDir = os.path.abspath( os.path.dirname(__file__) )
        QResource.registerResource( os.path.join(baseDir,'common.rcc') )
        loadUi( os.path.join(baseDir,'ScanDialog.ui'), self )

        self.connect( self.pushAccept, SIGNAL('clicked()'), self.accept )
        self.connect( self.pushScan, SIGNAL('clicked()'), self.scan )
        self.connect( self.pushDuplexScan, SIGNAL('clicked()'), self.duplexScan )
        self.uiList.setIconSize( QSize( 128, 128 ) )
        # Used as a counter of the images that are being saved
        self.saving = QSemaphore()
        # Scanner of the scan in progress. It is stored in its own attribute:
        # storing it in 'self.scan' would replace the scan() function.
        self.scanner = None
        self.setScanning( False )

        # By default images are stored as files in
        # the temporary directory. The application
        # may choose to override the 'saveClass' property
        # with an appropriate SaveThreaded() subclass
        self.saveClass = FileSaveThreaded

    ## @brief The dialog can only be closed when there is no scan in
    # progress and no image being saved.
    def closeEvent(self, event):
        if self.hasFinished:
            event.accept()
        else:
            event.ignore()

    ## @brief Sets whether a scan is in progress and updates the buttons
    def setScanning(self, value):
        self.scanning = value
        self.updateAccept()

    ## @brief Increases the number of images being saved
    def addSaving(self):
        self.saving.release()
        self.updateAccept()

    ## @brief Decreases the number of images being saved
    def removeSaving(self):
        self.saving.acquire()
        self.updateAccept()

    ## @brief Disables the buttons and shows the busy cursor while scanning
    # or saving. Enables them and restores the cursor otherwise.
    def updateAccept(self):
        busy = bool( self.scanning or self.saving.available() )
        self.hasFinished = not busy
        for button in ( self.pushAccept, self.pushScan, self.pushDuplexScan ):
            button.setEnabled( not busy )
        if busy:
            self.setCursor( Qt.BusyCursor )
        else:
            self.unsetCursor()

    ## @brief Creates a Scanner with the given duplex mode, connects its
    # signals and starts scanning.
    def _startScan(self, duplex):
        self.setScanning( True )
        self.scanner = Scanner(self)
        self.scanner.setDuplex( duplex )
        self.connect( self.scanner, SIGNAL('scanned(QImage)'), self.scanned )
        self.connect( self.scanner, SIGNAL('error(int)'), self.error )
        self.connect( self.scanner, SIGNAL('finished()'), self.finished )
        self.scanner.startScan()

    ## @brief Starts a scan without duplex
    def scan(self):
        self._startScan( False )

    ## @brief Starts a duplex scan
    def duplexScan(self):
        self._startScan( True )

    ## @brief Shows the message of the given ScannerError code and
    # considers the scan finished.
    def error(self, code):
        if code == ScannerError.NoDeviceFound:
            message = 'No device found'
        elif code == ScannerError.CouldNotOpenDevice:
            message = 'Could not open device'
        elif code == ScannerError.AcquisitionError:
            message = 'Error acquiring image'
        else:
            message = 'Unknown error'
        self.setScanning( False )
        QMessageBox.critical( self, 'Scanning Error', message )

    ## @brief Adds the scanned image to the list and starts saving it in a
    # thread of class 'saveClass'.
    def scanned(self, image):
        item = ImageItem( self.uiList )
        item.setImage( image )
        item.setMark( QImage( ':/images/images/save.png' ) )
        item.setName( 'scanned image ' + unicode( QDateTime.currentDateTime().toString() ) )
        item.setToolTip( _('Saving image...') )
        self.uiList.addItem( item )

        if not self.saveClass:
            return
        # Count the image as being saved only if a thread is started.
        # Otherwise saved() would never be called and the dialog would
        # stay disabled forever.
        self.addSaving()
        thread = self.saveClass( self )
        thread.item = item
        self.connect( thread, SIGNAL('finished()'), self.saved )
        thread.start()

    ## @brief Called when the scanner has finished
    def finished(self):
        self.setScanning( False )

    ## @brief Called when a saving thread has finished. Updates the mark
    # and tooltip of the item with the result.
    def saved(self):
        self.removeSaving()
        thread = self.sender()
        if not thread.error:
            thread.item.setMark( QImage( ':/images/images/ok.png' ) )
            thread.item.setToolTip( _('Image stored successfully') )
        else:
            thread.item.setMark( QImage( ':/images/images/cancel.png' ) )
            thread.item.setToolTip( _('Error storing image') )
        # Free memory used by the BIG image
        thread.item.image = None
        gc.collect()
## @brief Base class of the threads that save an image once it has been scanned.
#
# 'item' is the item whose image must be saved. 'error' must be set to
# False by subclasses when the image has been saved correctly.
class SaveThreaded(QThread):
    def __init__(self, parent=None):
        QThread.__init__(self, parent)
        self.error = True
        self.item = None
## @brief This class stores an image into a file in the directory
# specified by 'directory' (temporary directory by default).
class FileSaveThreaded(SaveThreaded):
directory = unicode( QDir.tempPath() )
def run(self):
self.error = True
d = QDir( FileSaveThreaded.directory )
d.setSorting( QDir.Name )
l = d.entryList( ['*.png'] )
if l:
last = l[-1]
last = last.split('.')[0]
last = int(last)
else:
last = 0
next = "%06d.png" % (last + 1)
next = os.path.join( FileSaveThreaded.directory, next )
print "saving: ", next
if self.item.image.save( next, 'PNG' ):
self.error = False

77
NanScan/ScanDialog.ui Normal file
View File

@ -0,0 +1,77 @@
<ui version="4.0" >
<class>Dialog</class>
<widget class="QDialog" name="Dialog" >
<property name="geometry" >
<rect>
<x>0</x>
<y>0</y>
<width>400</width>
<height>300</height>
</rect>
</property>
<property name="windowTitle" >
<string>Scanner</string>
</property>
<layout class="QVBoxLayout" >
<item>
<widget class="QListWidget" name="uiList" >
<property name="iconSize" >
<size>
<width>128</width>
<height>128</height>
</size>
</property>
<property name="flow" >
<enum>QListView::LeftToRight</enum>
</property>
<property name="isWrapping" stdset="0" >
<bool>true</bool>
</property>
<property name="resizeMode" >
<enum>QListView::Adjust</enum>
</property>
</widget>
</item>
<item>
<layout class="QHBoxLayout" >
<item>
<widget class="QPushButton" name="pushScan" >
<property name="text" >
<string>&amp;Scan</string>
</property>
</widget>
</item>
<item>
<widget class="QPushButton" name="pushDuplexScan" >
<property name="text" >
<string>Scan &amp;Duplex</string>
</property>
</widget>
</item>
<item>
<spacer>
<property name="orientation" >
<enum>Qt::Horizontal</enum>
</property>
<property name="sizeHint" >
<size>
<width>40</width>
<height>20</height>
</size>
</property>
</spacer>
</item>
<item>
<widget class="QPushButton" name="pushAccept" >
<property name="text" >
<string>&amp;Accept</string>
</property>
</widget>
</item>
</layout>
</item>
</layout>
</widget>
<resources/>
<connections/>
</ui>

25
NanScan/Scanner.py Normal file
View File

@ -0,0 +1,25 @@
# Copyright (C) 2008 by Albert Cervera i Areny
# albert@nan-tic.com
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the
# Free Software Foundation, Inc.,
# 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
import os
from backends.common import *
if os.name == 'nt':
from backends.twain_backend import *
else:
from backends.sane_backend import *

67
NanScan/Template.py Normal file
View File

@ -0,0 +1,67 @@
# Copyright (C) 2008 by Albert Cervera i Areny
# albert@nan-tic.com
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the
# Free Software Foundation, Inc.,
# 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
from PyQt4.QtGui import *
from PyQt4.QtCore import *
## @brief Named list of boxes. Emits a signal each time a box is added
# or removed.
class Template(QObject):
    def __init__(self, name):
        QObject.__init__(self)
        self.boxes = []
        self.name = name
        self.id = 0

    ## @brief Appends the box and emits boxAdded(PyQt_PyObject) with it
    def addBox(self, box):
        self.boxes.append( box )
        signal = SIGNAL('boxAdded(PyQt_PyObject)')
        self.emit( signal, box )

    ## @brief Removes the box and emits boxRemoved(PyQt_PyObject) with it
    def removeBox(self, box):
        self.boxes.remove( box )
        signal = SIGNAL('boxRemoved(PyQt_PyObject)')
        self.emit( signal, box )
## @brief Box of a template: a rectangle with the recognizer, type and
# filter that must be used with it.
#
# NOTE(review): under Python 2 this is an old-style class, where assigning to
# a property does not call its setter, so the validation in setType() and
# setFilter() may never run on 'box.type = ...' - confirm and consider
# deriving from object.
class TemplateBox:
    recognizers = ['text', 'barcode', 'dataMatrix']
    types = ['matcher','input']
    filters = ['none','numeric','alphabetic','alphanumeric']

    def __init__(self):
        self.rect = QRectF()
        # Holds the rect where the actual text/barcode/whatever
        # is found in the template
        self.featureRect = QRectF()
        self.recognizer = 'text'
        self.type = 'matcher'
        self.filter = 'none'
        self.name = ''
        self.text = ''

    ## @brief Sets the type of the box. Raises ValueError if the value is
    # not one of TemplateBox.types.
    def setType(self, value):
        if value not in TemplateBox.types:
            # Strings can't be raised as exceptions: use a real exception
            raise ValueError( "Type '%s' not valid" % value )
        self._type = value

    def getType(self):
        return self._type

    type=property(getType,setType)

    ## @brief Sets the filter of the box. Raises ValueError if the value is
    # not one of TemplateBox.filters.
    def setFilter(self, value):
        if value not in TemplateBox.filters:
            raise ValueError( "Filter '%s' not valid" % value )
        self._filter = value

    def getFilter(self):
        return self._filter

    filter=property(getFilter,setFilter)

27
NanScan/TemporaryFile.py Normal file
View File

@ -0,0 +1,27 @@
import tempfile
import os
## @brief Simplifies the task of creating and managing temporary files in a
# secure manner.
#
# Although python already provides functions for creating temporary files,
# in nanscan we usually need only the filename, which is passed as a parameter
# to external applications such as convert or tesseract. Using those file names
# in these cases would still make the application vulnerable to race conditions
# so we need a temporary directory in which all such files are created.
# This class ensures this directory is created so one can safely manage
# temporary files.
class TemporaryFile:
    # Directory where temporary files are created. Created on first use.
    directory = None

    ## @brief Creates a temporary file securely and returns its name.
    # If the temporary directory doesn't exist or has been deleted it's created.
    # This will allow the user to remove the directory at 'almost' any time
    # without breaking the application, though a way to easily remove temp files
    # should be provided, probably.
    #
    # @param suffix suffix of the name of the file ('' by default).
    # @return name of the file, which has been created empty.
    @staticmethod
    def create( suffix='' ):
        if not TemporaryFile.directory or not os.path.exists( TemporaryFile.directory ):
            TemporaryFile.directory = tempfile.mkdtemp()
        fd, name = tempfile.mkstemp( suffix=suffix, dir=TemporaryFile.directory )
        # Only the name is needed: close the descriptor returned by
        # mkstemp() so it isn't leaked on each call.
        os.close( fd )
        return name

65
NanScan/Translator.py Normal file
View File

@ -0,0 +1,65 @@
# Copyright (C) 2008 by Albert Cervera i Areny
# albert@nan-tic.com
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the
# Free Software Foundation, Inc.,
# 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
import codecs
import os
## @brief This class provides a simple way of converting similar characters
# to the same one.
#
# This can prove useful to overcome OCR errors and is used in Hamming class,
# for example. Default translation file provides families of characters. For
# example 's', 'S', '$' are in the same one because the OCR may sometimes
# recognize an 'S' as '$'. 'l', 'i' and '|' are in another family.
#
# The text 'eli a sa|de$' will be converted to 'ell a saldes'. The translator
# replaces any element of a family by the first character of the family it is in.
class Translator:
    def __init__(self):
        self.translations = None

    ## @brief Sets the translation list.
    #
    # The list should follow the following structure: [ 'sS$', 'li|', ... ]
    def setTranslations(self, translations):
        self.translations = translations

    ## @brief Loads the translation list from the given file.
    #
    # Each character family must be in a different line. See the default
    # translations.txt file if you need an example. The file must be encoded
    # in UTF-8. Empty lines are ignored.
    #
    # The exception raised by codecs.open() is propagated if the file
    # can't be opened.
    def load(self, fileName):
        f = codecs.open(fileName, 'r', 'utf-8')
        try:
            lines = f.readlines()
        finally:
            f.close()
        # Remove the end of line: otherwise it would be one more member of
        # each family and new lines in texts would be replaced too.
        families = [ x.rstrip( u'\r\n' ) for x in lines ]
        self.translations = [ x for x in families if x ]

    ## @brief Returns the given text replacing each character with the first
    # character of its family or itself if it's not in any character family.
    #
    # If no translations have been set, the translations.txt file found in
    # the directory of this module is loaded.
    def translated(self, text):
        if self.translations is None:
            self.load( os.path.join( os.path.abspath(os.path.dirname(__file__)), 'translations.txt' ) )
        result = text
        for family in self.translations:
            for member in family[1:]:
                result = result.replace( member, family[0] )
        return result

95
NanScan/Trigram.py Executable file
View File

@ -0,0 +1,95 @@
# Copyright (C) 2008 by Albert Cervera i Areny
# albert@nan-tic.com
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the
# Free Software Foundation, Inc.,
# 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
## @brief Functions to calculate the similarity of two strings using trigrams.
class Trigram:
    # Returns the trigrams of a sentence. That is, the trigrams of each of
    # the words of the string. Words are currently split by the space
    # character only.
    # The result is a sorted list without repeated items.
    @staticmethod
    def trigramList( text ):
        found = set()
        for word in text.split( ' ' ):
            for item in Trigram.wordTrigramList( word ):
                found.add( item )
        return sorted( found )

    # Calculates the trigrams contained in a word, which is considered to
    # start and end with a space. If you feed this function with a string
    # with spaces they'll be treated like normal characters. The usual
    # function is trigramList(), which returns the trigrams of all the words.
    # The result is a sorted list without repeated items.
    @staticmethod
    def wordTrigramList( text ):
        padded = ' ' + text + ' '
        found = set()
        for start in range( len(text) + 1 ):
            found.add( padded[start:start+3] )
        return sorted( found )

    # Calculates similarity between two strings using a trigram algorithm.
    # This is based on the PostgreSQL pg_trgm implementation.
    # The result is the number of trigrams both strings (in lowercase) have
    # in common divided by the number of different trigrams found in them.
    @staticmethod
    def trigram( text1, text2 ):
        first = Trigram.trigramList( text1.lower() )
        second = Trigram.trigramList( text2.lower() )
        i = j = 0
        common = 0
        # Both lists are sorted so they can be walked at the same time
        while i < len(first) and j < len(second):
            if first[i] < second[j]:
                i += 1
            elif first[i] > second[j]:
                j += 1
            else:
                i += 1
                j += 1
                common += 1
        total = len(first) + len(second) - common
        return float(common) / float( total )
if __name__ == '__main__':
print Trigram.trigramList( 'abc' )
print Trigram.trigramList( 'abcabc' )
print Trigram.trigramList( 'hola' )
print Trigram.trigramList( 'adeu manelet' )
print Trigram.trigram( 'abc', 'abc' )
print Trigram.trigram( 'abcabc', 'abc' )
print Trigram.trigram( 'abcdef', 'abc' )
print Trigram.trigram( 'abcdef', 'bcd' )
print Trigram.trigram( 'bcdef', 'abc' )

18
NanScan/__init__.py Normal file
View File

@ -0,0 +1,18 @@
# Copyright (C) 2008 by Albert Cervera i Areny
# albert@nan-tic.com
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the
# Free Software Foundation, Inc.,
# 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.

43
NanScan/change-resolution.py Executable file
View File

@ -0,0 +1,43 @@
#!/usr/bin/python
# Copyright (C) 2008 by Albert Cervera i Areny
# albert@nan-tic.com
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the
# Free Software Foundation, Inc.,
# 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
from PyQt4.QtCore import *
from PyQt4.QtGui import *
import sys
# Show the usage text and stop unless exactly two arguments were given.
if len(sys.argv) != 3:
    print("change-resolution.py resolution image")
    print("")
    print("Sets the resolution of the given image to the given value")
    print("Example: ./change-resolution.py 300 image.png")
    print("")
    print("This utility has been created because images obtained with")
    print("scanimage and scanadf sane utilities don't set resolution")
    print("correctly.")
    sys.exit()

# Convert resolution (dots per inch) to dots per meter. QImage takes an
# integer here, so the value is rounded instead of passing a float.
resolution = int( round( float(sys.argv[1]) * 1000 / 25.4 ) )
fileName = sys.argv[2]

image = QImage( fileName )
# QImage does not raise when loading fails, it just yields a null image.
if image.isNull():
    sys.stderr.write( "Could not load image '%s'\n" % fileName )
    sys.exit(1)

image.setDotsPerMeterX( resolution )
image.setDotsPerMeterY( resolution )

# The image is written back over the original file.
if not image.save( fileName ):
    sys.stderr.write( "Could not save image '%s'\n" % fileName )
    sys.exit(1)

BIN
NanScan/common.rcc Normal file

Binary file not shown.

44
NanScan/learn.py Normal file
View File

@ -0,0 +1,44 @@
from gamera.core import *
from gamera.knn import *
from PyQt4.QtCore import *
from PyQt4.QtGui import *
from temporaryfile import *
def initOcrSystem():
    """Initialize gamera; call this before using GameraLearn."""
    init_gamera()
class GameraLearn:
    """Teaches a gamera kNN classifier using the text found by the ocr module."""

    def scan(self, image):
        """Classify every connected component of image with its OCR text.

        image must offer save( fileName, format ); presumably it is a
        QImage (verify against callers). It is written to a temporary
        TIFF file, loaded with gamera, converted to greyscale and
        thresholded, and its connected components are extracted. The
        same image is then scanned with ocr.Ocr and each component is
        handed to the classifier together with the text the OCR reports
        for it. Progress is printed to standard output.
        """
        print("Saving image...")
        tiffPath = TemporaryFile.create( '.tif' )
        image.save( tiffPath, 'TIFF' )

        print("Loading image with gamera...")
        loaded = load_image( tiffPath )

        print("Converting to greyscale...")
        grey = loaded.to_greyscale()

        print("Thresholding...")
        onebit = grey.otsu_threshold()

        # Get connected components from the image
        print("Getting connected components")
        glyphs = onebit.cc_analysis()

        # Interactive alternative, currently disabled:
        #classifier = knn.kNNInteractive()
        #classifier.from_xml_filename('training.xml')
        #classifier.classify_list_automatic( ccs )
        print("Initiating classifier")
        classifier = kNNNonInteractive( glyphs )

        # Imported here, as in the original, rather than at module level.
        import ocr
        reader = ocr.Ocr()

        print("Scanning with tesseract")
        reader.scan( image )

        print("Teaching gamera")
        for glyph in glyphs:
            # Same output as the statement form: label, a space, the glyph.
            print("Glyph: " + " " + str(glyph))
            glyphText = reader.textInRegion( glyph )
            classifier.classify_glyph_manual( glyph, glyphText )

24
NanScan/test-barcode.py Executable file
View File

@ -0,0 +1,24 @@
#!/usr/bin/python
# Copyright (C) 2008 by Albert Cervera i Areny
# albert@nan-tic.com
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the
# Free Software Foundation, Inc.,
# 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
from barcode import *
import sys
# Scan the file named by the last command line argument.
scanner = Barcode()
imagePath = sys.argv[-1]
scanner.scan( imagePath )

33
NanScan/test-learn.py Normal file
View File

@ -0,0 +1,33 @@
# Copyright (C) 2008 by Albert Cervera i Areny
# albert@nan-tic.com
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the
# Free Software Foundation, Inc.,
# 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
import learn
import sys
import codecs
from PyQt4.QtGui import *
# Train gamera with the image given as the last command line argument.
learn.initOcrSystem()
c = learn.GameraLearn()

image = QImage()
if not image.load( sys.argv[-1] ):
    print('Error loading image')
    # 'os' is not imported in this script and has no exit() function;
    # sys.exit() is what ends the program with a failure status.
    sys.exit(1)

c.scan( image )

41
NanScan/test-ocr.py Normal file
View File

@ -0,0 +1,41 @@
# Copyright (C) 2008 by Albert Cervera i Areny
# albert@nan-tic.com
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the
# Free Software Foundation, Inc.,
# 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
import ocr
import sys
import codecs
from PyQt4.QtGui import *
# Run the OCR on the image given as the last command line argument.
ocr.initOcrSystem()
c = ocr.Ocr()

image = QImage()
if not image.load( sys.argv[-1] ):
    print('Error loading image')
    # 'os' is not imported in this script and has no exit() function;
    # sys.exit() is what ends the program with a failure status.
    sys.exit(1)

c.scan( image )

# Print the formatted text, dropping whatever cannot be encoded as ASCII.
print(c.formatedText().encode('ascii','ignore'))

# Store the complete formatted text as UTF-8; the file is closed even
# if writing fails.
f = codecs.open( '/tmp/output.txt', 'w', 'utf-8' )
try:
    f.write( c.formatedText() )
finally:
    f.close()

# Report the slope, deskew once and keep the resulting image.
print(c.slope())
c.deskewOnce()
c.image.save( '/tmp/rotated.png', 'PNG' )
#print "Image stored in /tmp/rotated.png"

View File

@ -0,0 +1,18 @@
from PyQt4.QtGui import *
from scandialog import *
import sys
import os
# Open the scan dialog as a small standalone application.
app = QApplication( sys.argv )
dialog = ScanDialog()

# Directory assigned to FileSaveThreaded: 'c:\images' on Windows,
# '/tmp' on every other platform.
saveDirectories = { 'nt': 'c:\\images' }
FileSaveThreaded.directory = saveDirectories.get( os.name, '/tmp' )

# Run the dialog modally, then enter the application event loop.
dialog.exec_()
app.exec_()

7
NanScan/translations.txt Normal file
View File

@ -0,0 +1,7 @@
lií
e€
s$ş
oº0ø
yv
bh