# nanscan/NanScan/Ocr.py

#   Copyright (C) 2008 by Albert Cervera i Areny
#   albert@nan-tic.com
#
#   This program is free software; you can redistribute it and/or modify
#   it under the terms of the GNU General Public License as published by
#   the Free Software Foundation; either version 2 of the License, or
#   (at your option) any later version.
#
#   This program is distributed in the hope that it will be useful,
#   but WITHOUT ANY WARRANTY; without even the implied warranty of
#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#   GNU General Public License for more details.
#
#   You should have received a copy of the GNU General Public License
#   along with this program; if not, write to the
#   Free Software Foundation, Inc.,
#   59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.

import os
# Do not import everything as Template is defined in string too
from string import lower
import codecs
import tempfile
import shutil
import math

from TemporaryFile import *
from Analyzer import *
from Block import *

from gamera.core import *
from PyQt4.QtCore import *
from PyQt4.QtGui import *


## @brief This class allows using an OCR and provides several convenient functions
# regarding text and image processing such as deskewing or obtaining formatted text.
class Ocr(Analyzer):
	# Path of the image file handed to the external OCR engine. Set by scan().
	file = ""

	# Location of the cuneiform executable and of the directory exported as
	# LD_LIBRARY_PATH when running it. They default to the paths that used to be
	# hard-coded in cuneiform() and can be overridden per class or per instance.
	cuneiformExecutable = '/home/albert/d/git/cuneiform/bin/cuneiform'
	cuneiformLibraryPath = '/home/albert/d/git/cuneiform/lib'

	## @brief Uses tesseract to recognize text of the current image.
	#
	# Runs 'tesseract' on self.file through self.spawn() (not defined in this
	# class; presumably inherited from Analyzer) writing into a temporary
	# directory, and returns the content of the generated '.txt' file decoded
	# as UTF-8. The temporary directory is removed even if running the engine
	# or reading its output fails.
	def tesseract(self):
		directory = tempfile.mkdtemp()
		try:
			path = os.path.join( directory, 'tesseract' )
			self.spawn( 'tesseract', self.file, path, '-l', 'spa', 'batch.nochop', 'makebox' )
			f = codecs.open( path + '.txt', 'r', 'utf-8' )
			try:
				return f.read()
			finally:
				f.close()
		finally:
			shutil.rmtree( directory, True )

	## @brief Parses tesseract output creating a list of Character objects.
	#
	# Each non-empty input line is expected to look like "w 116 1724 133 1736":
	# the character followed by the left, bottom, right and top pixel
	# coordinates. Lines with fewer than five space-separated fields are ignored.
	# Requires self.height, self.dotsPerMillimeterX and self.dotsPerMillimeterY,
	# which scan() sets.
	def parseTesseractOutput(self, input):
		output = []
		# Coordinates start at bottom left corner but we convert this into top left.
		# Convert pixel coordinates into millimeters too.
		for record in input.split('\n'):
			if not record:
				continue
			fields = record.split(' ')
			if len(fields) < 5:
				# Not a complete "<char> <x1> <y1> <x2> <y2>" record (for
				# example a lone '\r'): there is nothing to build a box from.
				continue
			x1 = int(fields[1])
			x2 = int(fields[3])
			# Flip the vertical axis: y1 becomes the bottom edge and y2 the
			# top edge, both measured from the top of the image.
			y1 = self.height - int(fields[2])
			y2 = self.height - int(fields[4])
			width = x2 - x1
			height = y1 - y2

			c = Character()
			c.character = fields[0]
			c.box = QRectF(
				float(x1) / self.dotsPerMillimeterX,
				float(y2) / self.dotsPerMillimeterY,
				float(width) / self.dotsPerMillimeterX,
				float(height) / self.dotsPerMillimeterY
			)
			output.append( c )
		return output

	## @brief Uses cuneiform to recognize text of the current image.
	#
	# Runs the executable given by self.cuneiformExecutable on self.file asking
	# for hOCR output ('-f hocr') and returns that output decoded as UTF-8,
	# ignoring undecodable bytes. The child process environment contains only
	# LD_LIBRARY_PATH (set to self.cuneiformLibraryPath). The temporary
	# directory is removed even if running the engine or reading its output fails.
	def cuneiform(self):
		directory = tempfile.mkdtemp()
		try:
			path = os.path.join( directory, 'cuneiform.txt' )
			os.spawnlpe(
				os.P_WAIT, self.cuneiformExecutable, self.cuneiformExecutable,
				'-l', 'spa', '-f', 'hocr', '-o', path, self.file,
				{'LD_LIBRARY_PATH': self.cuneiformLibraryPath}
			)
			f = codecs.open( path, 'r', 'utf-8', errors='ignore' )
			try:
				return f.read()
			finally:
				f.close()
		finally:
			shutil.rmtree( directory, True )

	## @brief Parses cuneiform (hOCR) output creating a list of Character objects.
	#
	# Requires self.dotsPerMillimeterX and self.dotsPerMillimeterY, which
	# scan() sets.
	def parseCuneiformOutput(self, input):
		output = []
		# Skip the first line of the document.
		input = input[input.find('\n')+1:]
		# Take what follows the first '<span ' and cut it at every 'bbox' keyword,
		# so that each chunk starts with the four coordinates of one box.
		chunks = input.partition('<span ')[2].split('bbox')
		# The first chunk is the text preceding the first 'bbox' and holds no
		# coordinates. The last chunk is dropped as well.
		# NOTE(review): confirm that dropping the last chunk does not discard the
		# last recognized character.
		chunks = chunks[1:-1]
		# Output example: <span title="bbox 391 595 400 621">l</span>
		# Coordinates start at top left corner as we need.
		for chunk in chunks:
			chunk = chunk.strip()
			textBox = chunk.split(' ')
			if len(textBox) < 4:
				# Fewer than four coordinates: not a usable box.
				continue
			# Text that follows the closing quote of the title attribute:
			# '>' followed by the recognized character.
			tail = chunk.partition('"')[2]
			if len(tail) < 2:
				continue

			x1 = int(textBox[0])
			y1 = int(textBox[1])
			x2 = int(textBox[2])
			y2 = int(textBox[3].partition('"')[0])

			width = x2 - x1
			height = y2 - y1

			c = Character()
			# Read the character from the whole chunk and not from textBox[3]:
			# when the recognized character is a space, splitting by ' ' cuts
			# textBox[3] right after '>' and the character would be lost.
			c.character = tail[1]
			# Convert pixel coordinates into millimeters too.
			c.box = QRectF(
				float(x1) / self.dotsPerMillimeterX,
				float(y1) / self.dotsPerMillimeterY,
				float(width) / self.dotsPerMillimeterX,
				float(height) / self.dotsPerMillimeterY
			)
			output.append( c )
		return output

	## @brief Returns the text of a given region of the image.
	# It's the same as calling formatedText() on the block created by scan().
	def textInRegion(self, region=None):
		return self.block.formatedText( region )

	## @brief Returns the bounding rectangle of the text returned by textInRegion for
	# the given region.
	#
	# The rectangle is the union of the boxes of all the characters returned by
	# the block's textLinesWithSpaces() for that region.
	def featureRectInRegion(self, region=None):
		rect = QRectF()
		for line in self.block.textLinesWithSpaces( region ):
			for c in line:
				rect = rect.united( c.box )
		return rect

	## @brief Uses ImageMagick's 'convert' application to convert the given image
	# (QImage) into gray scale
	#
	# The image is first saved as TIFF into a file obtained from
	# TemporaryFile.create() and the 8 bit gray scale result is written to 'output'.
	def convertToGrayScale(self, image, output):
		input = TemporaryFile.create( '.tif' )
		image.save( input, 'TIFF' )
		os.spawnlp(os.P_WAIT, 'convert', 'convert', '-type', 'grayscale', '-depth', '8', input, output)

	## @brief Uses Gamera OTSU threshold algorithm to convert into binary
	#
	# Loads the image file 'input', converts it to greyscale, applies the
	# threshold and saves the result as a TIFF file in 'output'.
	def convertToBinary(self, input, output):
		image = load_image(input)
		# Converting
		img = image.to_greyscale()
		# Thresholding
		onebit = img.otsu_threshold()
		# Saving for tesseract processing
		onebit.save_tiff(output)

	## @brief Scans the given image (QImage) with the OCR.
	#
	# Stores the image, its size and its resolution in the object, runs
	# cuneiform on it and builds self.boxes and self.block from the lower-cased
	# output of the engine.
	def scan(self, image):
		self.image = image
		self.width = self.image.width()
		self.height = self.image.height()
		self.dotsPerMillimeterX = float( self.image.dotsPerMeterX() ) / 1000.0
		self.dotsPerMillimeterY = float( self.image.dotsPerMeterY() ) / 1000.0

		# Tesseract Steps
		#self.file = TemporaryFile.create('.tif')
		#self.convertToGrayScale(image, self.file)
		#txt = self.tesseract().lower()
		#self.boxes = self.parseTesseractOutput(txt)

		# Cuneiform Steps
		self.file = TemporaryFile.create( '.png' )
		image.save( self.file )
		txt = self.cuneiform().lower()
		self.boxes = self.parseCuneiformOutput(txt)

		self.block = Block()
		self.block.setBoxes( self.boxes )


	## @brief Calculates slope of text lines
	# This value is used by deskew() function to rotate image and
	# align text horizontally. Note that the slope can be calculated
	# by the text of only a region of the image.
	#
	# Algorithm:
	#   1- Calculate textLines()
	#   2- For each line with at least three characters calculate the linear
	#      regression (pick up slope) given by the x coordinate of the box and
	#      y as the middle point of the box.
	#   3- Calculate the average of all slopes.
	#
	# Lines whose regression is degenerate (linearRegression() raises
	# ZeroDivisionError) are ignored. Returns 0 if no line provides a slope.
	def slope(self, region=None):
		# TODO: We should probably discard values that highly differ
		# from the average for the final value to be used to rotate.
		slopes = []
		for line in self.block.textLines( region ):
			if len(line) < 3:
				continue
			xs = [b.box.x() for b in line]
			ys = [b.box.y() + (b.box.height()/2) for b in line]
			try:
				slopes.append( linearRegression(xs, ys)[0] )
			except ZeroDivisionError:
				# No slope can be calculated for this line; leave it out
				# instead of aborting the whole calculation.
				continue
		if not slopes:
			return 0
		return sum(slopes) / len(slopes)

	## @brief Rotates self.image by the angle that compensates the slope of the
	# text in the given region.
	#
	# Only self.image is replaced: the boxes and the block keep describing the
	# image as it was before the rotation until scan() is called again.
	def deskewOnce(self, region=None):
		slope = self.slope( region )
		transform = QTransform()
		transform.rotateRadians( -math.atan( slope ) )
		self.image = self.image.transformed( transform, Qt.SmoothTransformation )

	## @brief Rotates self.image until its text lines are horizontal, using at
	# most two passes.
	#
	# A pass is executed only while the absolute slope is greater than 0.001, so
	# images skewed in either direction are corrected. The rotated image is
	# scanned again after each pass: slope() works on the boxes found by scan(),
	# so without a new scan the second measurement would return the slope of
	# the image before the rotation.
	def deskew(self, region=None):
		for attempt in range(2):
			if abs( self.slope( region ) ) <= 0.001:
				return
			self.deskewOnce( region )
			self.scan( self.image )

# Register the Ocr class with Analyzer under the name 'text'.
Analyzer.registerAnalyzer('text', Ocr)

## @brief Runs the initialization the OCR functions need before the library can
# be used. It has to be executed once.
#
# For the time being this only initializes Gamera, which is not being used by now.
def initOcrSystem():
	init_gamera()


## @brief This function calculates the least squares linear regression of a list
# of points.
# Linear regression of y = ax + b
# Usage
# real, real, real = linearRegression(list, list)
# Returns the coefficients a (slope) and b (intercept) of the regression line
# "y=ax+b" calculated from x[] and y[], and the R^2 value, in this order.
# When all y values are equal the fitted line is the horizontal line that goes
# through them and 1.0 is returned as R^2.
# Raises ValueError if X and Y have different lengths or fewer than three
# values, and ZeroDivisionError if all x values are equal (the slope of a
# vertical line is undefined).
def linearRegression(X, Y):
	if len(X) != len(Y):
		raise ValueError('unequal length')
	N = len(X)
	if N <= 2:
		raise ValueError('three or more values needed')
	# Float accumulators keep every division below a floating point one, even
	# when the input values are integers.
	Sx = Sy = Sxx = Sxy = 0.0
	for x, y in zip(X, Y):
		Sx += x
		Sy += y
		Sxx += x*x
		Sxy += x*y
	det = Sxx * N - Sx * Sx
	if det == 0:
		raise ZeroDivisionError('all x values are equal')
	a = (Sxy * N - Sy * Sx) / det
	b = (Sxx * Sy - Sx * Sxy) / det
	mean = Sy / N
	meanerror = residual = 0.0
	for x, y in zip(X, Y):
		meanerror += (y - mean)**2
		residual += (y - a * x - b)**2
	if meanerror == 0:
		# All y values are equal: 1 - residual/meanerror would be 0/0 although
		# the horizontal line fits the points exactly.
		RR = 1.0
	else:
		RR = 1 - residual/meanerror
	return a, b, RR