Initial import of tesseract-1.04b from pkgsrc-wip (packaged by heinz@
and myself):

This code is a raw OCR engine. It has NO PAGE LAYOUT ANALYSIS, NO
OUTPUT FORMATTING, and NO UI. It can only process an image of a
single column and create text from it. It can detect fixed pitch
vs proportional text.  Having said that, in 1995, this engine was
in the top 3 in terms of character accuracy, and it compiles and
runs on both Linux and Windows. Another current limitation is that
it only recognizes English and its character set is only US-ASCII.
Training code IS included in the open source release however, and
will be included in a future release.
This commit is contained in:
wiz 2007-05-18 06:39:27 +00:00
parent 539cc301cc
commit e899e6021c
9 changed files with 396 additions and 0 deletions

9
graphics/tesseract/DESCR Normal file
View file

@ -0,0 +1,9 @@
This code is a raw OCR engine. It has NO PAGE LAYOUT ANALYSIS, NO
OUTPUT FORMATTING, and NO UI. It can only process an image of a
single column and create text from it. It can detect fixed pitch
vs proportional text. Having said that, in 1995, this engine was
in the top 3 in terms of character accuracy, and it compiles and
runs on both Linux and Windows. Another current limitation is that
it only recognizes English and its character set is only US-ASCII.
Training code IS included in the open source release however, and
will be included in a future release.

View file

@ -0,0 +1,32 @@
# $NetBSD: Makefile,v 1.1.1.1 2007/05/18 06:39:27 wiz Exp $
#
DISTNAME= tesseract-1.04b
CATEGORIES= graphics
MASTER_SITES= http://tesseract-ocr.googlecode.com/files/
MAINTAINER= pkgsrc-users@NetBSD.org
HOMEPAGE= http://code.google.com/p/tesseract-ocr/
COMMENT= Commercial quality open source OCR engine
PKG_DESTDIR_SUPPORT=user-destdir
GNU_CONFIGURE= yes
USE_LANGUAGES= c c++
WRKSRC= ${WRKDIR}/tesseract-1.04
post-build:
${SED} -e "s,@PREFIX@,${PREFIX}," ${FILESDIR}/tesseract.sh \
> ${WRKSRC}/tesseract.sh
post-install:
${INSTALL_LIB_DIR} ${DESTDIR}${PREFIX}/libexec
${MV} ${DESTDIR}${PREFIX}/bin/tesseract ${DESTDIR}${PREFIX}/libexec
${INSTALL_SCRIPT} ${WRKSRC}/tesseract.sh ${DESTDIR}${PREFIX}/bin/tesseract
${INSTALL_DATA_DIR} ${DESTDIR}${PREFIX}/share/doc/tesseract
${INSTALL_DATA} ${WRKSRC}/README ${DESTDIR}${PREFIX}/share/doc/tesseract
${INSTALL_DATA_DIR} ${DESTDIR}${PREFIX}/share/tesseract
${INSTALL_DATA} ${WRKSRC}/phototest.tif ${DESTDIR}${PREFIX}/share/tesseract
.include "../../graphics/tiff/buildlink3.mk"
.include "../../mk/bsd.pkg.mk"

286
graphics/tesseract/PLIST Normal file
View file

@ -0,0 +1,286 @@
@comment $NetBSD: PLIST,v 1.1.1.1 2007/05/18 06:39:27 wiz Exp $
bin/cntraining
bin/mftraining
bin/tesseract
include/tesseract/adaptions.h
include/tesseract/adaptive.h
include/tesseract/adaptmatch.h
include/tesseract/applybox.h
include/tesseract/associate.h
include/tesseract/badwords.h
include/tesseract/baseapi.h
include/tesseract/basedir.h
include/tesseract/baseline.h
include/tesseract/bestfirst.h
include/tesseract/bits16.h
include/tesseract/bitstrm.h
include/tesseract/bitvec.h
include/tesseract/blckerr.h
include/tesseract/blkocc.h
include/tesseract/blobbox.h
include/tesseract/blobclass.h
include/tesseract/blobcmp.h
include/tesseract/blobcmpl.h
include/tesseract/blobs.h
include/tesseract/blread.h
include/tesseract/callcpp.h
include/tesseract/callnet.h
include/tesseract/charcut.h
include/tesseract/charsample.h
include/tesseract/chartoname.h
include/tesseract/choicearr.h
include/tesseract/choices.h
include/tesseract/chop.h
include/tesseract/chopper.h
include/tesseract/closed.h
include/tesseract/clst.h
include/tesseract/cluster.h
include/tesseract/clusttool.h
include/tesseract/cmndwin.h
include/tesseract/cnTraining.dsp
include/tesseract/const.h
include/tesseract/context.h
include/tesseract/control.h
include/tesseract/coutln.h
include/tesseract/crakedge.h
include/tesseract/cutil.h
include/tesseract/cutoffs.h
include/tesseract/danerror.h
include/tesseract/dawg.h
include/tesseract/debug.h
include/tesseract/debugwin.h
include/tesseract/djmenus.h
include/tesseract/dlltest.cpp
include/tesseract/dlltest.dsp
include/tesseract/docqual.h
include/tesseract/drawedg.h
include/tesseract/drawfx.h
include/tesseract/drawtord.h
include/tesseract/edgblob.h
include/tesseract/edgloop.h
include/tesseract/efio.h
include/tesseract/elst.h
include/tesseract/elst2.h
include/tesseract/emalloc.h
include/tesseract/errcode.h
include/tesseract/evntlst.h
include/tesseract/evnts.h
include/tesseract/expandblob.h
include/tesseract/extern.h
include/tesseract/extract.h
include/tesseract/featdefs.h
include/tesseract/fileerr.h
include/tesseract/findseam.h
include/tesseract/fixspace.h
include/tesseract/fixxht.h
include/tesseract/flexfx.h
include/tesseract/float2int.h
include/tesseract/fpchop.h
include/tesseract/fpoint.h
include/tesseract/freelist.h
include/tesseract/funcdefs.h
include/tesseract/fxdefs.h
include/tesseract/fxid.h
include/tesseract/gap_map.h
include/tesseract/genblob.h
include/tesseract/general.h
include/tesseract/globaloc.h
include/tesseract/globals.h
include/tesseract/gradechop.h
include/tesseract/grphics.h
include/tesseract/grphshm.h
include/tesseract/hashfn.h
include/tesseract/heuristic.h
include/tesseract/hideedge.h
include/tesseract/host.h
include/tesseract/hosthplb.h
include/tesseract/hpddef.h
include/tesseract/hpdsizes.h
include/tesseract/hyphen.h
include/tesseract/img.h
include/tesseract/imgbmp.h
include/tesseract/imgerrs.h
include/tesseract/imgio.h
include/tesseract/imgs.h
include/tesseract/imgscale.h
include/tesseract/imgtiff.h
include/tesseract/imgunpk.h
include/tesseract/intfx.h
include/tesseract/intmatcher.h
include/tesseract/intproto.h
include/tesseract/ipoints.h
include/tesseract/kdtree.h
include/tesseract/labls.h
include/tesseract/linlsq.h
include/tesseract/listio.h
include/tesseract/lmedsq.h
include/tesseract/lsterr.h
include/tesseract/mainblk.h
include/tesseract/makechop.h
include/tesseract/makerow.h
include/tesseract/matchdefs.h
include/tesseract/matchtab.h
include/tesseract/matmatch.h
include/tesseract/matrix.h
include/tesseract/measure.h
include/tesseract/memblk.h
include/tesseract/memry.h
include/tesseract/memryerr.h
include/tesseract/mergenf.h
include/tesseract/metrics.h
include/tesseract/mf.h
include/tesseract/mfTraining.dsp
include/tesseract/mfcpch.cpp
include/tesseract/mfcpch.h
include/tesseract/mfdefs.h
include/tesseract/mfoutline.h
include/tesseract/mfvars.h
include/tesseract/mfx.h
include/tesseract/minmax.h
include/tesseract/mod128.h
include/tesseract/msmenus.h
include/tesseract/name2char.h
include/tesseract/ndminx.h
include/tesseract/normalis.h
include/tesseract/normfeat.h
include/tesseract/normmatch.h
include/tesseract/notdll.h
include/tesseract/nwmain.h
include/tesseract/ocrblock.h
include/tesseract/ocrclass.h
include/tesseract/ocrfeatures.h
include/tesseract/ocrrow.h
include/tesseract/ocrshell.h
include/tesseract/oldbasel.h
include/tesseract/oldheap.h
include/tesseract/oldlist.h
include/tesseract/olutil.h
include/tesseract/outfeat.h
include/tesseract/outlines.h
include/tesseract/output.h
include/tesseract/pageblk.h
include/tesseract/pageres.h
include/tesseract/pagewalk.h
include/tesseract/paircmp.h
include/tesseract/pdblock.h
include/tesseract/pdclass.h
include/tesseract/permdawg.h
include/tesseract/permnum.h
include/tesseract/permute.h
include/tesseract/pgedit.h
include/tesseract/pgeditx.h
include/tesseract/picofeat.h
include/tesseract/pieces.h
include/tesseract/pithsync.h
include/tesseract/pitsync1.h
include/tesseract/platform.h
include/tesseract/plotedges.h
include/tesseract/plotseg.h
include/tesseract/points.h
include/tesseract/polyaprx.h
include/tesseract/polyblk.h
include/tesseract/polyblob.h
include/tesseract/polyvert.h
include/tesseract/poutline.h
include/tesseract/protos.h
include/tesseract/quadlsq.h
include/tesseract/quadratc.h
include/tesseract/quspline.h
include/tesseract/ratngs.h
include/tesseract/rect.h
include/tesseract/rejctmap.h
include/tesseract/reject.h
include/tesseract/render.h
include/tesseract/rwpoly.h
include/tesseract/sbdmenu.h
include/tesseract/sbgconst.h
include/tesseract/sbgdefs.h
include/tesseract/sbgtypes.h
include/tesseract/scaleimg.h
include/tesseract/scanedg.h
include/tesseract/scanutils.cpp
include/tesseract/scanutils.h
include/tesseract/seam.h
include/tesseract/secname.h
include/tesseract/serialis.h
include/tesseract/showim.h
include/tesseract/sigmenu.h
include/tesseract/sortflts.h
include/tesseract/speckle.h
include/tesseract/split.h
include/tesseract/states.h
include/tesseract/statistc.h
include/tesseract/stderr.h
include/tesseract/stepblob.h
include/tesseract/stopper.h
include/tesseract/strngs.h
include/tesseract/structures.h
include/tesseract/submen.h
include/tesseract/tally.h
include/tesseract/tessarray.h
include/tesseract/tessbox.h
include/tesseract/tessclas.h
include/tesseract/tessedit.h
include/tesseract/tessembedded.h
include/tesseract/tesseractmain.h
include/tesseract/tessinit.h
include/tesseract/tessio.h
include/tesseract/tessopt.h
include/tesseract/tessout.h
include/tesseract/tessvars.h
include/tesseract/tface.h
include/tesseract/tfacep.h
include/tesseract/tfacepp.h
include/tesseract/topitch.h
include/tesseract/tordmain.h
include/tesseract/tordvars.h
include/tesseract/tospace.h
include/tesseract/tovars.h
include/tesseract/tprintf.h
include/tesseract/training.h
include/tesseract/trie.h
include/tesseract/tstruct.h
include/tesseract/txtregn.h
include/tesseract/underlin.h
include/tesseract/unichar.h
include/tesseract/unicharmap.h
include/tesseract/unicharset.h
include/tesseract/varable.h
include/tesseract/varabled.h
include/tesseract/varblmen.h
include/tesseract/varblwin.h
include/tesseract/variables.h
include/tesseract/vecfuncs.h
include/tesseract/werd.h
include/tesseract/werdit.h
include/tesseract/wordclass.h
include/tesseract/wordseg.h
include/tesseract/xform2d.h
lib/libtesseract_ccstruct.a
lib/libtesseract_ccutil.a
lib/libtesseract_classify.a
lib/libtesseract_cutil.a
lib/libtesseract_dict.a
lib/libtesseract_display.a
lib/libtesseract_image.a
lib/libtesseract_main.a
lib/libtesseract_textord.a
lib/libtesseract_training.a
lib/libtesseract_viewer.a
lib/libtesseract_wordrec.a
libexec/tesseract
share/doc/tesseract/README
share/tessdata/confsets
share/tessdata/eng.DangAmbigs
share/tessdata/eng.freq-dawg
share/tessdata/eng.inttemp
share/tessdata/eng.normproto
share/tessdata/eng.pffmtable
share/tessdata/eng.unicharset
share/tessdata/eng.user-words
share/tessdata/eng.word-dawg
share/tesseract/phototest.tif
@dirrm share/doc/tesseract
@dirrm share/tesseract
@dirrm share/tessdata
@dirrm include/tesseract

View file

@ -0,0 +1,9 @@
$NetBSD: distinfo,v 1.1.1.1 2007/05/18 06:39:27 wiz Exp $
SHA1 (tesseract-1.04b.tar.gz) = 263a65e462ed864c4da115cdcb3f3e78613de485
RMD160 (tesseract-1.04b.tar.gz) = 5e9c70d4435a59157f0af6503a57b02a4a74350e
Size (tesseract-1.04b.tar.gz) = 2899276 bytes
SHA1 (patch-ae) = c22f254b73fb9bbd02cf8ef7b4ccbea475afd5df
SHA1 (patch-ag) = 581ec7ac0528bb28fddb3fbaa35a87bb1835a82e
SHA1 (patch-ah) = 22987d8523631c5c6e8b2fb5096ff87c5bc13124
SHA1 (patch-ai) = e219077d2acf0652a9bf6418d3f8ce4e11782ed5

View file

@ -0,0 +1,2 @@
#!/bin/sh
exec @PREFIX@/libexec/tesseract "$@"

View file

@ -0,0 +1,16 @@
$NetBSD: patch-ae,v 1.1.1.1 2007/05/18 06:39:27 wiz Exp $
--- cutil/globals.h.orig 2006-06-17 00:17:07.000000000 +0200
+++ cutil/globals.h
@@ -43,9 +43,11 @@ extern int acts[MAXPROC]; /*actio
extern int debugs[MAXPROC]; /*debug flags */
extern int plots[MAXPROC]; /*plot flags */
extern int corners[4]; /*corners of scan window */
+extern "C" {
extern int optind; /*option index */
extern char *optarg; /*option argument */
/*image file name */
+}
extern char imagefile[FILENAMESIZE];
/* main directory */
extern char directory[FILENAMESIZE];

View file

@ -0,0 +1,15 @@
$NetBSD: patch-ag,v 1.1.1.1 2007/05/18 06:39:27 wiz Exp $
--- cutil/tordvars.h.orig 2007-05-16 21:33:53.000000000 +0000
+++ cutil/tordvars.h
@@ -45,8 +45,8 @@ extern int similarity_enable;
extern int similarity_debug; /* Level of debug output */
extern int write_raw_output; /* Text before context */
extern int write_output; /* Text file output */
-//extern "C" { extern int display_ratings; } /* Show the ratings */
-extern int display_ratings; /* Show the ratings */
+extern "C" { extern int display_ratings; } /* Show the ratings */
+//extern int display_ratings; /* Show the ratings */
extern int show_bold; /* Use bold text */
extern int display_text; /* Show word text */
extern int display_blocks; /* Show word as boxes */

View file

@ -0,0 +1,13 @@
$NetBSD: patch-ah,v 1.1.1.1 2007/05/18 06:39:27 wiz Exp $
--- ccutil/debugwin.cpp.orig 2006-06-16 22:17:04.000000000 +0000
+++ ccutil/debugwin.cpp
@@ -229,7 +229,7 @@ DEBUG_WIN::DEBUG_WIN(
length += sprintf (command + length, "trap \"\" 1 2 3 13 15\n");
length +=
sprintf (command + length,
- "/usr/bin/X11/xterm -sb -sl " INT32FORMAT " -geometry "
+ "/usr/X11R6/bin/xterm -sb -sl " INT32FORMAT " -geometry "
INT32FORMAT "x" INT32FORMAT "", buflines, xsize / 8, ysize / 16);
if (xpos >= 0)
command[length++] = '+';

View file

@ -0,0 +1,14 @@
$NetBSD: patch-ai,v 1.1.1.1 2007/05/18 06:39:27 wiz Exp $
--- configure.orig 2007-02-02 21:37:43.000000000 +0100
+++ configure
@@ -7083,7 +7083,8 @@ else
if test "$cross_compiling" = yes; then
ac_cv_func_fork_works=cross
else
- cat >conftest.$ac_ext <<_ACEOF
+ cat confdefs.h >conftest.$ac_ext
+ cat >>conftest.$ac_ext <<_ACEOF
/* By Ruediger Kuhlmann. */
#include <sys/types.h>
#if HAVE_UNISTD_H