Initial import of tesseract-1.04b from pkgsrc-wip (packaged by heinz@
and myself): This code is a raw OCR engine. It has NO PAGE LAYOUT ANALYSIS, NO OUTPUT FORMATTING, and NO UI. It can only process an image of a single column and create text from it. It can detect fixed pitch vs. proportional text. Having said that, in 1995, this engine was in the top 3 in terms of character accuracy, and it compiles and runs on both Linux and Windows. Another current limitation is that it only recognizes English and its character set is only US-ASCII. Training code IS included in the open source release, however, and will be included in a future release.
This commit is contained in:
parent
539cc301cc
commit
e899e6021c
9 changed files with 396 additions and 0 deletions
9
graphics/tesseract/DESCR
Normal file
9
graphics/tesseract/DESCR
Normal file
|
@ -0,0 +1,9 @@
|
|||
This code is a raw OCR engine. It has NO PAGE LAYOUT ANALYSIS, NO
|
||||
OUTPUT FORMATTING, and NO UI. It can only process an image of a
|
||||
single column and create text from it. It can detect fixed pitch
|
||||
vs proportional text. Having said that, in 1995, this engine was
|
||||
in the top 3 in terms of character accuracy, and it compiles and
|
||||
runs on both Linux and Windows. Another current limitation is that
|
||||
it only recognizes English and its character set is only US-ASCII.
|
||||
Training code IS included in the open source release however, and
|
||||
will be included in a future release.
|
32
graphics/tesseract/Makefile
Normal file
32
graphics/tesseract/Makefile
Normal file
|
@ -0,0 +1,32 @@
|
|||
# $NetBSD: Makefile,v 1.1.1.1 2007/05/18 06:39:27 wiz Exp $
|
||||
#
|
||||
|
||||
DISTNAME= tesseract-1.04b
|
||||
CATEGORIES= graphics
|
||||
MASTER_SITES= http://tesseract-ocr.googlecode.com/files/
|
||||
|
||||
MAINTAINER= pkgsrc-users@NetBSD.org
|
||||
HOMEPAGE= http://code.google.com/p/tesseract-ocr/
|
||||
COMMENT= Commercial quality open source OCR engine
|
||||
|
||||
PKG_DESTDIR_SUPPORT=user-destdir
|
||||
|
||||
GNU_CONFIGURE= yes
|
||||
USE_LANGUAGES= c c++
|
||||
WRKSRC= ${WRKDIR}/tesseract-1.04
|
||||
|
||||
post-build:
|
||||
${SED} -e "s,@PREFIX@,${PREFIX}," ${FILESDIR}/tesseract.sh \
|
||||
> ${WRKSRC}/tesseract.sh
|
||||
|
||||
post-install:
|
||||
${INSTALL_LIB_DIR} ${DESTDIR}${PREFIX}/libexec
|
||||
${MV} ${DESTDIR}${PREFIX}/bin/tesseract ${DESTDIR}${PREFIX}/libexec
|
||||
${INSTALL_SCRIPT} ${WRKSRC}/tesseract.sh ${DESTDIR}${PREFIX}/bin/tesseract
|
||||
${INSTALL_DATA_DIR} ${DESTDIR}${PREFIX}/share/doc/tesseract
|
||||
${INSTALL_DATA} ${WRKSRC}/README ${DESTDIR}${PREFIX}/share/doc/tesseract
|
||||
${INSTALL_DATA_DIR} ${DESTDIR}${PREFIX}/share/tesseract
|
||||
${INSTALL_DATA} ${WRKSRC}/phototest.tif ${DESTDIR}${PREFIX}/share/tesseract
|
||||
|
||||
.include "../../graphics/tiff/buildlink3.mk"
|
||||
.include "../../mk/bsd.pkg.mk"
|
286
graphics/tesseract/PLIST
Normal file
286
graphics/tesseract/PLIST
Normal file
|
@ -0,0 +1,286 @@
|
|||
@comment $NetBSD: PLIST,v 1.1.1.1 2007/05/18 06:39:27 wiz Exp $
|
||||
bin/cntraining
|
||||
bin/mftraining
|
||||
bin/tesseract
|
||||
include/tesseract/adaptions.h
|
||||
include/tesseract/adaptive.h
|
||||
include/tesseract/adaptmatch.h
|
||||
include/tesseract/applybox.h
|
||||
include/tesseract/associate.h
|
||||
include/tesseract/badwords.h
|
||||
include/tesseract/baseapi.h
|
||||
include/tesseract/basedir.h
|
||||
include/tesseract/baseline.h
|
||||
include/tesseract/bestfirst.h
|
||||
include/tesseract/bits16.h
|
||||
include/tesseract/bitstrm.h
|
||||
include/tesseract/bitvec.h
|
||||
include/tesseract/blckerr.h
|
||||
include/tesseract/blkocc.h
|
||||
include/tesseract/blobbox.h
|
||||
include/tesseract/blobclass.h
|
||||
include/tesseract/blobcmp.h
|
||||
include/tesseract/blobcmpl.h
|
||||
include/tesseract/blobs.h
|
||||
include/tesseract/blread.h
|
||||
include/tesseract/callcpp.h
|
||||
include/tesseract/callnet.h
|
||||
include/tesseract/charcut.h
|
||||
include/tesseract/charsample.h
|
||||
include/tesseract/chartoname.h
|
||||
include/tesseract/choicearr.h
|
||||
include/tesseract/choices.h
|
||||
include/tesseract/chop.h
|
||||
include/tesseract/chopper.h
|
||||
include/tesseract/closed.h
|
||||
include/tesseract/clst.h
|
||||
include/tesseract/cluster.h
|
||||
include/tesseract/clusttool.h
|
||||
include/tesseract/cmndwin.h
|
||||
include/tesseract/cnTraining.dsp
|
||||
include/tesseract/const.h
|
||||
include/tesseract/context.h
|
||||
include/tesseract/control.h
|
||||
include/tesseract/coutln.h
|
||||
include/tesseract/crakedge.h
|
||||
include/tesseract/cutil.h
|
||||
include/tesseract/cutoffs.h
|
||||
include/tesseract/danerror.h
|
||||
include/tesseract/dawg.h
|
||||
include/tesseract/debug.h
|
||||
include/tesseract/debugwin.h
|
||||
include/tesseract/djmenus.h
|
||||
include/tesseract/dlltest.cpp
|
||||
include/tesseract/dlltest.dsp
|
||||
include/tesseract/docqual.h
|
||||
include/tesseract/drawedg.h
|
||||
include/tesseract/drawfx.h
|
||||
include/tesseract/drawtord.h
|
||||
include/tesseract/edgblob.h
|
||||
include/tesseract/edgloop.h
|
||||
include/tesseract/efio.h
|
||||
include/tesseract/elst.h
|
||||
include/tesseract/elst2.h
|
||||
include/tesseract/emalloc.h
|
||||
include/tesseract/errcode.h
|
||||
include/tesseract/evntlst.h
|
||||
include/tesseract/evnts.h
|
||||
include/tesseract/expandblob.h
|
||||
include/tesseract/extern.h
|
||||
include/tesseract/extract.h
|
||||
include/tesseract/featdefs.h
|
||||
include/tesseract/fileerr.h
|
||||
include/tesseract/findseam.h
|
||||
include/tesseract/fixspace.h
|
||||
include/tesseract/fixxht.h
|
||||
include/tesseract/flexfx.h
|
||||
include/tesseract/float2int.h
|
||||
include/tesseract/fpchop.h
|
||||
include/tesseract/fpoint.h
|
||||
include/tesseract/freelist.h
|
||||
include/tesseract/funcdefs.h
|
||||
include/tesseract/fxdefs.h
|
||||
include/tesseract/fxid.h
|
||||
include/tesseract/gap_map.h
|
||||
include/tesseract/genblob.h
|
||||
include/tesseract/general.h
|
||||
include/tesseract/globaloc.h
|
||||
include/tesseract/globals.h
|
||||
include/tesseract/gradechop.h
|
||||
include/tesseract/grphics.h
|
||||
include/tesseract/grphshm.h
|
||||
include/tesseract/hashfn.h
|
||||
include/tesseract/heuristic.h
|
||||
include/tesseract/hideedge.h
|
||||
include/tesseract/host.h
|
||||
include/tesseract/hosthplb.h
|
||||
include/tesseract/hpddef.h
|
||||
include/tesseract/hpdsizes.h
|
||||
include/tesseract/hyphen.h
|
||||
include/tesseract/img.h
|
||||
include/tesseract/imgbmp.h
|
||||
include/tesseract/imgerrs.h
|
||||
include/tesseract/imgio.h
|
||||
include/tesseract/imgs.h
|
||||
include/tesseract/imgscale.h
|
||||
include/tesseract/imgtiff.h
|
||||
include/tesseract/imgunpk.h
|
||||
include/tesseract/intfx.h
|
||||
include/tesseract/intmatcher.h
|
||||
include/tesseract/intproto.h
|
||||
include/tesseract/ipoints.h
|
||||
include/tesseract/kdtree.h
|
||||
include/tesseract/labls.h
|
||||
include/tesseract/linlsq.h
|
||||
include/tesseract/listio.h
|
||||
include/tesseract/lmedsq.h
|
||||
include/tesseract/lsterr.h
|
||||
include/tesseract/mainblk.h
|
||||
include/tesseract/makechop.h
|
||||
include/tesseract/makerow.h
|
||||
include/tesseract/matchdefs.h
|
||||
include/tesseract/matchtab.h
|
||||
include/tesseract/matmatch.h
|
||||
include/tesseract/matrix.h
|
||||
include/tesseract/measure.h
|
||||
include/tesseract/memblk.h
|
||||
include/tesseract/memry.h
|
||||
include/tesseract/memryerr.h
|
||||
include/tesseract/mergenf.h
|
||||
include/tesseract/metrics.h
|
||||
include/tesseract/mf.h
|
||||
include/tesseract/mfTraining.dsp
|
||||
include/tesseract/mfcpch.cpp
|
||||
include/tesseract/mfcpch.h
|
||||
include/tesseract/mfdefs.h
|
||||
include/tesseract/mfoutline.h
|
||||
include/tesseract/mfvars.h
|
||||
include/tesseract/mfx.h
|
||||
include/tesseract/minmax.h
|
||||
include/tesseract/mod128.h
|
||||
include/tesseract/msmenus.h
|
||||
include/tesseract/name2char.h
|
||||
include/tesseract/ndminx.h
|
||||
include/tesseract/normalis.h
|
||||
include/tesseract/normfeat.h
|
||||
include/tesseract/normmatch.h
|
||||
include/tesseract/notdll.h
|
||||
include/tesseract/nwmain.h
|
||||
include/tesseract/ocrblock.h
|
||||
include/tesseract/ocrclass.h
|
||||
include/tesseract/ocrfeatures.h
|
||||
include/tesseract/ocrrow.h
|
||||
include/tesseract/ocrshell.h
|
||||
include/tesseract/oldbasel.h
|
||||
include/tesseract/oldheap.h
|
||||
include/tesseract/oldlist.h
|
||||
include/tesseract/olutil.h
|
||||
include/tesseract/outfeat.h
|
||||
include/tesseract/outlines.h
|
||||
include/tesseract/output.h
|
||||
include/tesseract/pageblk.h
|
||||
include/tesseract/pageres.h
|
||||
include/tesseract/pagewalk.h
|
||||
include/tesseract/paircmp.h
|
||||
include/tesseract/pdblock.h
|
||||
include/tesseract/pdclass.h
|
||||
include/tesseract/permdawg.h
|
||||
include/tesseract/permnum.h
|
||||
include/tesseract/permute.h
|
||||
include/tesseract/pgedit.h
|
||||
include/tesseract/pgeditx.h
|
||||
include/tesseract/picofeat.h
|
||||
include/tesseract/pieces.h
|
||||
include/tesseract/pithsync.h
|
||||
include/tesseract/pitsync1.h
|
||||
include/tesseract/platform.h
|
||||
include/tesseract/plotedges.h
|
||||
include/tesseract/plotseg.h
|
||||
include/tesseract/points.h
|
||||
include/tesseract/polyaprx.h
|
||||
include/tesseract/polyblk.h
|
||||
include/tesseract/polyblob.h
|
||||
include/tesseract/polyvert.h
|
||||
include/tesseract/poutline.h
|
||||
include/tesseract/protos.h
|
||||
include/tesseract/quadlsq.h
|
||||
include/tesseract/quadratc.h
|
||||
include/tesseract/quspline.h
|
||||
include/tesseract/ratngs.h
|
||||
include/tesseract/rect.h
|
||||
include/tesseract/rejctmap.h
|
||||
include/tesseract/reject.h
|
||||
include/tesseract/render.h
|
||||
include/tesseract/rwpoly.h
|
||||
include/tesseract/sbdmenu.h
|
||||
include/tesseract/sbgconst.h
|
||||
include/tesseract/sbgdefs.h
|
||||
include/tesseract/sbgtypes.h
|
||||
include/tesseract/scaleimg.h
|
||||
include/tesseract/scanedg.h
|
||||
include/tesseract/scanutils.cpp
|
||||
include/tesseract/scanutils.h
|
||||
include/tesseract/seam.h
|
||||
include/tesseract/secname.h
|
||||
include/tesseract/serialis.h
|
||||
include/tesseract/showim.h
|
||||
include/tesseract/sigmenu.h
|
||||
include/tesseract/sortflts.h
|
||||
include/tesseract/speckle.h
|
||||
include/tesseract/split.h
|
||||
include/tesseract/states.h
|
||||
include/tesseract/statistc.h
|
||||
include/tesseract/stderr.h
|
||||
include/tesseract/stepblob.h
|
||||
include/tesseract/stopper.h
|
||||
include/tesseract/strngs.h
|
||||
include/tesseract/structures.h
|
||||
include/tesseract/submen.h
|
||||
include/tesseract/tally.h
|
||||
include/tesseract/tessarray.h
|
||||
include/tesseract/tessbox.h
|
||||
include/tesseract/tessclas.h
|
||||
include/tesseract/tessedit.h
|
||||
include/tesseract/tessembedded.h
|
||||
include/tesseract/tesseractmain.h
|
||||
include/tesseract/tessinit.h
|
||||
include/tesseract/tessio.h
|
||||
include/tesseract/tessopt.h
|
||||
include/tesseract/tessout.h
|
||||
include/tesseract/tessvars.h
|
||||
include/tesseract/tface.h
|
||||
include/tesseract/tfacep.h
|
||||
include/tesseract/tfacepp.h
|
||||
include/tesseract/topitch.h
|
||||
include/tesseract/tordmain.h
|
||||
include/tesseract/tordvars.h
|
||||
include/tesseract/tospace.h
|
||||
include/tesseract/tovars.h
|
||||
include/tesseract/tprintf.h
|
||||
include/tesseract/training.h
|
||||
include/tesseract/trie.h
|
||||
include/tesseract/tstruct.h
|
||||
include/tesseract/txtregn.h
|
||||
include/tesseract/underlin.h
|
||||
include/tesseract/unichar.h
|
||||
include/tesseract/unicharmap.h
|
||||
include/tesseract/unicharset.h
|
||||
include/tesseract/varable.h
|
||||
include/tesseract/varabled.h
|
||||
include/tesseract/varblmen.h
|
||||
include/tesseract/varblwin.h
|
||||
include/tesseract/variables.h
|
||||
include/tesseract/vecfuncs.h
|
||||
include/tesseract/werd.h
|
||||
include/tesseract/werdit.h
|
||||
include/tesseract/wordclass.h
|
||||
include/tesseract/wordseg.h
|
||||
include/tesseract/xform2d.h
|
||||
lib/libtesseract_ccstruct.a
|
||||
lib/libtesseract_ccutil.a
|
||||
lib/libtesseract_classify.a
|
||||
lib/libtesseract_cutil.a
|
||||
lib/libtesseract_dict.a
|
||||
lib/libtesseract_display.a
|
||||
lib/libtesseract_image.a
|
||||
lib/libtesseract_main.a
|
||||
lib/libtesseract_textord.a
|
||||
lib/libtesseract_training.a
|
||||
lib/libtesseract_viewer.a
|
||||
lib/libtesseract_wordrec.a
|
||||
libexec/tesseract
|
||||
share/doc/tesseract/README
|
||||
share/tessdata/confsets
|
||||
share/tessdata/eng.DangAmbigs
|
||||
share/tessdata/eng.freq-dawg
|
||||
share/tessdata/eng.inttemp
|
||||
share/tessdata/eng.normproto
|
||||
share/tessdata/eng.pffmtable
|
||||
share/tessdata/eng.unicharset
|
||||
share/tessdata/eng.user-words
|
||||
share/tessdata/eng.word-dawg
|
||||
share/tesseract/phototest.tif
|
||||
@dirrm share/doc/tesseract
|
||||
@dirrm share/tesseract
|
||||
@dirrm share/tessdata
|
||||
@dirrm include/tesseract
|
9
graphics/tesseract/distinfo
Normal file
9
graphics/tesseract/distinfo
Normal file
|
@ -0,0 +1,9 @@
|
|||
$NetBSD: distinfo,v 1.1.1.1 2007/05/18 06:39:27 wiz Exp $
|
||||
|
||||
SHA1 (tesseract-1.04b.tar.gz) = 263a65e462ed864c4da115cdcb3f3e78613de485
|
||||
RMD160 (tesseract-1.04b.tar.gz) = 5e9c70d4435a59157f0af6503a57b02a4a74350e
|
||||
Size (tesseract-1.04b.tar.gz) = 2899276 bytes
|
||||
SHA1 (patch-ae) = c22f254b73fb9bbd02cf8ef7b4ccbea475afd5df
|
||||
SHA1 (patch-ag) = 581ec7ac0528bb28fddb3fbaa35a87bb1835a82e
|
||||
SHA1 (patch-ah) = 22987d8523631c5c6e8b2fb5096ff87c5bc13124
|
||||
SHA1 (patch-ai) = e219077d2acf0652a9bf6418d3f8ce4e11782ed5
|
2
graphics/tesseract/files/tesseract.sh
Normal file
2
graphics/tesseract/files/tesseract.sh
Normal file
|
@ -0,0 +1,2 @@
|
|||
#!/bin/sh
|
||||
exec @PREFIX@/libexec/tesseract "$@"
|
16
graphics/tesseract/patches/patch-ae
Normal file
16
graphics/tesseract/patches/patch-ae
Normal file
|
@ -0,0 +1,16 @@
|
|||
$NetBSD: patch-ae,v 1.1.1.1 2007/05/18 06:39:27 wiz Exp $
|
||||
|
||||
--- cutil/globals.h.orig 2006-06-17 00:17:07.000000000 +0200
|
||||
+++ cutil/globals.h
|
||||
@@ -43,9 +43,11 @@ extern int acts[MAXPROC]; /*actio
|
||||
extern int debugs[MAXPROC]; /*debug flags */
|
||||
extern int plots[MAXPROC]; /*plot flags */
|
||||
extern int corners[4]; /*corners of scan window */
|
||||
+extern "C" {
|
||||
extern int optind; /*option index */
|
||||
extern char *optarg; /*option argument */
|
||||
/*image file name */
|
||||
+}
|
||||
extern char imagefile[FILENAMESIZE];
|
||||
/* main directory */
|
||||
extern char directory[FILENAMESIZE];
|
15
graphics/tesseract/patches/patch-ag
Normal file
15
graphics/tesseract/patches/patch-ag
Normal file
|
@ -0,0 +1,15 @@
|
|||
$NetBSD: patch-ag,v 1.1.1.1 2007/05/18 06:39:27 wiz Exp $
|
||||
|
||||
--- cutil/tordvars.h.orig 2007-05-16 21:33:53.000000000 +0000
|
||||
+++ cutil/tordvars.h
|
||||
@@ -45,8 +45,8 @@ extern int similarity_enable;
|
||||
extern int similarity_debug; /* Level of debug output */
|
||||
extern int write_raw_output; /* Text before context */
|
||||
extern int write_output; /* Text file output */
|
||||
-//extern "C" { extern int display_ratings; } /* Show the ratings */
|
||||
-extern int display_ratings; /* Show the ratings */
|
||||
+extern "C" { extern int display_ratings; } /* Show the ratings */
|
||||
+//extern int display_ratings; /* Show the ratings */
|
||||
extern int show_bold; /* Use bold text */
|
||||
extern int display_text; /* Show word text */
|
||||
extern int display_blocks; /* Show word as boxes */
|
13
graphics/tesseract/patches/patch-ah
Normal file
13
graphics/tesseract/patches/patch-ah
Normal file
|
@ -0,0 +1,13 @@
|
|||
$NetBSD: patch-ah,v 1.1.1.1 2007/05/18 06:39:27 wiz Exp $
|
||||
|
||||
--- ccutil/debugwin.cpp.orig 2006-06-16 22:17:04.000000000 +0000
|
||||
+++ ccutil/debugwin.cpp
|
||||
@@ -229,7 +229,7 @@ DEBUG_WIN::DEBUG_WIN(
|
||||
length += sprintf (command + length, "trap \"\" 1 2 3 13 15\n");
|
||||
length +=
|
||||
sprintf (command + length,
|
||||
- "/usr/bin/X11/xterm -sb -sl " INT32FORMAT " -geometry "
|
||||
+ "/usr/X11R6/bin/xterm -sb -sl " INT32FORMAT " -geometry "
|
||||
INT32FORMAT "x" INT32FORMAT "", buflines, xsize / 8, ysize / 16);
|
||||
if (xpos >= 0)
|
||||
command[length++] = '+';
|
14
graphics/tesseract/patches/patch-ai
Normal file
14
graphics/tesseract/patches/patch-ai
Normal file
|
@ -0,0 +1,14 @@
|
|||
$NetBSD: patch-ai,v 1.1.1.1 2007/05/18 06:39:27 wiz Exp $
|
||||
|
||||
--- configure.orig 2007-02-02 21:37:43.000000000 +0100
|
||||
+++ configure
|
||||
@@ -7083,7 +7083,8 @@ else
|
||||
if test "$cross_compiling" = yes; then
|
||||
ac_cv_func_fork_works=cross
|
||||
else
|
||||
- cat >conftest.$ac_ext <<_ACEOF
|
||||
+ cat confdefs.h >conftest.$ac_ext
|
||||
+ cat >>conftest.$ac_ext <<_ACEOF
|
||||
/* By Ruediger Kuhlmann. */
|
||||
#include <sys/types.h>
|
||||
#if HAVE_UNISTD_H
|
Loading…
Reference in a new issue