- Update to 0.6

PR:		ports/60745
Submitted by:	Ulrich Spoerlein <q@uni.de> (maintainer)
This commit is contained in:
Pav Lucistnik 2003-12-31 00:17:41 +00:00
parent f1552e21b1
commit 8b94b4ebc5
Notes: svn2git 2021-03-31 03:12:20 +00:00
svn path=/head/; revision=96974
10 changed files with 144 additions and 253 deletions

View file

@ -6,7 +6,7 @@
#
PORTNAME= ocrad
PORTVERSION= 0.5
PORTVERSION= 0.6
CATEGORIES= graphics
MASTER_SITES= ${MASTER_SITE_GNU}
MASTER_SITE_SUBDIR= ${PORTNAME}
@ -17,17 +17,33 @@ COMMENT= OCR program implemented as filter
USE_BZIP2= yes
HAS_CONFIGURE= yes
USE_GETOPT_LONG= yes
USE_REINPLACE= yes
CONFIGURE_ARGS= --prefix=${PREFIX}
MAKE_ENV= CPPFLAGS="${CPPFLAGS}" LDFLAGS="${LDFLAGS}"
MAKE_ENV= CPPFLAGS="${CPPFLAGS}" LDFLAGS="${LDFLAGS}" \
INSTALL_PROGRAM="${INSTALL_PROGRAM}"
DOCS= AUTHORS ChangeLog NEWS README TODO
INFO= ocrad
MAN1= ocrad.1
STD_PATCH= textline.cc recognize2.cc bitmap.cc main.cc
.include <bsd.port.pre.mk>
post-patch:
.if (${OSVERSION} < 500000) && ! (defined(USE_GCC) && ${GCCVERSION} > 30000)
.for file in ${STD_PATCH}
@${REINPLACE_CMD} -e 's/std::isspace/isspace/g; s/std::getc/getc/g' \
-e 's/std::ungetc/ungetc/g; s/std::feof/feof/g' \
-e 's/std::ferror/ferror/g' ${WRKSRC}/${file}
.endfor
.endif
.if !defined(NOPORTDOCS)
post-install:
@${MKDIR} ${DOCSDIR}
cd ${WRKSRC} && ${INSTALL_DATA} ${DOCS} ${DOCSDIR}
@${INSTALL_MAN} ${FILESDIR}/ocrad.1 ${PREFIX}/man/man1
.endif
.include <bsd.port.mk>
.include <bsd.port.post.mk>

View file

@ -1 +1 @@
MD5 (ocrad-0.5.tar.bz2) = 75bdfda680ddeede5dafa523a16c7191
MD5 (ocrad-0.6.tar.bz2) = ebcefd3512a4f9d870d302167d8b8ec9

View file

@ -0,0 +1,114 @@
.TH OCRAD 1 "30 December 2003" "0.6" "GNU Ocrad"
.SH NAME
ocrad \- Optical Character Recognition
.SH SYNOPSIS
.I ocrad
\-afhivV \-b NUMBER \-D LEVEL \-l MODE \-o FILE \-x FILE [FILES ...]
.SH DESCRIPTION
.LP
.I ocrad
is an OCR (Optical Character Recognition) program
implemented as a filter and based on a feature extraction method. It
reads a bitmap image in pbm format and outputs text in ISO\-8859\-1
(Latin\-1) charset. It also includes a layout analyser able to separate
the columns or blocks of text normally found on printed pages. It can
be used as a stand\-alone console application, or as a backend to other
programs.
.SH OPTIONS
.TP
.I "\-a", "\-\-append"
Append generated text to the output file instead of overwriting it.
.TP
.I "\-b NUMBER", "\-\-block=NUMBER"
Process only the specified text block, beginning from 1.
This is only useful when used in conjunction with layout analysis (see below).
.TP
.I "\-D LEVEL", "\-\-debug=LEVEL"
Show debugging information. The levels are:
.nf
100 - Show raw block list, unordered
99 - Show recursive block list, unordered
98 - Show main block list, unordered
97 - Show recursive block list, ordered
96 - Show main block list, ordered
95..90 - reserved
89 - Show all blocks from every character
88 - Show main black blocks from every character
87 - Show guess list for every character
86 - Show best guess for every character
.fi
.TP
.I "\-f", "\-\-force"
Force overwrite of output file.
.TP
.I "\-h", "\-\-help"
Print an informative help message describing the options and then exit.
.TP
.I "\-i", "\-\-invert"
Invert image levels (white on black).
.TP
.I "\-l MODE", "\-\-layout=MODE"
Enable page layout analysis. The meaning of
.I MODE
is:
.nf
`0' no analysis at all,
`1' column separation,
`2' full analysis.
.fi
.TP
.I "\-o FILE"
Place the output into
.I FILE
instead of into the standard output.
.TP
.I "\-v", "\-\-verbose"
Verbose mode.
.TP
.I "\-V", "\-\-version"
Print the version number of Ocrad on the standard output and then exit.
.TP
.I "\-x FILE"
Write (export) OCR Results File to
.I FILE
\.
.SH BUGS
If you find a bug in GNU Ocrad, please send electronic mail to
<bug\-ocrad@gnu.org>. Include the version number, which you can find by
running `ocrad \-\-version'.
.SH CAVEATS
.IP \(bu 2
Scan directly in b/w mode. Convert from grayscale only if you know what
you are doing.
.IP \(bu 2
For better results the characters should be at least 20 pixels high.
.IP \(bu 2
Merged characters are always a problem. Try to avoid them.
.IP \(bu 2
Very bold or very light (broken) characters are also a problem.
.IP \(bu 2
Always look at the pbm file with your own eyes before blaming Ocrad for the
results. Remember the saying, "garbage in, garbage out".
.SH TODO
.IP \(bu 2
Deal with broken characters.
.IP \(bu 2
Make a better layout detector. Every character on its line.
.IP \(bu 2
Separate (more) merged characters.
.IP \(bu 2
Deal better with frames, lines, pictures, etc.
.IP \(bu 2
Change to ISO_8859\-15 (update for ISO_8859\-1 with euro sign).
.IP \(bu 2
Add an option for recognizing ISO_8859\-9 chars (Turkish).
.SH GETTING
.I ocrad
is available from http://www.gnu.org/software/ocrad/ocrad.html
.SH AUTHOR
.nf
Antonio Diaz <ant_diaz@teleline.es>
.fi
.SH HISTORY
.I ocrad
0.6 was released in December 2003.

View file

@ -1,8 +1,8 @@
--- Makefile.in.orig Sat Oct 18 01:29:16 2003
+++ Makefile.in Sun Nov 16 18:18:58 2003
--- Makefile.in.orig Thu Dec 18 11:11:05 2003
+++ Makefile.in Tue Dec 30 20:20:01 2003
@@ -4,13 +4,14 @@
DISTNAME = ocrad-0.5
DISTNAME = ocrad-0.6
-CXX = g++
-INSTALL = install
@ -35,8 +35,8 @@
%.o : %.cc
$(CXX) $(CXXFLAGS) -c -o $@ $<
@@ -42,6 +43,7 @@
textline.o : block.h character.h iso_8859_1.h textline.h
@@ -43,6 +44,7 @@
recognize2.o : block.h character.h iso_8859_1.h textline.h
textblock.o : block.h character.h textline.h textblock.h
main.o : block.h blockmap.h bitmap.h character.h textline.h textblock.h
+ $(CXX) $(CXXFLAGS) $(CPPFLAGS) -c main.cc

View file

@ -1,34 +0,0 @@
--- bitmap.cc.orig Sun Nov 16 17:28:14 2003
+++ bitmap.cc Sun Nov 16 17:33:30 2003
@@ -24,12 +24,11 @@
#include "rectangle.h"
#include "bitmap.h"
-
namespace {
char pbm_getrawbyte( FILE * f ) throw( Bitmap::Error )
{
- int ch = std::getc( f );
+ int ch = getc( f );
if( ch == EOF )
throw Bitmap::Error( "end-of-file reading pbm file.\n" );
@@ -58,7 +57,7 @@
char ch;
int i = 0;
- do ch = pbm_getc( f ); while( std::isspace( ch ) );
+ do ch = pbm_getc( f ); while( isspace( ch ) );
if( !std::isdigit( ch ) )
throw Bitmap::Error( "junk in pbm file where an integer should be.\n" );
do { i = (i * 10) + (ch - '0'); ch = pbm_getc( f ); }
@@ -71,7 +70,7 @@
{
char ch;
- do ch = pbm_getc( f ); while( std::isspace( ch ) );
+ do ch = pbm_getc( f ); while( isspace( ch ) );
if( ch == '0' ) return false;
if( ch == '1' ) return true;

View file

@ -1,10 +0,0 @@
--- character.h.orig Sun Nov 16 17:40:28 2003
+++ character.h Sun Nov 16 17:40:44 2003
@@ -64,6 +64,7 @@
void swap_guesses( int i, int j ) throw();
const Guess & guess( int i ) const throw();
int guesses() const throw() { return _guess.size(); }
+#undef isalnum
bool isalnum() const throw();
void join( Character & c ) throw();

View file

@ -1,29 +0,0 @@
--- configure.orig Mon Aug 25 00:07:09 2003
+++ configure Wed Nov 12 22:50:24 2003
@@ -13,7 +13,7 @@
while true ; do
# Break out if there are no more args
- if [ $# == 0 ]; then break ; fi
+ if [ $# = 0 ]; then break ; fi
# Get the first arg, and shuffle
option=$1
@@ -48,7 +48,7 @@
# Find the source files, if location was not specified.
srcdirtext=
-if [ x${srcdir} == x ]; then
+if [ x${srcdir} = x ]; then
srcdirtext="or . or .." ; srcdir=.
if [ ! -r ${srcdir}/${srctrigger} ] ; then srcdir=.. ; fi
if [ ! -r ${srcdir}/${srctrigger} ] ; then
@@ -65,7 +65,7 @@
fi
# Set srcdir to . if that's what it is.
-if [ $(pwd) == $(cd ${srcdir} ; pwd) ] ; then srcdir=. ; fi
+if [ $(pwd) = $(cd ${srcdir} ; pwd) ] ; then srcdir=. ; fi
# write variables to config file.
rm -f Makefile

View file

@ -1,14 +1,15 @@
--- iso_8859_1.h.orig Sun Nov 16 17:14:19 2003
+++ iso_8859_1.h Sun Nov 16 17:33:51 2003
@@ -100,6 +100,11 @@
--- iso_8859_1.h.orig Wed Dec 3 12:12:01 2003
+++ iso_8859_1.h Tue Dec 30 17:12:05 2003
@@ -100,6 +100,12 @@
static unsigned char base_letter( unsigned char ch ) throw();
static unsigned char compose( unsigned char base_letter,
unsigned char accent ) throw();
+#undef isalnum
+#undef isalpha
+#undef islower
+#undef isupper
+#undef isvowel
+#undef toupper
static bool isalnum( unsigned char ch ) throw();
static bool isalpha( unsigned char ch ) throw();
static bool isupper( unsigned char ch ) throw();
static bool islower( unsigned char ch ) throw();

View file

@ -1,21 +0,0 @@
--- main.cc.orig Sat Oct 18 01:27:29 2003
+++ main.cc Sun Nov 16 18:16:41 2003
@@ -28,6 +28,7 @@
#include <cstring>
#include <vector>
#include <getopt.h>
+#include <libgen.h>
#include "common.h"
#include "rectangle.h"
#include "bitmap.h"
@@ -289,8 +290,8 @@
{
if( infile == stdin )
{
- std::ungetc( std::getc( infile ), infile );
- if( std::feof( infile ) || std::ferror( infile ) ) infile = 0;
+ ungetc( getc( infile ), infile );
+ if( feof( infile ) || ferror( infile ) ) infile = 0;
}
while( infile != stdin )
{

View file

@ -1,146 +0,0 @@
--- textline.cc.orig Sun Nov 16 17:39:59 2003
+++ textline.cc Sun Nov 16 17:44:01 2003
@@ -33,7 +33,7 @@
for( ; end < characters(); ++end )
{
Character & c = character( end );
- if( c.guesses() && std::isspace( c.guess(0).ch ) ) break;
+ if( c.guesses() && isspace( c.guess(0).ch ) ) break;
}
return end;
}
@@ -264,11 +264,11 @@
if( c1.guesses() == 1 )
{
unsigned char ch = c1.guess( 0 ).ch;
- if( std::isspace( ch ) ) { begin = i + 1 ; continue; }
+ if( isspace( ch ) ) { begin = i + 1 ; continue; }
if( ch != 'c' && ch != 'o' && ch != 's' && ch != 'u' && ch != 'v' &&
ch != 'w' && ch != 'x' && ch != 'z' ) continue;
if( 4 * c1.height() > 5 * mean_height() )
- { c1.only_guess( std::toupper( ch ), 0 ); continue; }
+ { c1.only_guess( toupper( ch ), 0 ); continue; }
if( 5 * c1.height() < 4 * mean_height() ) continue;
for( int j = begin; j < characters(); ++j ) if( j != i )
{
@@ -276,12 +276,12 @@
if( c2.guesses() >= 1 )
{
unsigned char ch2 = c2.guess( 0 ).ch;
- if( std::isspace( ch2 ) ) break;
- if( ( std::isalpha( ch2 ) && 4 * c1.height() > 5 * c2.height() ) ||
- ( ( std::isupper( ch2 ) || ch2 == 'l' ) && ch2 != 'B' &&
+ if( isspace( ch2 ) ) break;
+ if( ( isalpha( ch2 ) && 4 * c1.height() > 5 * c2.height() ) ||
+ ( ( isupper( ch2 ) || ch2 == 'l' ) && ch2 != 'B' &&
( c1.height() >= c2.height() ||
Ocrad::similar( c1.height(), c2.height(), 10 ) ) ) )
- { c1.insert_guess( 0, std::toupper( ch ), 1 ); break; }
+ { c1.insert_guess( 0, toupper( ch ), 1 ); break; }
}
}
}
@@ -310,7 +310,7 @@
if( c1.guesses() >= 1 )
{
unsigned char ch = c1.guess( 0 ).ch;
- if( std::isspace( ch ) ) { begin = i + 1 ; continue; }
+ if( isspace( ch ) ) { begin = i + 1 ; continue; }
if( ch <= 127 || c1.block_vector().size() < 2 ) continue;
int chb = ISO_8859_1::base_letter( ch );
if( chb != 'o' && chb != 'u' ) continue;
@@ -323,11 +323,11 @@
unsigned char ch2 = c2.guess( 0 ).ch;
int ch2b = ISO_8859_1::base_letter( ch2 );
if( !ch2b && ch2 > 127 ) continue;
- if( std::isspace( ch2 ) ) break;
- if( ( std::isalpha( ch2 ) && 4 * b1.height() > 5 * c2.height() ) ||
- ( std::isupper( ch2 ) && Ocrad::similar( b1.height(), c2.height(), 10 ) ) ||
- ( std::isalpha( ch2b ) && 4 * c1.height() > 5 * c2.height() ) ||
- ( std::isupper( ch2b ) && Ocrad::similar( c1.height(), c2.height(), 10 ) ) )
+ if( isspace( ch2 ) ) break;
+ if( ( isalpha( ch2 ) && 4 * b1.height() > 5 * c2.height() ) ||
+ ( isupper( ch2 ) && Ocrad::similar( b1.height(), c2.height(), 10 ) ) ||
+ ( isalpha( ch2b ) && 4 * c1.height() > 5 * c2.height() ) ||
+ ( isupper( ch2b ) && Ocrad::similar( c1.height(), c2.height(), 10 ) ) )
{ c1.insert_guess( 0, ISO_8859_1::toupper( ch ), 1 ); break; }
}
}
@@ -341,7 +341,7 @@
if( c1.guesses() >= 1 )
{
unsigned char ch = c1.guess( 0 ).ch;
- if( std::isspace( ch ) ) { begin = i + 1 ; continue; }
+ if( isspace( ch ) ) { begin = i + 1 ; continue; }
if( ch != 'o' && ch != 'O' && ch != 'l' ) continue;
for( int j = begin; j < characters(); ++j ) if( j != i )
{
@@ -349,8 +349,8 @@
if( c2.guesses() >= 1 )
{
unsigned char ch2 = c2.guess( 0 ).ch;
- if( std::isspace( ch2 ) ) break;
- if( std::isdigit( ch2 ) )
+ if( isspace( ch2 ) ) break;
+ if( isdigit( ch2 ) )
{
if( Ocrad::similar( c1.height(), c2.height(), 10 ) )
c1.insert_guess( 0, (ch == 'l') ? '1' : '0', c1.guess(0).value + 1 );
@@ -372,7 +372,7 @@
Character & c2 = character( i );
if( !c2.guesses() ) continue;
unsigned char ch = c2.guess( 0 ).ch;
- if( !std::isalnum( ch ) && ch != '.' && ch != '|' ) continue;
+ if( !isalnum( ch ) && ch != '.' && ch != '|' ) continue;
switch( ch )
{
case 'g': case 'j': case 'p': case 'q': case 'y':
@@ -426,14 +426,14 @@
if( i < characters() - 1 && character( i + 1 ).guesses() )
rch = character( i + 1 ).guess( 0 ).ch;
if( ISO_8859_1::isupper( rch ) &&
- ( !lch || ISO_8859_1::isupper( lch ) || std::isspace( lch ) ) )
+ ( !lch || ISO_8859_1::isupper( lch ) || isspace( lch ) ) )
{ c.insert_guess( 0, 'I', 1 ); continue; }
if( ch == 'l' ) continue;
if( ISO_8859_1::isalpha( lch ) || ISO_8859_1::isalpha( rch ) )
{ c.insert_guess( 0, 'l', 1 ); continue; }
- if( rch == '|' && ( !lch || std::isspace( lch ) ) &&
+ if( rch == '|' && ( !lch || isspace( lch ) ) &&
i < characters() - 2 && character( i + 2 ).guesses() &&
- std::isalpha( character( i + 2 ).guess( 0 ).ch ) )
+ isalpha( character( i + 2 ).guess( 0 ).ch ) )
{ c.insert_guess( 0, 'l', 1 ); continue; }
}
}
@@ -475,7 +475,7 @@
if( c.guesses() )
{
unsigned char ch = c.guess( 0 ).ch;
- if( std::isspace( ch ) ) { begin = i + 1 ; continue; }
+ if( isspace( ch ) ) { begin = i + 1 ; continue; }
if( i == begin && ch == 'a' && c.guesses() == 2 &&
c.guess( 1 ).ch == 'Q' && 4 * c.height() > 5 * mean_height() )
c.swap_guesses( 0, 1 );
@@ -501,7 +501,7 @@
if( c1.guesses() )
{
unsigned char ch = c1.guess(0).ch;
- if( std::isspace( ch ) ) { begin = i + 1 ; continue; }
+ if( isspace( ch ) ) { begin = i + 1 ; continue; }
if( c1.guesses() != 2 || ch != 'B' || c1.guess(1).ch != 'a' ) continue;
if( 4 * c1.height() > 5 * mean_height() ) continue;
for( int j = begin; j < characters(); ++j ) if( j != i )
@@ -510,9 +510,9 @@
if( c2.guesses() >= 1 )
{
unsigned char ch2 = c2.guess(0).ch;
- if( std::isspace( ch2 ) ) break;
- if( ( std::isalpha( ch2 ) && 5 * c1.height() < 4 * c2.height() ) ||
- ( std::islower( ch2 ) &&
+ if( isspace( ch2 ) ) break;
+ if( ( isalpha( ch2 ) && 5 * c1.height() < 4 * c2.height() ) ||
+ ( islower( ch2 ) &&
( c1.height() <= c2.height() ||
Ocrad::similar( c1.height(), c2.height(), 10 ) ) ) )
{ c1.swap_guesses( 0, 1 ); break; }