Update namazu2 to namazu 2.0.7.

Overview of Changes in Namazu 2.0.7 - Sep 13, 2001

* Bug fix release.
* Fix nmz_is_lang_ja() to return correct value.
* Fix header files installation.
* Fix mknmz checkpoint bug.
* Added extensions of Ichitaro 7 and 8.
* Fixed the bug when target directories contain the
  current directory. (Only Win32 Platforms)
* Fixed the AND query bug that namazu ignores
  the keywords which don't match at all.
* Lookup both environment variables of SCRIPT_NAME
  and DOCUMENT_URI (instead of DOCUMENT_NAME). (namazu.cgi)
* More portability improvements.

I've also applied the following bug fixes, which were made after 2.0.7 was released.
(All URLs below are in Japanese.)

http://www.namazu.org/ml/namazu-users-ja/msg02073.html
	pl/util.pl

http://www.namazu.org/ml/namazu-devel-ja/msg02024.html
	pl/htmlsplit.pl

http://www.namazu.org/ml/namazu-devel-ja/msg02030.html
	scripts/mknmz.in
This commit is contained in:
taca 2001-09-24 03:56:01 +00:00
parent d5c160e227
commit 34ee40ea17
15 changed files with 369 additions and 53 deletions

View file

@ -1,8 +1,7 @@
# $NetBSD: Makefile,v 1.6 2001/08/21 14:01:18 taca Exp $
# $NetBSD: Makefile,v 1.7 2001/09/24 03:56:01 taca Exp $
# FreeBSD Id: ports/japanese/namazu2/Makefile,v 1.16 2000/10/26 20:56:25 knu Exp
DISTNAME= namazu-2.0.6
PKGNAME= ${DISTNAME}nb1
DISTNAME= namazu-2.0.7
CATEGORIES= japanese textproc www
MASTER_SITES= ftp://ftp.namazu.org/namazu/stable/ \
http://www.namazu.org/stable/
@ -29,7 +28,7 @@ CONFIGURE_ARGS+= -with-chasen
DEINSTALL_FILE= ${WRKDIR}/DEINSTALL
INSTALL_FILE= ${WRKDIR}/INSTALL
USE_LIBINTL= # defined
USE_BUILDLINK_ONLY= # defined
USE_PERL5= # defined
GNU_CONFIGURE= # defined
USE_LIBTOOL= # defined
@ -52,4 +51,5 @@ post-install:
@${TEST} -d ${LOCALBASE}/etc/namazu || ${INSTALL_DATA_DIR} ${LOCALBASE}/etc/namazu
@${SH} ${WRKDIR}/INSTALL ${PKGNAME} POST-INSTALL
.include "../../devel/gettext-lib/buildlink.mk"
.include "../../mk/bsd.pkg.mk"

View file

@ -1,9 +1,14 @@
$NetBSD: distinfo,v 1.4 2001/08/21 14:01:18 taca Exp $
$NetBSD: distinfo,v 1.5 2001/09/24 03:56:01 taca Exp $
SHA1 (namazu-2.0.6.tar.gz) = d2fa5b326da628af9fe279c4f26261795b463707
Size (namazu-2.0.6.tar.gz) = 829209 bytes
SHA1 (namazu-2.0.7.tar.gz) = da0113f61d2b46f3b65973fda8718acaad1725d4
Size (namazu-2.0.7.tar.gz) = 829183 bytes
SHA1 (patch-aa) = 336a7f68842d4a9ca29ce085ccded45fe0b1392f
SHA1 (patch-ab) = 92e64af4b94f88ddea605d5b21064f33fff00798
SHA1 (patch-ac) = ad28a080b2d77863cf24fbc702f013245b3b7dda
SHA1 (patch-ad) = 95c897675de2e4a1914b6bc55e34d40c9b168505
SHA1 (patch-ae) = 881944e91b497f87ef241201b206c2d5ce2b65fd
SHA1 (patch-ac) = 2de6a77047141c6f9ac36180bfcc7f261e7b16df
SHA1 (patch-af) = a5a4c913980e163813f06749445c980eb4644737
SHA1 (patch-ag) = 626af603170bca527cedc4d27e07e1c07259a701
SHA1 (patch-ah) = e38d7efb8e1e8a6e80e96fe84f9c180ec8a7fbb4
SHA1 (patch-ai) = 49d685ddd93207f984813e05a21da60fe1862bef
SHA1 (patch-aj) = f628f63fcac7902e9b8027ec310120ddd936e00a
SHA1 (patch-ak) = 3ffd48066fb4b689dc535ea2c48da28dccf1b881
SHA1 (patch-al) = 6f7317274461c254843f0a53b9f34c1bf54147ad

View file

@ -0,0 +1,13 @@
$NetBSD: patch-ac,v 1.1 2001/09/24 03:56:02 taca Exp $
--- filter/mhonarc.pl.orig Tue Feb 29 13:25:33 2000
+++ filter/mhonarc.pl
@@ -105,7 +105,7 @@
$$contref =~ s/<!--X-Head-Body-Sep-Begin-->/\n/;
# Handle a field consists of two or more lines.
- $$contref =~ s!^(<LI>)(.*?)(</LI>$)!$1 . lftospace($2) . $3!gems;
+ $$contref =~ s!^(<LI>)(.*?)(</LI>$)!$1 . lftospace($2) . $3!gemsi;
# For plugging spaces before headers
$$contref =~ s/^<LI>//gim;

View file

@ -1,13 +0,0 @@
$NetBSD: patch-ad,v 1.1 2001/08/21 14:01:19 taca Exp $
--- nmz/Makefile.in.orig Fri Aug 10 09:38:15 2001
+++ nmz/Makefile.in
@@ -133,7 +133,7 @@
INCLUDES = -I$(srcdir) -I.. -I$(srcdir)/../lib -I$(srcdir)/../src -I$(srcdir)/../intl -I$(srcdir)/../nmz
-pkginclude_HEADERS = codeconv.h field.h hlist.h i18n.h idxname.h libnamazu.h l10n-ja.h parser.h query.h re.h regex.h search.h util.h var.h wakati.h
+pkginclude_HEADERS = codeconv.h field.h hlist.h i18n.h idxname.h libnamazu.h l10n-ja.h parser.h query.h re.h regex.h search.h util.h var.h wakati.h score.h
lib_LTLIBRARIES = libnmz.la

View file

@ -1,21 +0,0 @@
$NetBSD: patch-ae,v 1.1 2001/08/21 14:01:19 taca Exp $
--- nmz/l10n-ja.c.orig Thu Jun 21 15:21:32 2001
+++ nmz/l10n-ja.c
@@ -47,13 +47,13 @@
const char *lang;
lang = nmz_get_lang_ctype();
- if (strcmp(lang, "japanese")) {
+ if (strcmp(lang, "japanese") == 0) {
return 1; /* TRUE */
}
- if (strcmp(lang, "ja")) {
+ if (strcmp(lang, "ja") == 0) {
return 1; /* TRUE */
}
- if (strncmp(lang, "ja_JP", 5)) {
+ if (strncmp(lang, "ja_JP", 5) == 0) {
return 1; /* TRUE */
}
return 0; /* FALSE */

View file

@ -0,0 +1,34 @@
$NetBSD: patch-af,v 1.1 2001/09/24 03:56:02 taca Exp $
--- lib/getopt.c.orig Mon Oct 11 13:25:11 1999
+++ lib/getopt.c
@@ -71,10 +71,11 @@
#ifdef VMS
#include <unixlib.h>
+#endif
+
#if HAVE_STRING_H - 0
#include <string.h>
#endif
-#endif
#if defined (WIN32) && !defined (__CYGWIN32__)
/* It's not Unix, really. See? Capital letters. */
@@ -695,7 +696,7 @@
optarg = nameend + 1;
else
{
- if (opterr)
+ if (opterr) {
if (argv[optind - 1][1] == '-')
/* --option */
fprintf (stderr,
@@ -706,6 +707,7 @@
fprintf (stderr,
_("%s: option `%c%s' doesn't allow an argument\n"),
argv[0], argv[optind - 1][0], pfound->name);
+ }
nextchar += strlen (nextchar);

View file

@ -0,0 +1,183 @@
$NetBSD: patch-ag,v 1.1 2001/09/24 03:56:02 taca Exp $
--- nmz/regex.c.orig Sat Sep 1 18:40:52 2001
+++ nmz/regex.c
@@ -71,6 +71,12 @@
void free _((void*));
#endif
+#ifdef HAVE_STRING_H
+# include <string.h>
+#else
+# include <strings.h>
+#endif
+
/* #define NO_ALLOCA */ /* try it out for now */
#ifndef NO_ALLOCA
/* Make alloca work the best possible way. */
@@ -99,12 +105,6 @@
# endif
#endif /* __GNUC__ */
-#ifdef HAVE_STRING_H
-# include <string.h>
-#else
-# include <strings.h>
-#endif
-
#define RE_ALLOCATE alloca
#ifdef C_ALLOCA
#define FREE_VARIABLES() alloca(0)
@@ -153,7 +153,9 @@
static void insert_jump _((int, char*, char*, char*));
static void store_jump_n _((char*, int, char*, unsigned));
static void insert_jump_n _((int, char*, char*, char*, unsigned));
+#if 0
static void insert_op _((int, char*, char*));
+#endif
static void insert_op_2 _((int, char*, char*, int, int));
static int memcmp_translate _((unsigned char*, unsigned char*, int));
@@ -175,9 +177,7 @@
#undef P
-#ifdef RUBY
#include "util.h"
-#endif
static void
init_syntax_once()
@@ -358,7 +358,7 @@
wordbeg, /* Succeeds if at word beginning. */
wordend, /* Succeeds if at word end. */
wordbound, /* Succeeds if at a word boundary. */
- notwordbound,/* Succeeds if not at a word boundary. */
+ notwordbound /* Succeeds if not at a word boundary. */
};
@@ -409,6 +409,7 @@
long syntax;
{
/* obsolete */
+ return 0;
}
@@ -442,7 +443,7 @@
int n = mbclen(c) - 1; \
c &= (1<<(BYTEWIDTH-2-n)) - 1; \
while (n--) { \
- c = c << 6 | *p++ & ((1<<6)-1); \
+ c = (c << 6) | (*p++ & ((1<<6)-1)); \
} \
} \
else { \
@@ -483,23 +484,28 @@
{
if (current_mbctype == MBCTYPE_UTF8) {
if (c < 0x80)
- printf("%c", c);
+ printf("%c", (int)c);
else if (c <= 0x7ff)
- printf("%c%c", utf8_firstbyte(c), c&0x3f);
+ printf("%c%c", (int)utf8_firstbyte(c), (int)(c & 0x3f));
else if (c <= 0xffff)
- printf("%c%c%c", utf8_firstbyte(c), (c>>6)&0x3f, c&0x3f);
+ printf("%c%c%c", (int)utf8_firstbyte(c), (int)((c >> 6) & 0x3f),
+ (int)(c & 0x3f));
else if (c <= 0x1fffff)
- printf("%c%c%c%c", utf8_firstbyte(c), (c>>12)&0x3f, (c>>6)&0x3f, c&0x3f);
+ printf("%c%c%c%c", (int)utf8_firstbyte(c), (int)((c >> 12) & 0x3f),
+ (int)((c >> 6) & 0x3f), (int)(c & 0x3f));
else if (c <= 0x3ffffff)
- printf("%c%c%c%c%c", utf8_firstbyte(c), (c>>18)&0x3f, (c>>12)&0x3f, (c>>6)&0x3f, c&0x3f);
+ printf("%c%c%c%c%c", (int)utf8_firstbyte(c), (int)((c >> 18) & 0x3f),
+ (int)((c >> 12) & 0x3f), (int)((c >> 6) & 0x3f), (int)(c & 0x3f));
else if (c <= 0x7fffffff)
- printf("%c%c%c%c%c%c", utf8_firstbyte(c), (c>>24)&0x3f, (c>>18)&0x3f, (c>>12)&0x3f, (c>>6)&0x3f, c&0x3f);
+ printf("%c%c%c%c%c%c", (int)utf8_firstbyte(c), (int)((c >> 24) & 0x3f),
+ (int)((c >> 18) & 0x3f), (int)((c >> 12) & 0x3f),
+ (int)((c >> 6) & 0x3f), (int)(c & 0x3f));
}
else if (c < 0xff) {
- printf("\\%o", c);
+ printf("\\%o", (int)c);
}
else {
- printf("%c%c", c>>BYTEWIDTH, c&0xff);
+ printf("%c%c", (int)(c >> BYTEWIDTH), (int)(c &0xff));
}
}
@@ -700,6 +706,7 @@
return 0;
}
+#if 0
static void
print_partial_compiled_pattern(start, end)
unsigned char *start;
@@ -949,6 +956,7 @@
print_partial_compiled_pattern (buffer, buffer + bufp->used);
}
+#endif
static char*
calculate_must_string(start, end)
@@ -1118,7 +1126,7 @@
register const char *p = pattern;
const char *nextp;
const char *pend = pattern + size;
- register unsigned int c, c1;
+ register unsigned int c, c1 = 0;
const char *p0;
int numlen;
@@ -1412,8 +1420,8 @@
case 'W':
for (c = 0; c < (1 << BYTEWIDTH); c++) {
if (SYNTAX(c) != Sword &&
- (current_mbctype && !re_mbctab[c] ||
- !current_mbctype && SYNTAX(c) != Sword2))
+ ((current_mbctype && !re_mbctab[c]) ||
+ (!current_mbctype && SYNTAX(c) != Sword2)))
SET_LIST_BIT(c);
}
last = -1;
@@ -2241,6 +2249,8 @@
case dummy_failure_jump:
bufp->options |= RE_OPTIMIZE_ANCHOR;
break;
+ default:
+ break;
}
}
else if (*laststart == charset || *laststart == charset_not) {
@@ -2409,6 +2419,7 @@
}
+#if 0
/* Open up space at location THERE, and insert operation OP.
CURRENT_END gives the end of the storage in use, so
we know how much data to copy up.
@@ -2428,7 +2439,7 @@
there[0] = (char)op;
}
-
+#endif
/* Open up space at location THERE, and insert operation OP followed by
NUM_1 and NUM_2. CURRENT_END gives the end of the storage in use, so
@@ -4095,7 +4106,6 @@
p1 = p;
/* If failed to a backwards jump that's part of a repetition
loop, need to pop this failure point and use the next one. */
- pop_loop:
switch ((enum regexpcode)*p1) {
case jump_n:
case finalize_push_n:

View file

@ -0,0 +1,29 @@
$NetBSD: patch-ah,v 1.1 2001/09/24 03:56:02 taca Exp $
--- nmz/util.c.orig Sun Sep 2 16:13:37 2001
+++ nmz/util.c
@@ -102,9 +102,9 @@
*/
unsigned long
-nmz_scan_oct(char *start, int len, int *retlen)
+nmz_scan_oct(const char *start, int len, int *retlen)
{
- register char *s = start;
+ register const char *s = start;
register unsigned long retval = 0;
while (len-- && *s >= '0' && *s <= '7') {
@@ -117,10 +117,10 @@
}
unsigned long
-nmz_scan_hex(char *start, int len, int *retlen)
+nmz_scan_hex(const char *start, int len, int *retlen)
{
static char hexdigit[] = "0123456789abcdef0123456789ABCDEFx";
- register char *s = start;
+ register const char *s = start;
register unsigned long retval = 0;
char *tmp;

View file

@ -0,0 +1,15 @@
$NetBSD: patch-ai,v 1.1 2001/09/24 03:56:02 taca Exp $
--- nmz/util.h.orig Mon Jul 9 16:30:37 2001
+++ nmz/util.h
@@ -14,8 +14,8 @@
#define nmz_iseuc_kana1st(c) ((uchar)(c) == 0x8e)
#define nmz_iseuc_hojo1st(c) ((uchar)(c) == 0x8f)
-extern unsigned long nmz_scan_oct ( char *start, int len, int *retlen );
-extern unsigned long nmz_scan_hex ( char *start, int len, int *retlen );
+extern unsigned long nmz_scan_oct (const char *start, int len, int *retlen );
+extern unsigned long nmz_scan_hex (const char *start, int len, int *retlen );
extern void * nmz_xmalloc ( unsigned long size );
extern void * nmz_xrealloc ( void *ptr, unsigned long size );
extern void nmz_tr ( char *str, const char *lstr, const char *rstr );

View file

@ -0,0 +1,39 @@
$NetBSD: patch-aj,v 1.1 2001/09/24 03:56:02 taca Exp $
--- pl/htmlsplit.pl.orig Wed Jan 10 17:42:52 2001
+++ pl/htmlsplit.pl
@@ -72,15 +72,16 @@
my $id = 0;
# $cont =~ s/(<a\s[^>]*href=(["']))#(.+?)(\2[^>]*>)/$1$3.html$4/gi; #'
+ $cont =~ s#(<a[^>]*\s+)name=(["'])\2([^>]*>(.*?)</a>)#$1$4#sgi;
$cont =~ s {
- \G(.+?) # 1
- (<h([1-6])>)?\s* # 2, 3
- <a[^>]*\s+name=([a-zA-Z0-9-\.]+| # 4,
- (["']).+?\5)[^>]*>(.*?)</a> # 5,6
- \s*(</h\3>)? # 7
- } {
- write_partial_file($1, $4, $6, $id++, $mtime, \%info)
- }sgexi;
+ \G(.+?) # 1
+ (<h([1-6])>)?\s* # 2, 3
+ <a[^>]*\s+name=([a-zA-Z0-9-\.]+| # 4,
+ (["']).+?\5)[^>]*>(.*?)</a> # 5,6
+ \s*(</h\3>)? # 7
+ } {
+ write_partial_file($1, $4, $6, $id++, $mtime, \%info)
+ }sgexi;
write_partial_file($cont, "", "", $id, $mtime, \%info);
return @{$info{'names'}};
@@ -130,6 +131,9 @@
my $orig_title = $info_ref->{'title'};
my $prev_name = $info_ref->{'name'};
my $prev_anchored = $info_ref->{'anchored'};
+
+ $prev_name =~ s#\n\r##sg;
+ $prev_name =~ s#\n##sg;
html::remove_html_elements(\$prev_anchored);
$prev_anchored =~ s/^\s+//;

View file

@ -0,0 +1,14 @@
$NetBSD: patch-ak,v 1.1 2001/09/24 03:56:02 taca Exp $
--- pl/util.pl.orig Mon Aug 20 22:32:20 2001
+++ pl/util.pl
@@ -145,7 +145,8 @@
return '';
}
} else {
- $fh = efopen($arg) || return 0; # in case file is removed after find_file
+ $fh = fopen($arg) || return 0; # in case file is removed after find_file
+ # 2.0.7 had problem
}
my $size = -s $fh;
return $size;

View file

@ -0,0 +1,13 @@
$NetBSD: patch-al,v 1.1 2001/09/24 03:56:02 taca Exp $
--- scripts/mknmz.in.orig Wed Aug 22 15:16:20 2001
+++ scripts/mknmz.in
@@ -179,6 +179,8 @@
my $processed_num = 0;
my $file_size = util::filesize($cfile);
+ return ($file_size, $processed_num) unless $file_size;
+
if ($var::Opt{'htmlsplit'} && $cfile =~ $conf::HTML_SUFFIX) {
my @parts = htmlsplit::split($cfile, "NMZ.partial");
if (@parts > 1) {

View file

@ -3,8 +3,3 @@ as a CGI program for a small or medium scale Web search engine, but also works
as a personal use search system for your pile of email.
(The Japanese word `Namazu' means `catfish' in English)
WWW: http://www.namazu.org/
*WARNING*: Index file format has changed since namazu 1.3.0.11.
You should care for upgrading from old version.

View file

@ -1,9 +1,9 @@
$NetBSD: MESSAGE,v 1.4 2001/08/21 14:01:19 taca Exp $
$NetBSD: MESSAGE,v 1.5 2001/09/24 03:56:03 taca Exp $
*WARNING*:
1. Index file format has changed since namazu 2.0. If you upgrade
from namazu 1.X, you need to rebuild index files.
1. Since the index file format is different from namazu 1.X, you need to
rebuild index files if you upgrade from namazu 1.X.
2. In order to process Japanese, you need to set "LC_ALL", "LANG"
or "LC_CTYPE" environment to "ja". Or use --indexing-lang option
@ -16,3 +16,10 @@ $NetBSD: MESSAGE,v 1.4 2001/08/21 14:01:19 taca Exp $
for perl to stop the warning on NetBSD 1.5.X and before.
On NetBSD current (1.6 or later), you don't need to set
"PERL_BADLANG" but "LC_LANG" environment.
This package includes some bug fixes made after the namazu 2.0.7 release. Here
are the referenced URLs and the modified files in the source tree.
http://www.namazu.org/ml/namazu-users-ja/msg02073.html pl/util.pl
http://www.namazu.org/ml/namazu-devel-ja/msg02024.html pl/htmlsplit.pl
http://www.namazu.org/ml/namazu-devel-ja/msg02030.html scripts/mknmz.in

View file

@ -1,12 +1,15 @@
@comment $NetBSD: PLIST,v 1.2 2001/08/21 14:01:19 taca Exp $
@comment $NetBSD: PLIST,v 1.3 2001/09/24 03:56:03 taca Exp $
bin/adnmz
bin/bnamazu
bin/gcnmz
bin/kwnmz
bin/lnnmz
bin/mailutime
bin/mknmz
bin/namazu
bin/nmz-config
bin/nmzgrep
bin/nmzmerge
bin/rfnmz
bin/vfnmz
@exec /bin/mkdir %D/etc/namazu 2>/dev/null|| /usr/bin/true