Patch for issue raised in Russ Cox's "Glob Matching Can Be Simple And Fast Too" post.

https://research.swtch.com/glob
https://perl5.git.perl.org/perl.git/commit/33252c318625f3c6c89b816ee88481940e3e6f95

Reviewed by: wiz
This commit is contained in:
sevan 2017-04-28 22:59:48 +00:00
parent 7c2d608290
commit 2e39a2253a
5 changed files with 214 additions and 2 deletions

View file

@ -1,9 +1,10 @@
# $NetBSD: Makefile.common,v 1.28 2017/01/27 09:39:40 adam Exp $
# $NetBSD: Makefile.common,v 1.29 2017/04/28 22:59:48 sevan Exp $
#
# used by lang/perl5/Makefile
# used by databases/p5-gdbm/Makefile
DISTNAME= perl-5.24.1
PKGREVISION= 1
CATEGORIES= lang devel perl5
MASTER_SITES= ${MASTER_SITE_PERL_CPAN:S,/modules/by-module/$,/src/5.0/,}
DISTFILES+= ${DISTNAME}${EXTRACT_SUFX}

View file

@ -1,10 +1,11 @@
$NetBSD: distinfo,v 1.139 2017/01/27 09:39:40 adam Exp $
$NetBSD: distinfo,v 1.140 2017/04/28 22:59:48 sevan Exp $
SHA1 (perl-5.24.1.tar.bz2) = d43ac3d39686462f86eed35b3c298ace74f1ffa0
RMD160 (perl-5.24.1.tar.bz2) = e824cb74998ebbbc3286fa353e64e75104d4c5b1
SHA512 (perl-5.24.1.tar.bz2) = 5a6e5f5fcd65e7add7ba2126d530a8e2a912cb076cfe61bbf7e49b28e4e63aa0d474183a6f8a388c67d03ea6a44f367efb3b3a768e971ef52b769e737eeb048b
Size (perl-5.24.1.tar.bz2) = 14088312 bytes
SHA1 (patch-Configure) = 13455c1b32b0f602b339787af4ddcd481f9c2dd5
SHA1 (patch-MANIFEST) = 7037a7a1881da3d2db03d4a5d6a61a7a6d3bc11b
SHA1 (patch-Makefile.SH) = 32ffc30831b0af49f90119510021037b066367dc
SHA1 (patch-aa) = 9bbcc9395080b11934528a32808e0a509f1d831c
SHA1 (patch-ab) = c899b7221a78e74cc9b1480834baba047dd19f38
@ -17,6 +18,8 @@ SHA1 (patch-cn) = d1877383e213a414562b5bb4c1e8aa785926fab7
SHA1 (patch-dist_Carp_lib_Carp.pm) = fb628ee983462cec9303ceea09852378ec654ecf
SHA1 (patch-dist_Time-HiRes_HiRes.xs) = 067911a23881d48d2ad431076b3babeb585b83d7
SHA1 (patch-ext_Errno_Errno__pm.PL) = 4f135e267da17de38f8f1e7e03d5209bfd09a323
SHA1 (patch-ext_File-Glob_bsd_glob.c) = e43252b55f04bb1cd69d48e8155aa110532c9fbe
SHA1 (patch-ext_File-Glob_t_rt131211.t) = 9aeddad078cdc920e64ed2e73f952be341745d7e
SHA1 (patch-ext_XS-APItest_Makefile.PL) = 7094aa4cb021c1f29054a40c4f5f4c15c59f13de
SHA1 (patch-hints_cygwin.sh) = 1b21d927d6b7379754c4cd64a2b05d3632c35470
SHA1 (patch-hints_netbsd.sh) = 0d549a48800372d75fe34b783529a78cba90f646

View file

@ -0,0 +1,16 @@
$NetBSD: patch-MANIFEST,v 1.1 2017/04/28 22:59:48 sevan Exp $
[perl #131211] fixup File::Glob degenerate matching
https://research.swtch.com/glob
https://perl5.git.perl.org/perl.git/commit/33252c318625f3c6c89b816ee88481940e3e6f95
--- MANIFEST.orig 2017-04-28 18:35:00.000000000 +0000
+++ MANIFEST
@@ -3706,6 +3706,7 @@ ext/File-Glob/t/case.t See if File::Glo
ext/File-Glob/t/global.t See if File::Glob works
ext/File-Glob/TODO File::Glob extension todo list
ext/File-Glob/t/rt114984.t See if File::Glob works
+ext/File-Glob/t/rt131211.t See if File::Glob works
ext/File-Glob/t/taint.t See if File::Glob works
ext/File-Glob/t/threads.t See if File::Glob + threads works
ext/GDBM_File/GDBM_File.pm GDBM extension Perl module

View file

@ -0,0 +1,89 @@
$NetBSD: patch-ext_File-Glob_bsd_glob.c,v 1.1 2017/04/28 22:59:48 sevan Exp $
[perl #131211] fixup File::Glob degenerate matching
https://research.swtch.com/glob
https://perl5.git.perl.org/perl.git/commit/33252c318625f3c6c89b816ee88481940e3e6f95
--- ext/File-Glob/bsd_glob.c.orig 2017-04-28 18:41:33.000000000 +0000
+++ ext/File-Glob/bsd_glob.c
@@ -911,33 +911,43 @@ globextend(const Char *path, glob_t *pgl
/*
* pattern matching function for filenames. Each occurrence of the *
* pattern causes a recursion level.
+ *
+ * Note, this function differs from the original as per the discussion
+ * here: https://research.swtch.com/glob
+ *
+ * Basically we removed the recursion and made it use the algorithm
+ * from Russ Cox to not go quadratic on cases like a file called ("a" x 100) . "x"
+ * matched against a pattern like "a*a*a*a*a*a*a*y".
+ *
*/
static int
match(Char *name, Char *pat, Char *patend, int nocase)
{
int ok, negate_range;
Char c, k;
+ Char *nextp = NULL;
+ Char *nextn = NULL;
+ loop:
while (pat < patend) {
c = *pat++;
switch (c & M_MASK) {
case M_ALL:
if (pat == patend)
return(1);
- do
- if (match(name, pat, patend, nocase))
- return(1);
- while (*name++ != BG_EOS)
- ;
- return(0);
+ if (*name == BG_EOS)
+ return 0;
+ nextn = name + 1;
+ nextp = pat - 1;
+ break;
case M_ONE:
if (*name++ == BG_EOS)
- return(0);
+ goto fail;
break;
case M_SET:
ok = 0;
if ((k = *name++) == BG_EOS)
- return(0);
+ goto fail;
if ((negate_range = ((*pat & M_MASK) == M_NOT)) != BG_EOS)
++pat;
while (((c = *pat++) & M_MASK) != M_END)
@@ -953,16 +963,25 @@ match(Char *name, Char *pat, Char *paten
} else if (nocase ? (tolower(c) == tolower(k)) : (c == k))
ok = 1;
if (ok == negate_range)
- return(0);
+ goto fail;
break;
default:
k = *name++;
if (nocase ? (tolower(k) != tolower(c)) : (k != c))
- return(0);
+ goto fail;
break;
}
}
- return(*name == BG_EOS);
+ if (*name == BG_EOS)
+ return 1;
+
+ fail:
+ if (nextn) {
+ pat = nextp;
+ name = nextn;
+ goto loop;
+ }
+ return 0;
}
/* Free allocated data belonging to a glob_t structure. */

View file

@ -0,0 +1,103 @@
$NetBSD: patch-ext_File-Glob_t_rt131211.t,v 1.1 2017/04/28 22:59:48 sevan Exp $
[perl #131211] fixup File::Glob degenerate matching
https://research.swtch.com/glob
https://perl5.git.perl.org/perl.git/commit/33252c318625f3c6c89b816ee88481940e3e6f95
--- ext/File-Glob/t/rt131211.t.orig 2017-04-28 18:37:15.000000000 +0000
+++ ext/File-Glob/t/rt131211.t
@@ -0,0 +1,94 @@
+use strict;
+use warnings;
+use v5.16.0;
+use File::Temp 'tempdir';
+use File::Spec::Functions;
+use Test::More;
+use Time::HiRes qw(time);
+
+plan tests => 13;
+
+my $path = tempdir uc cleanup => 1;
+my @files= (
+ "x".("a" x 50)."b", # 0
+ "abbbbbbbbbbbbc", # 1
+ "abbbbbbbbbbbbd", # 2
+ "aaabaaaabaaaabc", # 3
+ "pq", # 4
+ "r", # 5
+ "rttiiiiiii", # 6
+ "wewewewewewe", # 7
+ "weeeweeeweee", # 8
+ "weewweewweew", # 9
+ "wewewewewewewewewewewewewewewewewq", # 10
+ "wtttttttetttttttwr", # 11
+);
+
+
+foreach (@files) {
+ open(my $f, ">", catfile $path, $_);
+}
+
+my $elapsed_fail= 0;
+my $elapsed_match= 0;
+my @got_files;
+my @no_files;
+my $count = 0;
+
+while (++$count < 10) {
+ $elapsed_match -= time;
+ @got_files= glob catfile $path, "x".("a*" x $count) . "b";
+ $elapsed_match += time;
+
+ $elapsed_fail -= time;
+ @no_files= glob catfile $path, "x".("a*" x $count) . "c";
+ $elapsed_fail += time;
+ last if $elapsed_fail > $elapsed_match * 100;
+}
+
+is $count,10,
+ "tried all the patterns without bailing out";
+
+cmp_ok $elapsed_fail/$elapsed_match,"<",2,
+ "time to fail less than twice the time to match";
+is "@got_files", catfile($path, $files[0]),
+ "only got the expected file for xa*..b";
+is "@no_files", "", "shouldnt have files for xa*..c";
+
+
+@got_files= glob catfile $path, "a*b*b*b*bc";
+is "@got_files", catfile($path, $files[1]),
+ "only got the expected file for a*b*b*b*bc";
+
+@got_files= sort glob catfile $path, "a*b*b*bc";
+is "@got_files", catfile($path, $files[3])." ".catfile($path,$files[1]),
+ "got the expected two files for a*b*b*bc";
+
+@got_files= sort glob catfile $path, "p*";
+is "@got_files", catfile($path, $files[4]),
+ "p* matches pq";
+
+@got_files= sort glob catfile $path, "r*???????";
+is "@got_files", catfile($path, $files[6]),
+ "r*??????? works as expected";
+
+@got_files= sort glob catfile $path, "w*e*w??e";
+is "@got_files", join(" ", sort map { catfile($path, $files[$_]) } (7,8)),
+ "w*e*w??e works as expected";
+
+@got_files= sort glob catfile $path, "w*e*we??";
+is "@got_files", join(" ", sort map { catfile($path, $files[$_]) } (7,8,9,10)),
+ "w*e*we?? works as expected";
+
+@got_files= sort glob catfile $path, "w**e**w";
+is "@got_files", join(" ", sort map { catfile($path, $files[$_]) } (9)),
+ "w**e**w works as expected";
+
+@got_files= sort glob catfile $path, "*wee*";
+is "@got_files", join(" ", sort map { catfile($path, $files[$_]) } (8,9)),
+ "*wee* works as expected";
+
+@got_files= sort glob catfile $path, "we*";
+is "@got_files", join(" ", sort map { catfile($path, $files[$_]) } (7,8,9,10)),
+ "we* works as expected";
+