Update checkbot to 1.66. Changes:

* checkbot.pl (get_headers): URI doesn't know about netloc, but it
      does know about authority.
      (get_headers): $url is already absolute, no need for ->abs
    * checkbot.pl (handle_doc): Print a notice when external
      non-HTTP/FTP URLs are dropped.
    * checkbot.pl (init_modules and other places): Remove
      URI::URL::strict call and use of new URI::URL because it is
      obsolete; we should use the URI classes now.
    * checkbot.pl (init_globals): Initialize last checkpoint time with
      0 instead of current time, so that we write out a set of pages
      right at the start. This will catch problems with permissions for
      these pages as early as possible.
    * checkbot.pl (get_server_type): Take into account that we might
      not learn anything about the server.
    * checkbot.pl (get_headers): Factored out of check_external so
      that moving to using GET requests only will be easier later.
    * checkbot.pl (send_mail): Really fix printing of starting URLs in
      email. All URLs are now printed in the subject and body of the
      message.
This commit is contained in:
abs 2001-12-12 13:20:32 +00:00
parent e40b0cc41e
commit 74498f630f
3 changed files with 18 additions and 23 deletions

View file

@ -1,8 +1,7 @@
# $NetBSD: Makefile,v 1.11 2001/11/29 01:12:51 hubertf Exp $
# $NetBSD: Makefile,v 1.12 2001/12/12 13:20:32 abs Exp $
#
DISTNAME= checkbot-1.64
PKGREVISION= 1
DISTNAME= checkbot-1.66
CATEGORIES= www net perl5
MASTER_SITES= http://degraaff.org/checkbot/

View file

@ -1,5 +1,5 @@
$NetBSD: distinfo,v 1.4 2001/08/09 12:35:36 abs Exp $
$NetBSD: distinfo,v 1.5 2001/12/12 13:20:32 abs Exp $
SHA1 (checkbot-1.64.tar.gz) = 14c26e52df4114563b65fac18b38e1463fb786a6
Size (checkbot-1.64.tar.gz) = 26752 bytes
SHA1 (patch-aa) = cb96e473af9b9e18b63ab58a43e00a03455eb16d
SHA1 (checkbot-1.66.tar.gz) = b6f3fd60ef5f4660d6be1b07a75f711a60d019e1
Size (checkbot-1.66.tar.gz) = 27628 bytes
SHA1 (patch-aa) = 4476469b2d6edae5e7a3286e2ccc4d1b109c6ab0

View file

@ -1,16 +1,12 @@
$NetBSD: patch-aa,v 1.4 2001/08/09 12:35:36 abs Exp $
$NetBSD: patch-aa,v 1.5 2001/12/12 13:20:32 abs Exp $
--- checkbot.pl.orig Sun Apr 15 20:34:30 2001
--- checkbot.pl.orig Thu Oct 25 20:46:42 2001
+++ checkbot.pl
@@ -47,8 +47,9 @@
checkbot [B<--debug>] [B<--help>] [B<--verbose>] [B<--url> start URL]
@@ -49,6 +49,7 @@
[B<--match> match string] [B<--exclude> exclude string]
- [B<--proxy> proxy URL] [B<--internal-only>]
- [B<--ignore> ignore string] [B<-file> file name]
+ [B<--skip> skip string] [B<--ignore> ignore string]
+ [B<--proxy> proxy URL] [B<--internal-only>] [B<--match-url-base>]
+ [B<--file> file name]
[B<--proxy> proxy URL] [B<--internal-only>]
[B<--ignore> ignore string] [B<--file> file name]
+ [B<--skip> skip string] [B<--match-url-base>]
[B<--style> style file URL]
[B<--mailto> email address]
[B<--note> note] [B<--sleep> seconds] [B<--timeout> timeout]
@ -56,7 +52,7 @@ $NetBSD: patch-aa,v 1.4 2001/08/09 12:35:36 abs Exp $
# The default for opt_match will be set later, because we might want
# to muck with opt_url first.
@@ -353,7 +366,11 @@
@@ -362,7 +375,11 @@
my @matchurls;
my $matchurl;
foreach $matchurl (@starturls) {
@ -69,7 +65,7 @@ $NetBSD: patch-aa,v 1.4 2001/08/09 12:35:36 abs Exp $
}
$main::opt_match = '(' . join('|', @matchurls) . ')';
print STDERR "--match defaults to $main::opt_match\n" if $main::opt_verbose;
@@ -781,7 +798,9 @@
@@ -709,7 +726,9 @@
print OUT "<tr><th align=left>--url</th><td>Start URL(s)</td><td>",
join(',', @starturls), "</td></tr>\n";
print OUT "<tr><th align=left>--match</th><td>Match regular expression</td><td>$main::opt_match</td></tr>\n";
@ -79,7 +75,7 @@ $NetBSD: patch-aa,v 1.4 2001/08/09 12:35:36 abs Exp $
print OUT "<tr><th align=left>--ignore</th><td>Ignore regular expression</td><td>$main::opt_ignore</td></tr>\n" if defined $main::opt_ignore;
print OUT "<tr><th align=left>--dontwarn</th><td>Don't warn for these codes</td><td>$main::opt_dontwarn</td></tr>\n" if $main::opt_dontwarn ne 'xxx';
print OUT "<tr><th align=left>--enable-virtual</th><td>Use virtual names only</td><td>yes</td></tr>\n" if $main::opt_enable_virtual;
@@ -923,7 +942,7 @@
@@ -851,7 +870,7 @@
add_to_queue($url, $response->base);
$doc_new++;
}
@ -88,7 +84,7 @@ $NetBSD: patch-aa,v 1.4 2001/08/09 12:35:36 abs Exp $
# Add this as an external link if we can check the protocol later
if ($url =~ /^(http|ftp):/o) {
print EXTERNAL $url . "|" . $response->base . "\n";
@@ -947,8 +966,12 @@
@@ -972,8 +991,12 @@
sub add_to_queue {
my ($url, $parent) = @_;
@ -103,7 +99,7 @@ $NetBSD: patch-aa,v 1.4 2001/08/09 12:35:36 abs Exp $
}
sub print_server {
@@ -1142,6 +1165,7 @@
@@ -1167,6 +1190,7 @@
print " --match match Check pages only if URL matches `match'\n";
print " If no match is given, the start URL is used as a match\n";
print " --exclude exclude Exclude pages if the URL matches 'exclude'\n";
@ -111,7 +107,7 @@ $NetBSD: patch-aa,v 1.4 2001/08/09 12:35:36 abs Exp $
print " --ignore ignore Do not list error messages for pages that the\n";
print " URL matches 'ignore'\n";
print " --file file Write results to file, default is checkbot.html\n";
@@ -1154,8 +1178,9 @@
@@ -1179,8 +1203,9 @@
print " --interval seconds Maximum time interval between updates (default 10800)\n";
print " --dontwarn codes Do not write warnings for these HTTP response codes\n";
print " --enable-virtual Use only virtual names, not IP numbers for servers\n";