Update checkbot to 1.66. Changes:
* checkbot.pl (get_headers): URI doesn't know about netloc, but it does know about authority.
  (get_headers): $url is already absolute, so there is no need for ->abs.
* checkbot.pl (handle_doc): Print a notice when external non-HTTP/FTP URLs are dropped.
* checkbot.pl (init_modules and other places): Remove the URI::URL::strict call and the use of new URI::URL because it is obsolete; we should use the URI classes now.
* checkbot.pl (init_globals): Initialize the last checkpoint time with 0 instead of the current time, so that we write out a set of pages right at the start. This will catch problems with permissions for these pages as early as possible.
* checkbot.pl (get_server_type): Take into account that we might not learn anything about the server.
* checkbot.pl (get_headers): Factored out of check_external so that moving to using GET requests only will be easier later.
* checkbot.pl (send_mail): Really fix printing of starting URLs in email. All URLs are now printed in the subject and body of the message.
This commit is contained in:
parent
e40b0cc41e
commit
74498f630f
3 changed files with 18 additions and 23 deletions
|
@ -1,8 +1,7 @@
|
|||
# $NetBSD: Makefile,v 1.11 2001/11/29 01:12:51 hubertf Exp $
|
||||
# $NetBSD: Makefile,v 1.12 2001/12/12 13:20:32 abs Exp $
|
||||
#
|
||||
|
||||
DISTNAME= checkbot-1.64
|
||||
PKGREVISION= 1
|
||||
DISTNAME= checkbot-1.66
|
||||
CATEGORIES= www net perl5
|
||||
MASTER_SITES= http://degraaff.org/checkbot/
|
||||
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
$NetBSD: distinfo,v 1.4 2001/08/09 12:35:36 abs Exp $
|
||||
$NetBSD: distinfo,v 1.5 2001/12/12 13:20:32 abs Exp $
|
||||
|
||||
SHA1 (checkbot-1.64.tar.gz) = 14c26e52df4114563b65fac18b38e1463fb786a6
|
||||
Size (checkbot-1.64.tar.gz) = 26752 bytes
|
||||
SHA1 (patch-aa) = cb96e473af9b9e18b63ab58a43e00a03455eb16d
|
||||
SHA1 (checkbot-1.66.tar.gz) = b6f3fd60ef5f4660d6be1b07a75f711a60d019e1
|
||||
Size (checkbot-1.66.tar.gz) = 27628 bytes
|
||||
SHA1 (patch-aa) = 4476469b2d6edae5e7a3286e2ccc4d1b109c6ab0
|
||||
|
|
|
@ -1,16 +1,12 @@
|
|||
$NetBSD: patch-aa,v 1.4 2001/08/09 12:35:36 abs Exp $
|
||||
$NetBSD: patch-aa,v 1.5 2001/12/12 13:20:32 abs Exp $
|
||||
|
||||
--- checkbot.pl.orig Sun Apr 15 20:34:30 2001
|
||||
--- checkbot.pl.orig Thu Oct 25 20:46:42 2001
|
||||
+++ checkbot.pl
|
||||
@@ -47,8 +47,9 @@
|
||||
|
||||
checkbot [B<--debug>] [B<--help>] [B<--verbose>] [B<--url> start URL]
|
||||
@@ -49,6 +49,7 @@
|
||||
[B<--match> match string] [B<--exclude> exclude string]
|
||||
- [B<--proxy> proxy URL] [B<--internal-only>]
|
||||
- [B<--ignore> ignore string] [B<-file> file name]
|
||||
+ [B<--skip> skip string] [B<--ignore> ignore string]
|
||||
+ [B<--proxy> proxy URL] [B<--internal-only>] [B<--match-url-base>]
|
||||
+ [B<--file> file name]
|
||||
[B<--proxy> proxy URL] [B<--internal-only>]
|
||||
[B<--ignore> ignore string] [B<--file> file name]
|
||||
+ [B<--skip> skip string] [B<--match-url-base>]
|
||||
[B<--style> style file URL]
|
||||
[B<--mailto> email address]
|
||||
[B<--note> note] [B<--sleep> seconds] [B<--timeout> timeout]
|
||||
|
@ -56,7 +52,7 @@ $NetBSD: patch-aa,v 1.4 2001/08/09 12:35:36 abs Exp $
|
|||
# The default for opt_match will be set later, because we might want
|
||||
# to muck with opt_url first.
|
||||
|
||||
@@ -353,7 +366,11 @@
|
||||
@@ -362,7 +375,11 @@
|
||||
my @matchurls;
|
||||
my $matchurl;
|
||||
foreach $matchurl (@starturls) {
|
||||
|
@ -69,7 +65,7 @@ $NetBSD: patch-aa,v 1.4 2001/08/09 12:35:36 abs Exp $
|
|||
}
|
||||
$main::opt_match = '(' . join('|', @matchurls) . ')';
|
||||
print STDERR "--match defaults to $main::opt_match\n" if $main::opt_verbose;
|
||||
@@ -781,7 +798,9 @@
|
||||
@@ -709,7 +726,9 @@
|
||||
print OUT "<tr><th align=left>--url</th><td>Start URL(s)</td><td>",
|
||||
join(',', @starturls), "</td></tr>\n";
|
||||
print OUT "<tr><th align=left>--match</th><td>Match regular expression</td><td>$main::opt_match</td></tr>\n";
|
||||
|
@ -79,7 +75,7 @@ $NetBSD: patch-aa,v 1.4 2001/08/09 12:35:36 abs Exp $
|
|||
print OUT "<tr><th align=left>--ignore</th><td>Ignore regular expression</td><td>$main::opt_ignore</td></tr>\n" if defined $main::opt_ignore;
|
||||
print OUT "<tr><th align=left>--dontwarn</th><td>Don't warn for these codes</td><td>$main::opt_dontwarn</td></tr>\n" if $main::opt_dontwarn ne 'xxx';
|
||||
print OUT "<tr><th align=left>--enable-virtual</th><td>Use virtual names only</td><td>yes</td></tr>\n" if $main::opt_enable_virtual;
|
||||
@@ -923,7 +942,7 @@
|
||||
@@ -851,7 +870,7 @@
|
||||
add_to_queue($url, $response->base);
|
||||
$doc_new++;
|
||||
}
|
||||
|
@ -88,7 +84,7 @@ $NetBSD: patch-aa,v 1.4 2001/08/09 12:35:36 abs Exp $
|
|||
# Add this as an external link if we can check the protocol later
|
||||
if ($url =~ /^(http|ftp):/o) {
|
||||
print EXTERNAL $url . "|" . $response->base . "\n";
|
||||
@@ -947,8 +966,12 @@
|
||||
@@ -972,8 +991,12 @@
|
||||
sub add_to_queue {
|
||||
my ($url, $parent) = @_;
|
||||
|
||||
|
@ -103,7 +99,7 @@ $NetBSD: patch-aa,v 1.4 2001/08/09 12:35:36 abs Exp $
|
|||
}
|
||||
|
||||
sub print_server {
|
||||
@@ -1142,6 +1165,7 @@
|
||||
@@ -1167,6 +1190,7 @@
|
||||
print " --match match Check pages only if URL matches `match'\n";
|
||||
print " If no match is given, the start URL is used as a match\n";
|
||||
print " --exclude exclude Exclude pages if the URL matches 'exclude'\n";
|
||||
|
@ -111,7 +107,7 @@ $NetBSD: patch-aa,v 1.4 2001/08/09 12:35:36 abs Exp $
|
|||
print " --ignore ignore Do not list error messages for pages that the\n";
|
||||
print " URL matches 'ignore'\n";
|
||||
print " --file file Write results to file, default is checkbot.html\n";
|
||||
@@ -1154,8 +1178,9 @@
|
||||
@@ -1179,8 +1203,9 @@
|
||||
print " --interval seconds Maximum time interval between updates (default 10800)\n";
|
||||
print " --dontwarn codes Do not write warnings for these HTTP response codes\n";
|
||||
print " --enable-virtual Use only virtual names, not IP numbers for servers\n";
|
||||
|
|
Loading…
Reference in a new issue