Add crawl, a worm that searches for jpegs
PR: 28304 Submitted by: Pete Fritchman <petef@databits.net>
This commit is contained in:
parent
cb1d95a576
commit
302ddeb73e
Notes:
svn2git
2021-03-31 03:12:20 +00:00
svn path=/head/; revision=44354
7 changed files with 69 additions and 0 deletions
|
@ -47,6 +47,7 @@
|
|||
SUBDIR += chimera2
|
||||
SUBDIR += comclear
|
||||
SUBDIR += comline
|
||||
SUBDIR += crawl
|
||||
SUBDIR += css-mode.el
|
||||
SUBDIR += decss
|
||||
SUBDIR += demoroniser
|
||||
|
|
25
www/crawl/Makefile
Normal file
25
www/crawl/Makefile
Normal file
|
@ -0,0 +1,25 @@
|
|||
# New ports collection makefile for: crawl
|
||||
# Date created: 20 June 2001
|
||||
# Whom: Pete Fritchman <petef@databits.net>
|
||||
#
|
||||
# $FreeBSD$
|
||||
#
|
||||
|
||||
PORTNAME= crawl
|
||||
PORTVERSION= 0.1
|
||||
CATEGORIES= www
|
||||
MASTER_SITES= http://www.monkey.org/~provos/
|
||||
|
||||
MAINTAINER= petef@databits.net
|
||||
|
||||
BUILD_DEPENDS= ${LOCALBASE}/lib/libevent.a:${PORTSDIR}/devel/libevent
|
||||
|
||||
WRKSRC= ${WRKDIR}/${PORTNAME}
|
||||
|
||||
USE_AUTOCONF= yes
|
||||
GNU_CONFIGURE= yes
|
||||
CONFIGURE_ARGS= --with-libevent=${LOCALBASE}
|
||||
|
||||
MAN1= crawl.1
|
||||
|
||||
.include <bsd.port.mk>
|
1
www/crawl/distinfo
Normal file
1
www/crawl/distinfo
Normal file
|
@ -0,0 +1 @@
|
|||
MD5 (crawl-0.1.tar.gz) = 93df9d0e6534bc4fc462950c023ec2e7
|
17
www/crawl/files/patch-configure.in
Normal file
17
www/crawl/files/patch-configure.in
Normal file
|
@ -0,0 +1,17 @@
|
|||
--- configure.in.orig Wed Jun 20 14:41:44 2001
|
||||
+++ configure.in Wed Jun 20 17:30:07 2001
|
||||
@@ -38,11 +38,11 @@
|
||||
;;
|
||||
*)
|
||||
AC_MSG_RESULT($withval)
|
||||
- if test -f $withval/event.h -a -f $withval/libevent.a; then
|
||||
+ if test -f $withval/include/event.h -a -f $withval/lib/libevent.a; then
|
||||
owd=`pwd`
|
||||
if cd $withval; then withval=`pwd`; cd $owd; fi
|
||||
- EVENTINC="-I$withval"
|
||||
- EVENTLIB="-L$withval -levent"
|
||||
+ EVENTINC="-I$withval/include"
|
||||
+ EVENTLIB="-L$withval/lib -levent"
|
||||
else
|
||||
AC_ERROR(event.h or libevent.a not found in $withval)
|
||||
fi
|
1
www/crawl/pkg-comment
Normal file
1
www/crawl/pkg-comment
Normal file
|
@ -0,0 +1 @@
|
|||
A small, efficient web crawler with advanced features
|
23
www/crawl/pkg-descr
Normal file
23
www/crawl/pkg-descr
Normal file
|
@ -0,0 +1,23 @@
|
|||
The crawl utility starts a depth-first traversal of the web at the
|
||||
specified URLs. It stores all JPEG images that match the configured
|
||||
constraints. Crawl is fairly fast and allows for graceful termination.
|
||||
After terminating crawl, it is possible to restart it at exactly
|
||||
the same spot where it was terminated. Crawl keeps a persistent
|
||||
database that allows multiple crawls without revisiting sites.
|
||||
|
||||
The main reason for writing crawl was the lack of simple open source
|
||||
web crawlers. Crawl is only a few thousand lines of code and fairly
|
||||
easy to debug and customize.
|
||||
|
||||
Some of the main features:
|
||||
- Saves encountered JPEG images
|
||||
- Image selection based on regular expressions and size constraints
|
||||
- Resume previous crawl after graceful termination
|
||||
- Persistent database of visited URLs
|
||||
- Very small and efficient code
|
||||
- Supports robots.txt
|
||||
|
||||
WWW: http://www.monkey.org/~provos/crawl/
|
||||
|
||||
- Pete
|
||||
petef@databits.net
|
1
www/crawl/pkg-plist
Normal file
1
www/crawl/pkg-plist
Normal file
|
@ -0,0 +1 @@
|
|||
bin/crawl
|
Loading…
Reference in a new issue