447 lines
12 KiB
Perl
447 lines
12 KiB
Perl
#! /usr/local/bin/perl -W
|
|
eval 'exec /usr/local/bin/perl -S $0 ${1+"$@"}'
|
|
if 0; #$running_under_some_shell
|
|
|
|
=head1 NAME
|
|
|
|
pkgs_which - Quickly find packages where trees of files got installed
|
|
|
|
=head1 SYNOPSIS
|
|
|
|
pkgs_which [-oqvsd] {dir|file} [...]
|
|
|
|
pkgs_which {-h|-?|--help}
|
|
|
|
pkgs_which --man
|
|
|
|
=head1 OPTIONS
|
|
|
|
--origins, -o print package origins instead of names
|
|
--quiet, -q only print actual package names
|
|
--verbose, -v also print unmatched files
|
|
--sort, -s sort package and file lists
|
|
--[no-]cacheall read and cache all package file lists first
|
|
--[no-]find descend directories on the command line [default]
|
|
|
|
--debug, -d emit additional debug information on stderr
|
|
|
|
--help, -h, -? print a brief help message and quit
|
|
--man show the full full documentation and quit
|
|
|
|
Long options can be abbreviated to the shortest unambiguous string.
|
|
Short options can be bundled (Example: pkgs_which -qo ...).
|
|
|
|
=head1 DESCRIPTION
|
|
|
|
pkgs_which is a tool to efficiently look up which FreeBSD ports or
|
|
packages installed the files on its command line, or the files in the
|
|
directories on the command line.
|
|
|
|
pkgs_which
|
|
|
|
=over
|
|
|
|
=item * accepts files on the command line, which are looked up directly,
|
|
|
|
=item * accepts directories on the command line, which are
|
|
recursively scanned for regular files, which are then looked up,
|
|
|
|
=item * accepts an arbitrary mix of files and directories on the command
|
|
line,
|
|
|
|
=item * prints each port or package only once,
|
|
|
|
=item * prints port/package names by default, but can print origins
|
|
instead (--origins option).
|
|
|
|
=item * supports a "quiet" mode that emits output suitable for scripting
|
|
and shell command expansion (see EXAMPLES below)
|
|
|
|
=item * is optimized for efficient bulk lookups of data without
|
|
assistance of an on-disk database.
|
|
|
|
=back
|
|
|
|
It is most useful for quickly obtaining a list of site-packages that
|
|
need to be reinstalled after upgrading a script language interpreter to
|
|
a new version that uses new directories for its site-packages, for
|
|
instance, after a Python 2.6 => 2.7 or Perl 5.10 => 5.12 upgrade, and is
|
|
a good companion to L<portmaster>(8).
|
|
|
|
=head2 IMPLEMENTATION NOTES
|
|
|
|
pkgs_which uses pkg_info -L to accelerate the process. It first obtains
|
|
a list of all files, looks at a random one, looks up the corresponding
|
|
package and records its name, and then purges all files belonging to it
|
|
before looking up the next file.
|
|
|
|
pkgs_which does not spawn subshells for pkg_info for security reasons,
|
|
and makes sure to launder the pkg_info output.
|
|
|
|
The --cacheall option (default on) makes pkgs_which read all package
|
|
file lists upon start. This takes a few seconds on a GHz-class computer
|
|
but voids the need to run pkg_info -W often later on.
|
|
|
|
For looking up very few files, it is more efficient to use --no-cacheall.
|
|
|
|
=head2 RELATED TOOLS
|
|
|
|
pkgs_which performs a similar task to L<pkg_which>(1) that is part of
|
|
the ports-mgmt/portupgrade port, but unlike the latter, it does not
|
|
require a database, and is optimized for bulk lookups of entire
|
|
directory trees.
|
|
|
|
L<portmaster>(8) is a tool written by Doug Barton to upgrade installed
|
|
ports and their dependencies that does not require port/package
|
|
databases.
|
|
|
|
=cut
|
|
|
|
require 5.008_000;
|
|
use strict;
|
|
use English '-no_match_vars';
|
|
use vars qw($UID $GID $EUID $EGID);
|
|
use File::Find ();
|
|
use Getopt::Long qw(:config no_ignore_case bundling);
|
|
use Pod::Usage;
|
|
|
|
# ### HARD WIRED CONFIGURATION HERE ###
|
|
|
|
# Use a safe path
|
|
$ENV{'PATH'} = '/bin:/usr/bin:/sbin:/usr/sbin';
|
|
|
|
# Where pkg_info lives
|
|
my $PKG_INFO = '/usr/sbin/pkg_info';
|
|
my $PKGNG = '/usr/local/sbin/pkg';
|
|
my $PKGNGDB = '/var/db/pkg/local.sqlite';
|
|
|
|
# Which regexp to use for laundering tainted file
|
|
# and package names - note that this must not be let
|
|
# near a shell as it contains glob characters!
|
|
my $UNTAINT = qr|^([()[\]{}\-+@\w.,/\$%!=~:^ *?]+)$|o;
|
|
|
|
# Default for cacheall.
|
|
my $cacheall = 1;
|
|
|
|
# ### NO USER SERVICEABLE PARTS BELOW THIS LINE ###
|
|
|
|
my $rc = 0;
|
|
|
|
my $PKGNG_MODE = 0;
|
|
if (-e $PKGNG and -e $PKGNGDB) { $PKGNG_MODE = 1; }
|
|
|
|
# Clean environment a bit
|
|
delete @ENV{qw(IFS CDPATH ENV BASH_ENV)};
|
|
|
|
# parse options
|
|
my $man = 0;
|
|
my $help = 0;
|
|
my $debug = 0;
|
|
my $verbose = 0;
|
|
my $quiet = 0;
|
|
my $origins = 0;
|
|
my $sort = 0;
|
|
my $find = 1;
|
|
|
|
GetOptions('help|h|?' => \$help,
|
|
'man' => \$man,
|
|
'cacheall!' => \$cacheall,
|
|
'debug|d' => \$debug,
|
|
'origins|o' => \$origins,
|
|
'quiet|q' => sub { $quiet = 1; $verbose = 0;},
|
|
'sort|s' => \$sort,
|
|
'verbose|v' => sub { $verbose ++; $quiet = 0; },
|
|
'find|f!' => \$find)
|
|
or pod2usage(2);
|
|
pod2usage(1) if $help;
|
|
pod2usage(-exitstatus => 0, -verbose => 2) if $man;
|
|
|
|
unless (@ARGV) {
|
|
pod2usage(-exitstatus => 1,
|
|
-verbose => 0,
|
|
-message => "You must give a file or directory on the command line.");
|
|
}
|
|
|
|
# listing all files from pkgNG is quite slow, so avoid
|
|
if ($PKGNG_MODE and $cacheall) { $cacheall = 0; }
|
|
|
|
# declare subroutines
|
|
|
|
sub wanted;
|
|
sub debug;
|
|
sub safebacktick(@);
|
|
sub readcache();
|
|
sub readorigins();
|
|
|
|
my $pf2p;
|
|
my $pfiles;
|
|
my $pogn;
|
|
my $pall;
|
|
my $pallomap;
|
|
my $pfilesmulti;
|
|
|
|
if ($PKGNG_MODE) {
|
|
$pf2p = sub ($) { return safebacktick($PKGNG, 'which', '-q', $_[0]); };
|
|
$pfiles = sub ($) { return safebacktick($PKGNG, 'info', '-ql', $_[0]); };
|
|
$pogn = sub ($) { return safebacktick($PKGNG, 'info', '-qo', $_[0]); };
|
|
$pall = sub () { return safebacktick($PKGNG, 'info', '-q'); };
|
|
$pallomap = sub () { return map { s/\s+/:/; $_; }
|
|
safebacktick($PKGNG, 'info', '-o', '-a'); };
|
|
$pfilesmulti = sub (@) { return safebacktick($PKGNG, 'info', '-l', @_); };
|
|
} else {
|
|
$pf2p = sub ($) { return safebacktick($PKG_INFO, '-qGW', $_[0]); };
|
|
$pfiles = sub ($) { return safebacktick($PKG_INFO, '-qGL', $_[0]); };
|
|
$pogn = sub ($) { return safebacktick($PKG_INFO, '-qGo', $_[0]); };
|
|
$pall = sub () { return safebacktick($PKG_INFO, '-EG', '-a'); };
|
|
$pallomap = sub () { return safebacktick($PKG_INFO, '-QGoa'); };
|
|
$pfilesmulti = sub (@) { return safebacktick($PKG_INFO, '-QGL', @_); };
|
|
}
|
|
|
|
# define variables
|
|
|
|
my %ufiles = ();
|
|
my @pkgs = ();
|
|
|
|
my $wantsort = $sort ? sub { return sort @_; }
|
|
: sub { return @_; };
|
|
|
|
# Validate @ARGV
|
|
my $idx = 0;
|
|
while ($idx <= $#ARGV) {
|
|
if (lstat($ARGV[$idx]) > 0) {
|
|
$idx++;
|
|
} else {
|
|
warn "Cannot stat $ARGV[$idx]: $!, skipping";
|
|
delete $ARGV[$idx]; # leaves indices stable
|
|
$idx++;
|
|
$rc = 1;
|
|
}
|
|
};
|
|
|
|
# Obtain file list
|
|
if ($find) {
|
|
File::Find::find({wanted => \&wanted,
|
|
no_chdir => 1,
|
|
untaint => 1},
|
|
@ARGV);
|
|
} else {
|
|
foreach my $i (@ARGV) {
|
|
$i =~ qr|^([-+@\w./]+)$|;
|
|
$ufiles{$1} = 1;
|
|
}
|
|
}
|
|
|
|
my @notfound=(); # to record files not matched
|
|
|
|
# Obtain package info if desired
|
|
my ($f2p, $pfl) = readcache() if $cacheall;
|
|
my %p2o = readorigins() if $cacheall and $origins;
|
|
|
|
my $f;
|
|
# main loop here:
|
|
# - pick random file from hash,
|
|
# - look up the package name (from hash or with pkg_info)
|
|
# - look up list of files in package
|
|
# - purge all files recorded as belonging to package from the hash
|
|
|
|
while ($f = each %ufiles) {
|
|
# Find package for file $f and store in $p:
|
|
debug "matching $f\n";
|
|
my $p = $cacheall ? $$f2p{$f} : &$pf2p($f);
|
|
if (!$p) {
|
|
debug "file $f not in packages\n";
|
|
push @notfound, $f;
|
|
delete $ufiles{$f};
|
|
next;
|
|
}
|
|
chomp $p;
|
|
if ($p !~ $UNTAINT) {
|
|
warn "tainted package name $p, skipping\n";
|
|
next;
|
|
} else {
|
|
$p = $1; # laundered
|
|
}
|
|
debug "got package $p\n";
|
|
|
|
# Obtain file list for package and purge from %ufiles:
|
|
push @pkgs, $p;
|
|
my @pf = $cacheall ? @{$$pfl{$p}} : &$pfiles($p);
|
|
chomp @pf;
|
|
debug "deleting files @pf\n";
|
|
delete @ufiles{@pf};
|
|
keys %ufiles; # reset hash iterator without overhead (void context)
|
|
}
|
|
|
|
# If desired, map package names to package origins:
|
|
if ($origins) {
|
|
if ($cacheall) {
|
|
@pkgs = map { $_ = $p2o{$_}; } @pkgs;
|
|
} else {
|
|
@pkgs = map { $_ = &$pogn($_); chomp $_; $_; } @pkgs;
|
|
}
|
|
}
|
|
|
|
# Output:
|
|
print "Packages:\n" unless $quiet;
|
|
print join("\n", &$wantsort(@pkgs)), "\n";
|
|
print "\n" unless $quiet;
|
|
if ($verbose) {
|
|
print "Unmatched files:\n", join("\n", &$wantsort(@notfound)), "\n\n";
|
|
}
|
|
|
|
exit $rc;
|
|
|
|
# Subroutines ########################################################
|
|
|
|
# wanted - used for File::Find as it traverses the tree,
|
|
# we populate %ufiles.
|
|
sub wanted {
|
|
my ($dev,$ino,$mode,$nlink,$uid,$gid);
|
|
|
|
if ((($dev,$ino,$mode,$nlink,$uid,$gid) = lstat($_)) && ! -d _)
|
|
{
|
|
# only record clean names
|
|
if ($_ =~ $UNTAINT and $1) {
|
|
$ufiles{$1} = 1;
|
|
} else {
|
|
debug "skipping tainted file name $_";
|
|
}
|
|
}
|
|
}
|
|
|
|
# if $debug is set, print a debug banner and the arguments to STDERR
|
|
sub debug {
|
|
print STDERR "DEBUG: ", @_ if $debug;
|
|
}
|
|
|
|
# safe variant of @foo = `command` - doesn't invoke a shell.
|
|
sub safebacktick(@) {
|
|
my @args = @_;
|
|
my @data = ();
|
|
my $pid;
|
|
|
|
die "Can't fork: $!" unless defined($pid = open(KID, "-|"));
|
|
if ($pid) {
|
|
@data = <KID>;
|
|
close KID
|
|
or warn $! ? "Error reading from kid: $!"
|
|
: "Exit status $? from kid";
|
|
} else {
|
|
debug "running '", join("' '", @args), "'\n";
|
|
exec { $args[0] } @args;
|
|
}
|
|
return wantarray ? @data : $data[0];
|
|
}
|
|
|
|
# build a hash of file-to-package
|
|
# and a hash of package-to-filelist (contains array references)
|
|
# and return references to either.
|
|
sub readcache() {
|
|
my %f2p = (); # file-to-package hash (string, string)
|
|
my %pfl = (); # package-files hash (string, array)
|
|
my @pkgs = map { $_ =~ $UNTAINT; $1; } &$pall();
|
|
my $n = scalar @pkgs;
|
|
debug "subreadcache: got $n packages.\n";
|
|
# Request file lists of so many packages at once, to save the
|
|
# overhead of forking and executing pkg_info and its initialization.
|
|
# This speeds up things by an order of magnitude or so.
|
|
my $chunksize = 100;
|
|
while (my @p = splice(@pkgs, 0, $chunksize)) {
|
|
my @fl = &$pfilesmulti(@p);
|
|
chomp @fl;
|
|
my $pkg;
|
|
map {
|
|
$_ =~ $UNTAINT;
|
|
while (s|^([^/:]+:)||o) {
|
|
$pkg = $1;
|
|
$pkg =~ s/:$//; # strip trailing colon
|
|
}
|
|
s/^\s+//o;
|
|
if ($_) { # file name
|
|
if ($pkg) { $f2p{$_} = $pkg; push @{$pfl{$pkg}}, $_;}
|
|
else { warn "pkg_info fault, missed package prefix before line $_."; }
|
|
} elsif ($_ ne '') {
|
|
warn "tainted file name in $pkg: $_";
|
|
}
|
|
} @fl;
|
|
}
|
|
debug "subreadcache: got ", scalar keys %f2p, " files.\n";
|
|
return (\%f2p, \%pfl);
|
|
}
|
|
|
|
# build a hash of package-to-origin and return it
|
|
sub readorigins() {
|
|
my %p2o = ();
|
|
my @ol = &$pallomap();
|
|
chomp @ol;
|
|
my ($k, $v);
|
|
map { $_ =~ $UNTAINT;
|
|
($k, $v) = split /:/,$_,2;
|
|
$p2o{$k} = $v;
|
|
} @ol;
|
|
return %p2o;
|
|
}
|
|
|
|
__END__
|
|
|
|
=pod
|
|
|
|
=head1 EXAMPLES
|
|
|
|
Obtain the sorted list of all packages that installed at least one file under
|
|
/usr/local/lib/python2.6/site-packages:
|
|
|
|
pkgs_which --sort /usr/local/lib/python2.6/site-packages
|
|
|
|
|
|
Upgrade all packages that installed at least one file under
|
|
/usr/local/lib/python2.6/site-packages (this assumes a Bourne-shell such
|
|
as sh, ash, ksh, bash):
|
|
|
|
portmaster -d $(pkgs_which -qo /usr/local/lib/python2.6/site-packages)
|
|
|
|
=head1 SEE ALSO
|
|
|
|
L<pkg_info>(8), L<portmaster>(8), L<portupgrade>(8), L<pkg_which>(8)
|
|
|
|
=head1 HISTORY
|
|
|
|
0.4.1 2014-02-11
|
|
- do not require files given on command line are regular files,
|
|
but accept any non-directory (for instance, symlinks).
|
|
|
|
Workaround for previous versions: use --no-find if you intend to
|
|
look up non-regular files.
|
|
|
|
0.4.0 2013-11-28
|
|
- support pkgNG. Known issue is that pkg which returns bogus exit
|
|
codes, spamming your screen. pkgs_which works nonetheless.
|
|
https://github.com/freebsd/pkg/issues/657
|
|
|
|
Note that pkgNG always uses --nocache implictly for speed:
|
|
https://github.com/freebsd/pkg/issues/658
|
|
|
|
Known issue: the pkgNG detection is a hack. It just looks for the
|
|
executable and the database in default locations, but does not
|
|
attempt to run "pkg -N".
|
|
|
|
0.3.0 2013-03-11
|
|
- read pkg_info -L information in chunks of 100 packages at a time,
|
|
to avoid forking once per package, which was slow.
|
|
|
|
0.2.0 2011-07-25
|
|
- fixed a bug where skipping non-existent command line arguments
|
|
failed and resulted in an unterminated (endless) loop.
|
|
|
|
- added the --no-find option
|
|
|
|
0.1.0 2011-03-12
|
|
- pkgs_which made its first appearance in the FreeBSD ports tree
|
|
|
|
=head1 AUTHORS
|
|
|
|
Copyright 2011, 2013 Matthias Andree <mandree@FreeBSD.org>.
|
|
All rights reserved. This script is exclusively licensed under the GNU
|
|
General Public License version 3, or any later version.
|
|
|
|
=cut
|