pkgsrc/mk/fetch/fetch
jperkin dc189ce30d mk: Rewrite the checksum script in awk.
The previous shell script version's runtime was quadratic against the
number of distfiles to verify.  Historically this has not been an issue,
with usually only a handful of files per package.  However, with the
introduction of Go modules the number of distfiles used by a single
package can be very high.

For example, in an upcoming update of www/grafana to version 7.1.5, the
number of GO_MODULE_FILES is 821.  Running 'bmake checksum' takes:

  real    18m20.743s
  user    17m27.975s
  sys     0m49.239s

With the awk code, this is reduced to a far more sensible:

  real    0m4.330s
  user    0m3.241s
  sys     0m0.875s

The script has been written to emulate the previous version precisely,
preserving the same output and error messages and supporting all of its
behaviour, with the one exception that previous exit values of 128 have
been changed to 3, in order to avoid any potential signed 8-bit issues.

The one change in the pkgsrc infrastructure is that the mk/fetch/fetch
script no longer sets a working default value for ${CHECKSUM}.  This is
not a problem in a pkgsrc environment as all of the required variables
are set correctly, but if there happen to be any users who are using
this script in a standalone environment, they will need to set it
accordingly.  This was probably required in many situations previously
anyway, as none of the script's environment variables were set, and
trying to support this would be fragile at best.
2020-08-27 11:45:45 +00:00

311 lines
8.6 KiB
Bash
Executable file

#!/bin/sh
#
# $NetBSD: fetch,v 1.20 2020/08/27 11:45:45 jperkin Exp $
#
# Copyright (c) 2006, 2015 The NetBSD Foundation, Inc.
# All rights reserved.
#
# This code is derived from software contributed to The NetBSD Foundation
# by Johnny C. Lam.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# 1. Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
# TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
# BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
#
######################################################################
#
# NAME
# fetch -- fetch files via URLs
#
# SYNOPSIS
# fetch [-c] [-d dir] [-f distinfo] [-p hook] [-r] [-v] file [site ...]
#
# DESCRIPTION
# fetch will attempt to fetch the file from the list of specified
# sites in the order given. The complete URL to the file on each
# site should be the concatenation of the specified site and file.
# If the file cannot be fetched successfully, then we try the next
# listed site.
#
# If the file already exists on the disk and is verified, then
# no fetch action is taken.
#
# OPTIONS
# -c Verify the checksum for the file. If the checksum
# does not match, then the fetch is determined to be
# not successful.
#
# -d dir Fetch the files into the specified directory.
#
# -f distinfo
# The path to the distinfo file containing the checksums
# for the file. The file format should match what is
# needed by the pkgsrc/mk/checksum/checksum script.
#
# -p hook
# After a successful fetch, run hook. The first argument is
# the relative path of the distfile and the second argument
# the full URL the file was obtained from.
#
# -r Resume a previous fetch for the file. In this case,
# the file is first saved to a ".pkgsrc.resume" file,
# and is later renamed to the final file name if the
# complete file has been successfully fetched.
#
# -v Show the actual command line used to fetch the file
# from each site.
#
# ENVIRONMENT
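# PKGSRCDIR The location of the pkgsrc tree (default: /usr/pkgsrc).
# It used to be a hint to help locate the default checksum
# script; no default checksum script is derived from it
# any more.
#
# CHECKSUM This is the command used to verify checksums when "-c"
# is specified. It has no working default (it falls back
# to "false"), so it must be set by the caller.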
#
# FETCH_CMD This is the actual command used for transferring
# files from the various sites.
#
# The following are lists of options to pass to ${FETCH_CMD}:
#
# FETCH_BEFORE_ARGS
# These options appear before all other options.
#
# FETCH_AFTER_ARGS
# These options appear after all other options.
#
# FETCH_RESUME_ARGS
# These options appear just after FETCH_BEFORE_ARGS
# options and cause ${FETCH_CMD} to resume a
# previous file transfer.
#
# FETCH_OUTPUT_ARGS
# These options specify the name of the local file
# that will hold the contents of the fetched file.
#
######################################################################
# Default values for the settings and tools used by this script.  Each of
# them is only assigned when it is unset or empty, so the environment can
# override any of them.
#
# CHECKSUM falls back to "false": there is no built-in checksum command,
# so "-c" can only succeed when the caller provides one.
: ${PKGSRCDIR:=/usr/pkgsrc}
: ${CHECKSUM:=false}
: ${CP:=cp}
: ${ECHO:=echo}
: ${FETCH_CMD:=ftp}
: ${MKDIR:=mkdir}
: ${MV:=mv}
# RM is invoked while preparing the ".pkgsrc.resume" file; without a
# default an unset RM would turn "${RM} -f file" into the command "-f".
: ${RM:=rm}
: ${TEST:=test}
: ${TOUCH:=touch}
: ${WC:=wc}

# Base name of this script; used as the prefix of every diagnostic.
self="${0##*/}"
# usage
#	Write the one-line command synopsis, prefixed with the script
#	name held in $self, to standard error.
#
usage() {
	{
		${ECHO} "usage: $self [-c] [-d dir] [-f distinfo] [-p hook] [-r] [-v] file [site ...]"
	} >&2
}
# Process optional arguments
#
# Options are consumed from the front of the argument list until "--" or
# the first word that does not start with "-".  What is left in "$@"
# afterwards is the file name followed by the list of sites.
checksum=
distinfo=
fetchdir=.		# A relative directory or "."
post_fetch=
resume=
verbose=
while ${TEST} $# -gt 0; do
	case "$1" in
	-d|-f|-p)
		# These options take a value.  Check that it is present
		# before "shift 2": with only one argument left the shift
		# fails, and a shell that merely reports that failure would
		# see the same option again and loop forever.
		if ${TEST} $# -lt 2; then
			${ECHO} 1>&2 "$self: option requires an argument -- ${1#-}"
			usage
			exit 1
		fi
		case "$1" in
		-d)	fetchdir="$2" ;;
		-f)	distinfo="$2" ;;
		-p)	post_fetch="$2" ;;
		esac
		shift 2
		;;
	-c)	checksum=yes; shift ;;
	-r)	resume=yes; shift ;;
	-v)	verbose=yes; shift ;;
	--)	shift; break ;;
	-*)	${ECHO} 1>&2 "$self: unknown option -- ${1#-}"
		usage
		exit 1
		;;
	*)	break ;;
	esac
done
# Checksum verification needs a distinfo file to take the checksums from;
# asking for one without the other is a fatal usage error.
if ${TEST} -n "$checksum"; then
	if ${TEST} -z "$distinfo"; then
		${ECHO} "$self: \`\`-c'' requires \`\`-f distinfo''." 1>&2
		exit 1
	fi
fi
# Resuming needs both a distinfo file and FETCH_RESUME_ARGS.  When either
# is missing the request is not fatal: the reason is reported, the resume
# flag is cleared and the script carries on with an ordinary fetch.
if ${TEST} -n "$resume"; then
	if ${TEST} -z "$distinfo"; then
		resume=
		${ECHO} "$self: \`\`-r'' requires \`\`-f distinfo''." 1>&2
	elif ${TEST} -z "${FETCH_RESUME_ARGS}"; then
		resume=
		${ECHO} "$self: \`\`-r'' requires FETCH_RESUME_ARGS to be non-empty." 1>&2
	fi
	if ${TEST} -z "$resume"; then
		${ECHO} "$self: Falling back to non-resume fetch." 1>&2
	fi
fi
# Process required arguments
#
# The first remaining argument is the file to fetch; everything after it
# is left in "$@" as the list of sites.
if ${TEST} $# -lt 1; then
	usage
	exit 1
fi
file="$1"
shift
# Location of the file relative to the current directory.
path="${fetchdir}/${file}"
# A distinfo file that was named on the command line has to exist.
if ${TEST} -n "$distinfo"; then
	${TEST} -f "$distinfo" || {
		${ECHO} "$self: distinfo file missing: $distinfo" 1>&2
		exit 1
	}
fi
# Compute the expected size of the fetched file.
#
# The distinfo file is scanned for the first line whose first field is
# "Size" and whose second field names the file in parentheses.  The
# fourth and fifth fields of that line become $distsize and $distunits.
# Both stay empty when there is no distinfo file or no matching line.
distsize=
distunits=
if ${TEST} -n "$distinfo"; then
	# The name to look for does not change while scanning: the bare
	# file name when fetching into ".", otherwise the file name
	# prefixed with the fetch directory.
	case "$fetchdir" in
	".")	_wanted="($file)" ;;
	*)	_wanted="($path)" ;;
	esac
	# "read -r" keeps backslashes literal, and the redirection is
	# quoted so that a distinfo path containing whitespace still opens.
	while read -r d_type d_file d_equals d_size d_units; do
		case "$d_type" in
		Size)	;;		# only handle "Size" lines
		*)	continue ;;
		esac
		${TEST} "$d_file" = "$_wanted" || continue
		distsize="$d_size"; distunits="$d_units"
		break
	done < "$distinfo"
fi
# verify_file [-v] $file
#	If we can checksum the file, then see if it matches the listed
#	checksums in the distinfo file.  If we can check the size, then
#	check that instead.  We strip off ".pkgsrc.resume" from the
#	filename so that we can verify the checksum for the temporary
#	fetch file as well.
#
#	With -v a diagnostic is written to standard error when the
#	verification fails; without it the function is silent.
#
#	Returns 0 if the file exists and passes the check that applies
#	(or if neither a checksum nor a size check is possible), and
#	1 otherwise.
#
#	${CHECKSUM} and the other tool variables are expanded unquoted on
#	purpose so that they may carry arguments; the file and distinfo
#	paths are quoted so that whitespace or glob characters in them
#	survive.
#
verify_file() {
	# $_if_verbose is prepended to every diagnostic: ":" turns the
	# command into a no-op, the empty string lets it run.
	_if_verbose=:
	if [ "x$1" = "x-v" ]; then
		shift
		_if_verbose=
	fi
	# Drop a leading "./" from the name before it is checked and
	# handed to ${CHECKSUM}.
	_file="${1#./}"
	${TEST} -f "$_file" || {
		$_if_verbose ${ECHO} 1>&2 "$self: File $_file does not exist."
		return 1
	}
	if ${TEST} -n "$checksum"; then
		${CHECKSUM} -s ".pkgsrc.resume" "$distinfo" "$_file" || {
			$_if_verbose ${ECHO} 1>&2 "$self: Checksum of the file $_file doesn't match."
			return 1
		}
		return 0
	elif ${TEST} -n "$distsize"; then
		_size=`${WC} -c < "$_file"`
		${TEST} "$_size" -eq "$distsize" || {
			$_if_verbose ${ECHO} 1>&2 "$self: Size of the file $_file doesn't match."
			return 1
		}
		return 0
	fi
	return 0
}
# If the file already exists and it verifies, then we don't need to fetch
# it again.
#
if verify_file "$path"; then
	exit 0
fi
# Create the fetch directory if needed.  Errors from the creation are
# discarded because the writability test below reports the problem.
${TEST} -d "$fetchdir" || ${MKDIR} -p "$fetchdir" 2>/dev/null
if ${TEST} ! -w "$fetchdir"; then
	# Report the absolute path when the directory can be entered, and
	# the name as given otherwise, so the message always names it.
	_fetchdir_abs=`cd "$fetchdir" 2>/dev/null && pwd` || _fetchdir_abs="$fetchdir"
	${ECHO} 1>&2 "$self: Cannot write to $_fetchdir_abs"
	exit 1
fi
# Set the name of the output file.  In the "resume" case, we initialize
# the fetch loop by ensuring that the temporary output file already
# exists.
#
# All path expansions are quoted so that names containing whitespace or
# glob characters are handled as a single word.
#
outputfile="$file"
outputpath="$fetchdir/$outputfile"
if ${TEST} -n "$resume"; then
	outputfile="${file}.pkgsrc.resume"
	outputpath="$fetchdir/$outputfile"
	if ${TEST} ! -f "$outputpath"; then
		if ${TEST} -f "$path"; then
			# Start from what has been downloaded so far.
			${CP} -f "$path" "$outputpath"
		else
			# Nothing downloaded yet: start from an empty file.
			${RM} -f "$outputpath"
			${TOUCH} "$outputpath"
		fi
	fi
	#
	# If the temporary file verifies, then we don't need to resume
	# fetching it.
	#
	if verify_file "$outputpath"; then
		${MV} -f "$outputpath" "$path"
		exit 0
	fi
	size=`${WC} -c < "$outputpath"`
	${ECHO} "=> Downloaded size: $size bytes"
fi
${TEST} -z "$distsize" || ${ECHO} "=> Total size: $distsize $distunits"
# Iterate over each site and try to fetch the file.  We verify the fetched
# file to see if we need to try fetching from the next site.
#
# A site that starts with "-" is taken, minus that "-", as the complete
# URL; for any other site the file name is appended to form the URL.
#
while ${TEST} $# -gt 0; do
	site="$1"; shift
	case "$site" in
	-*)
		url=${site#-}
		;;
	*)
		url=$site$file
		;;
	esac
	# The fetch runs in a subshell so that the change of directory does
	# not leak into the rest of the script.  If the directory cannot be
	# entered the subshell fails right away instead of downloading into
	# whatever the current directory happens to be.
	( cd "$fetchdir" || exit 1
	  # NOTE(review): FETCH_AFTER_ARGS is not part of the resume command
	  # line; this mirrors the existing behaviour -- confirm it is
	  # intended.
	  if ${TEST} -n "$resume"; then
		fetch_cmd="${FETCH_CMD} ${FETCH_BEFORE_ARGS} ${FETCH_RESUME_ARGS} ${FETCH_OUTPUT_ARGS} $outputfile $url"
	  else
		fetch_cmd="${FETCH_CMD} ${FETCH_BEFORE_ARGS} ${FETCH_OUTPUT_ARGS} $outputfile $url ${FETCH_AFTER_ARGS}"
	  fi
	  ${TEST} -z "$verbose" || ${ECHO} "$fetch_cmd"
	  # Deliberately unquoted: the command line is a flat string that
	  # has to be split back into the command and its arguments.
	  $fetch_cmd )
	if ${TEST} $? -ne 0; then
		${ECHO} 1>&2 "$self: Unable to fetch expected file $file"
		continue
	fi
	if verify_file -v "$outputpath"; then
		# In resume mode the download went to the temporary file;
		# move it into place now that it has verified.
		${TEST} -z "$resume" || ${MV} -f "$outputpath" "$path"
		break
	fi
	# The download did not verify.  In resume mode, reset the temporary
	# file before trying the next site: back to a copy of the existing
	# file if there is one, otherwise just make sure it exists.
	if ${TEST} -n "$resume"; then
		if ${TEST} -f "$path"; then
			${CP} -f "$path" "$outputpath"
		else
			${TOUCH} "$outputpath"
		fi
	fi
done
# Run the post-fetch hook, if one was given, once the file is in place.
# It receives the path of the file and the URL of the last site that was
# tried.  The path is quoted so that the hook always gets it as a single
# argument; the exit status of the hook is not examined.
if ${TEST} -f "$path" && ${TEST} -n "$post_fetch"; then
	"$post_fetch" "$path" "$url"
fi
# The exit status of the script reflects whether the file exists now.
if ${TEST} -f "$path"; then
	exit 0
else
	exit 1
fi