#!/usr/bin/awk -f
#
# $NetBSD: checksum.awk,v 1.3 2020/10/07 18:09:52 jperkin Exp $
#
###########################################################################
#
# NAME
#	checksum.awk -- checksum files
#
# SYNOPSIS
#	checksum.awk [options] distinfo [file ...]
#
# DESCRIPTION
#	checksum will verify the checksums in the distinfo file for each
#	of the files specified.
#
#	The checksum utility exits with one of the following values:
#
#	0	All of the file checksums verify.
#
#	1	At least one of the file checksums did not match.
#
#	2	At least one of the files is missing any checksum.
#
#	>2	An error occurred.
#
# OPTIONS
#	-a algorithm	Only verify checksums for the specified algorithm.
#
#	-p		The specified files are patches, so strip out any
#			lines containing NetBSD RCS ID tags before
#			computing the checksums for verification.
#
#	-s suffix	Strip the specified suffix from the file names
#			when searching for the checksum.
#
# BUGS
#	The flow of this program is not performed in the most optimal way
#	possible, as it was deemed important to retain output compatibility
#	with the previous shell script implementation.
#

BEGIN {
	# External tools may be overridden through the environment.  They are
	# interpolated into shell commands as-is (not quoted) so that a value
	# such as "/path/to/digest" keeps working unchanged.
	DIGEST = ENVIRON["DIGEST"] ? ENVIRON["DIGEST"] : "digest"
	SED = ENVIRON["SED"] ? ENVIRON["SED"] : "sed"

	# Retain output compatible with previous "checksum" shell script
	progname = "checksum"

	only_alg = ""
	distinfo = ""
	exitcode = 0
	patch = 0
	suffix = ""

	#
	# Parse command line options.  Everything is handled inside BEGIN, so
	# ARGV is walked by hand; the first non-option argument is the
	# distinfo file and the remainder are the files to verify.
	#
	for (arg = 1; arg < ARGC; arg++) {
		opt = ARGV[arg]
		if (opt == "-a") {
			only_alg = ARGV[++arg]
		} else if (opt == "-p") {
			patch = 1
		} else if (opt == "-s") {
			suffix = ARGV[++arg]
		} else if (opt == "--") {
			arg++
			break
		} else if (match(opt, /^-.*/) != 0) {
			# Report the option without its leading dash.
			opt = substr(opt, RSTART + 1, RLENGTH)
			err(sprintf("%s: unknown option -- %s", progname, opt))
			usage()
			exit 3
		} else {
			break
		}
	}

	if (arg >= ARGC) {
		usage()
		exit 3
	}

	distinfo = ARGV[arg++]
	# The path is quoted so that whitespace or shell metacharacters in it
	# cannot alter the command that is run.
	cmd = sprintf("test -f %s", shquote(distinfo))
	if (system(cmd) != 0) {
		err(sprintf("%s: distinfo file missing: %s", progname,
		    distinfo))
		exit 3
	}

	#
	# Initialise list of files to check, passed on the command line.  In
	# order to keep things simple, distfiles[] is also used when operating
	# in patch mode (-p).
	#
	while (arg < ARGC) {
		distfile = ARGV[arg++]
		sfile = distfile
		if (suffix) {
			sfile = strip_suffix(sfile)
		}
		if (patch) {
			# Patches are looked up in distinfo by basename.
			gsub(/.*\//, "", sfile)
		}

		#
		# Have we seen this file in distinfo?  Used later to verify
		# that all checksums have been recorded.
		#
		seen[sfile] = 0

		#
		# Store the filename to be checked in the distinfo file.  The
		# -s flag allows temporary download files to be tested instead,
		# where the suffix will be stripped to match distinfo.
		#
		distfiles[sfile] = distfile
	}

	#
	# Parse the distinfo file for checksums that must be verified.  We're
	# only interested in lines of the format:
	#
	#	algorithm (distfile) = checksum
	#
	# getline returns -1 on a read error, which is "true" in awk, so the
	# result must be compared against 0 explicitly or an unreadable
	# distinfo would loop forever.
	#
	while ((rc = (getline < distinfo)) > 0) {
		if (NF != 4) {
			continue
		}
		if ($0 ~ /^(#|\$|Size)/) {
			continue
		}

		algorithm = $1
		# strip "(filename)" -> "filename"
		distfile = substr($2, 2, (length($2) - 2))
		checksum = $4

		# Skip IGNORE lines (likely legacy at this point).
		if (checksum == "IGNORE") {
			continue
		}

		# If -a is set then skip non-matching algorithms.
		if (only_alg && tolower(algorithm) != tolower(only_alg)) {
			continue
		}

		# Skip if file not in distfiles.
		if (!(distfile in distfiles)) {
			continue
		}

		#
		# Handle patch files inline.  As they need to be modified (by
		# removing the NetBSD RCS Id) they are parsed individually by
		# digest(1), and so we calculate the checksums now rather than
		# saving for later processing to simplify things.
		#
		if (patch) {
			patchfile = distfiles[distfile]
			# The file name and the algorithm (which comes from
			# distinfo) are quoted before reaching the shell.
			cmd = sprintf("%s -e '/[$]NetBSD.*/d' %s | %s %s", SED,
			    shquote(patchfile), DIGEST, shquote(algorithm))
			while ((cmd | getline) > 0) {
				checksums[algorithm, distfile] = $1
			}
			close(cmd)
			continue
		}

		#
		# If not a patch file, then we're handling a distfile, where we
		# want to build a list of input files to digest(1) so they can
		# all be calculated in one go.
		#
		distsums[algorithm] = sprintf("%s %s", distsums[algorithm],
		    shquote(distfiles[distfile]))
	}
	close(distinfo)
	if (rc < 0) {
		err(sprintf("%s: cannot read distinfo file: %s", progname,
		    distinfo))
		exit 3
	}

	#
	# We now have a list of distfiles to be checked for each algorithm,
	# pass them all to a single digest(1) command and parse the checksums
	# to be compared against distinfo.
	#
	for (algorithm in distsums) {
		cmd = sprintf("%s %s %s", DIGEST, shquote(algorithm),
		    distsums[algorithm])
		while ((cmd | getline) > 0) {
			# Should be unnecessary, but just in case.  If we want
			# to be really paranoid then test that $1 == algorithm.
			# Note that output for a file name containing
			# whitespace does not have 4 fields and is skipped.
			if (NF != 4) {
				continue
			}
			# strip "(filename)" -> "filename"
			distfile = substr($2, 2, length($2) - 2)
			if (suffix) {
				distfile = strip_suffix(distfile)
			}
			checksums[$1, distfile] = $4
		}
		close(cmd)
	}

	#
	# Now that we have computed all the necessary checksums for all of the
	# files listed on the command line, go back through distinfo and verify
	# that they all match.
	#
	while ((rc = (getline < distinfo)) > 0) {
		if (NF != 4) {
			continue
		}
		if ($0 ~ /^(#|\$|Size)/) {
			continue
		}

		algorithm = $1
		# strip "(filename)" -> "filename"
		distfile = substr($2, 2, (length($2) - 2))
		checksum = $4

		# If -a is set then skip non-matching algorithms.
		if (only_alg && tolower(algorithm) != tolower(only_alg)) {
			continue
		}

		# Skip if file not in distfiles.
		if (!(distfile in distfiles)) {
			continue
		}

		# This is likely very legacy at this point.
		if (checksum == "IGNORE") {
			err(sprintf("%s: Ignoring checksum for %s", progname,
			    distfile))
			continue
		}

		if (checksums[algorithm,distfile] == checksum) {
			printf("=> Checksum %s OK for %s\n", algorithm,
			    distfile)
			seen[distfile] = 1
		} else {
			# The first mismatch aborts the run immediately.
			err(sprintf("%s: Checksum %s mismatch for %s",
			    progname, algorithm, distfile))
			exit 1
		}
	}
	close(distinfo)
	if (rc < 0) {
		err(sprintf("%s: cannot read distinfo file: %s", progname,
		    distinfo))
		exit 3
	}

	#
	# Check that all distfiles supplied on the command line have at least
	# one matching checksum.
	#
	for (distfile in distfiles) {
		if (seen[distfile])
			continue

		if (only_alg) {
			err(sprintf("%s: No %s checksum recorded for %s",
			    progname, only_alg, distfile))
		} else {
			err(sprintf("%s: No checksum recorded for %s",
			    progname, distfile))
		}
		exitcode = 2
	}

	exit(exitcode)
}

#
# Print a single message line to standard error.
#
function err(errmsg)
{
	printf("%s\n", errmsg) > "/dev/stderr"
}

#
# Print the usage summary to standard error.
#
function usage()
{
	err(sprintf("usage: %s [-a algorithm] [-p] [-s suffix]" \
	    " distinfo [file ...]", progname))
}

#
# Return "str" wrapped in single quotes so that the shell treats it as one
# literal word.  Embedded single quotes are emitted as '\'' (close the
# quoted string, add an escaped quote, reopen the quoted string).  awk passes
# scalars by value, so the caller's variable is not modified.
#
function shquote(str)
{
	gsub(/'/, "'\\''", str)
	return "'" str "'"
}

#
# In order to provide maximum compatibility, the following function attempts
# to strip the exact string suffix, rather than a simple sub() which may
# interpret e.g. dots incorrectly as it uses regular expressions.
#
# "suffix" is a global variable, and this function is only called when it is
# set.  The filename is returned unchanged if it does not end in the suffix,
# or if it is not longer than the suffix.
#
# The parameters after "filename" are local scratch variables (the usual awk
# idiom); callers pass only the filename.
#
function strip_suffix(filename,    len_file, len_sufx, len_s1, s1, s2)
{
	len_file = length(filename)
	len_sufx = length(suffix)
	len_s1 = len_file - len_sufx

	if (len_s1 <= 0)
		return filename

	s1 = substr(filename, 1, len_s1)
	s2 = substr(filename, len_s1 + 1, len_sufx)

	if (s2 == suffix) {
		return s1
	} else {
		return filename
	}
}