#!/bin/sh exec perl -w -x $0 ${1+"$@"} # -*- mode: perl; perl-indent-level: 2; -*- #!perl -w ############################################################## ### ### ### cvs2cl.pl: produce ChangeLog(s) from `cvs log` output. ### ### ### ############################################################## ### $Revision: 1.117 $, $Date: 1999/07/30 03:19:51 $, $Author: kfogel $ ### ## (C) 1999 Karl Fogel , under the GNU GPL. ## ## (Extensively hacked on by Melissa O'Neill .) ## ## cvs2cl.pl is free software; you can redistribute it and/or modify ## it under the terms of the GNU General Public License as published by ## the Free Software Foundation; either version 2, or (at your option) ## any later version. ## ## cvs2cl.pl is distributed in the hope that it will be useful, ## but WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ## GNU General Public License for more details. ## ## You may have received a copy of the GNU General Public License ## along with cvs2cl.pl; see the file COPYING. If not, write to the ## Free Software Foundation, Inc., 59 Temple Place - Suite 330, ## Boston, MA 02111-1307, USA. use strict; use Text::Wrap; use Time::Local; use File::Basename; # The Plan: # # Read in the logs for multiple files, spit out a nice ChangeLog that # mirrors the information entered during `cvs commit'. # # The problem presents some challenges. In an ideal world, we could # detect files with the same author, log message, and checkin time -- # each would be a changelog entry. # We'd sort them; and spit them out. Unfortunately, CVS is *not atomic* # so checkins can span a range of times. Also, the directory structure # could be hierarchical. # # Another question is whether we really want to have the ChangeLog # exactly reflect commits. An author could issue two related commits, # with different log entries, reflecting a single logical change to the # source. GNU style ChangeLogs group these under a single author/date. # We try to do the same. # # So, we parse the output of `cvs log', storing log messages in a # multilevel hash that stores the mapping: # directory => author => time => message => filelist # When we've read all the logs, we twist this mapping into # a time => author => message => filelist mapping for each directory. # It's during this twisting phase that we notice times that were nearby # and merge them. # # If we're not using the `--distributed' flag, the directory is always # considered to be `./', even as descend into subdirectories. ############### Globals ################ # What we run to generate it: my $Log_Source_Command = "cvs log"; # In case we have to print it out: my $VERSION = '$Revision: 1.117 $'; $VERSION =~ s/\S+\s+(\S+)\s+\S+/$1/; ## Vars set by options: # Print debugging messages? my $Debug = 0; # Just show version and exit? my $Print_Version = 0; # Just print usage message and exit? my $Print_Usage = 0; # Single top-level ChangeLog, or one per subdirectory? my $Distributed = 0; # What file should we generate (defaults to "ChangeLog")? my $Log_File_Name = "ChangeLog"; # Expand usernames to email addresses based on a map file? my $User_Map_File = ""; # Output to a file or to stdout? my $Output_To_Stdout = 0; # Eliminate empty log messages? my $Prune_Empty_Msgs = 0; # Include revision numbers in output? my $Include_Revisions = 0; # Include tags (symbolic names) in output? my $Include_Tags = 0; # Show branches by symbolic name in output? my $Show_Branches = 0; # Maybe only show log messages matching a certain regular expression. my $Regexp_Gate = ""; # Pass this global option string along to cvs, to the left of `log': my $Global_Opts = ""; # Pass this option string along to the cvs log subcommand: my $Command_Opts = ""; # Read log output from stdin instead of invoking cvs log? my $Input_From_Stdin = 0; # Max checkin duration. CVS checkin is not atomic, so we may have checkin # times that span a range of time. We assume that checkins will last no # longer than $Max_Checkin_Duration seconds, and that similarly, no # checkins will happen from the same users with the same message less # than $Max_Checkin_Duration seconds apart. my $Max_Checkin_Duration = 180; # What to put at the front of [each] ChangeLog. # (Default can be overridden by option.) my $ChangeLog_Header = <<'END_OF_HEADER'; -+- -+- -+- -+- -+- -+- -+- -+- -+- -+- -+- -+- -+- -+- -+- -+- -+- -+- NOTE: This file was automatically generated by `cvs2cl.pl'. If you edit this file, your changes will be lost when someone next runs cvs2cl.pl. See http://www.red-bean.com/~kfogel/cvs2cl.shtml for more. -+- -+- -+- -+- -+- -+- -+- -+- -+- -+- -+- -+- -+- -+- -+- -+- -+- -+- END_OF_HEADER ## end vars set by options. # In 'cvs log' output, one long unbroken line of equal signs separates # files: my $file_separator = "=======================================" . "======================================"; # In 'cvs log' output, a shorter line of dashes separates log messages # within a file: my $logmsg_separator = "----------------------------"; ############### End globals ############ &parse_options (); &derive_change_log (); ### Everything below is subroutine definitions. ### # Fills up a ChangeLog structure in the current directory. sub derive_change_log () { # See "The Plan" above for a full explanation. my %grand_poobah; my $file_full_path; my $time; my $revision; my $author; my $msg_txt; my $detected_file_separator; # We might be expanding usernames my %usermap; # In general, it's probably not very maintainable to use state # variables like this to tell the loop what it's doing at any given # moment, but this is only the first one, and if we never have more # than a few of these, it's okay. my $collecting_symbolic_names = 0; my %symbolic_names; # Where tag names get stored. my %branch_names; # We'll grab branch names while we're at it. my @branch_roots; # For showing which files are branch ancestors. if (! $Input_From_Stdin) { open (LOG_SOURCE, "$Log_Source_Command |") or die "unable to run \"${Log_Source_Command}\""; } else { open (LOG_SOURCE, "-") or die "unable to open stdin for reading"; } %usermap = &maybe_read_user_map_file (); while () { # If on a new file and don't see filename, skip until we find it, and # when we find it, grab it. if ((! (defined $file_full_path)) and /^Working file: (.*)/) { $file_full_path = $1; next; } # Collect tag names in case we're asked to print them in the output. if (/^symbolic names:$/) { $collecting_symbolic_names = 1; next; # No point continuing, as there's no more info on this line } if ($collecting_symbolic_names) { # All tag names are listed with whitespace in front in cvs log # output; so if see non-whitespace, then we're done collecting. if (/^\S/) { $collecting_symbolic_names = 0; } else # we're looking at a tag name, so parse & store it { # According to the Cederqvist manual, in node "Tags", tag # names must start with an uppercase or lowercase letter and # can contain uppercase and lowercase letters, digits, `-', # and `_'. However, it's not our place to enforce that, so # we'll allow anything CVS hands us to be a tag: /^\s([^:]+): ([\d.]+)$/; my $tag_name = $1; my $tag_rev = $2; push (@{$symbolic_names{$tag_rev}}, $tag_name); # You can always tell a branch by the ".0." as the # second-to-last digit in the revision number. if ($tag_rev =~ /(\d+\.(\d+\.)+)0.(\d+)/) { my $real_branch_rev = $1 . $3; $branch_names{$real_branch_rev} = $tag_name; } } } # End of code for collecting tag names. # If have file name, but not revision, and see revision, then grab # it. (We collect it now even though we may or may not need it.) if ((! (defined $revision)) and /^revision (\d+\.[\d.]+)/) { $revision = $1; next; } # If have file name but not time and author, and see date or # author, then grab them: unless (defined $time) { if (/^date: .*/) { ($time, $author) = &parse_date_and_author ($_); if (defined ($usermap{$author}) and $usermap{$author}) { $author = $usermap{$author}; } } else { $detected_file_separator = /^$file_separator$/o; if ($detected_file_separator) { # no revisions are listed for this file; (happens e.g. for # "cvs log -d") goto CLEAR; } } # If the date/time/author hasn't been found yet, we couldn't # possibly care about anything we see. So skip: next; } # A "branches: ..." line here indicates that one or more branches # are rooted at this revision. If we're showing branches, then we # want to show that fact as well, so we collect all the branches # that this is the latest ancestor of and store them in # @branch_roots. In the output, we'd print something like # # * hello.c (1.5, branches to: branch_name1, branch_name2): # reverted to 1.3 code. # # Just for reference, the format of the line we're seeing at this # point is: # # branches: 1.5.2; 1.5.4; ...; # # Okay, here goes: if ($Show_Branches and (/^branches:\s+(.*);$/)) { my $lst = $1; $lst =~ s/(1\.)+1;|(1\.)+1$//; # ignore the trivial branch 1.1.1 if ($lst) { @branch_roots = split (/;\s+/, $lst); } else { undef @branch_roots; } next; } # If have file name, time, and author, then we're just grabbing # log message texts: $detected_file_separator = /^$file_separator$/o; if ($detected_file_separator && ! (defined $revision)) { # no revisions listed for this file; happens e.g. for "cvs log -d" goto CLEAR; } unless ($detected_file_separator || /^$logmsg_separator$/o) { $msg_txt .= $_; # Normally, just accumulate the message... next; } # ... until a msg separator is encountered: # Ensure the message contains something: if ((! $msg_txt) || ($msg_txt =~ /^\s*\.\s*$|^\s*$/) || ($msg_txt =~ /\*\*\* empty log message \*\*\*/)) { if ($Prune_Empty_Msgs) { goto CLEAR; } # else $msg_txt = "[no log message]\n"; } ### Store it all in the Grand Poobah: { my $dir_key; # key into %grand_poobah my $fname; # the file name we record under if ($Distributed) { ($fname, $dir_key, undef) = fileparse ($file_full_path); } else { $dir_key = "./"; $fname = $file_full_path; } # We might be including revision numbers and/or tags and/or # branch names in the output. Most of the code from here to # loop-end deals with combining these behaviors. my $rev_to_print = $revision; if ($Show_Branches) { $revision =~ /([\d.]+)\d+/; my $branch_prefix = $1; $branch_prefix =~ s/\.$//; # strip off final dot if ($branch_names{$branch_prefix}) { # If we're showing branches *and* revisions, then we show # branches as branch.N, where N is the revision number on # the branch. But if we're showing branches only, then we # just show the branch name with no revision number. if ($Include_Revisions) { $rev_to_print =~ s/$branch_prefix/$branch_names{$branch_prefix}/; } else { $rev_to_print =~ s/$branch_prefix\.\d+/$branch_names{$branch_prefix}/; } } # If there's anything in the @branch_roots array, then this # revision is the root of at least one branch. We'll display # them as branch names instead of revision numbers, the # substitution for which is done directly in the array: if (@branch_roots) { my $roots = join ', ', map { $branch_names{$_} } @branch_roots; $rev_to_print .= ", branches to: ${roots}"; } } if ($Include_Tags && (defined ($symbolic_names{$revision}))) { my $tags = join (", ", @{$symbolic_names{$revision}}); if ($Include_Revisions) { $fname .= " ($rev_to_print; tags: $tags)"; } else { $fname .= " (tags: $tags)"; } undef $symbolic_names{$revision}; } elsif ($Include_Revisions) { $fname .= " ($rev_to_print)"; } elsif ($Show_Branches) { # If we're only showing branches, and not revisions, then we # only tack on the revision "number" if it's really a branch # name. Conveniently, CVS guarantees that tags never begin # with a digit, so we can use that as our test. if (! ($rev_to_print =~ /^[\d]/)) { $fname .= " ($rev_to_print)"; } } # One last check: make sure it passes the regexp test, if the # user asked for that. We have to do it here, so that the # test can match against tags/branches (i.e., against # $fname as well as $author and $msg_txt). if ($Regexp_Gate && ($msg_txt !~ /$Regexp_Gate/o) && ($fname !~ /$Regexp_Gate/o) && ($author !~ /$Regexp_Gate/o)) { goto CLEAR; } # Add this file to the list # (We use many spoonfuls of autovivication magic. Hashes and arrays # will spring into existence if they aren't there already.) &debug ("(pushing log msg for ${dir_key}${fname})\n"); push (@{$grand_poobah{$dir_key}{$author}{$time}{$msg_txt}}, $fname); } CLEAR: # Make way for the next message undef $msg_txt; undef $time; undef $revision; undef $author; undef @branch_roots; # Maybe even make way for the next file: if ($detected_file_separator) { undef $file_full_path; undef %branch_names; } } close (LOG_SOURCE); ### Process each ChangeLog while (my ($dir,$authorhash) = each %grand_poobah) { &debug ("DOING DIR: $dir\n"); # Here we twist our hash around, from being # author => time => message => filelist # in %$authorhash to # time => author => message => filelist # in %changelog. As we do this twist, we notice nearby times and # fix them to be the same. (To save space, we zap %$authorhash after # we've copied everything out of it.) my %changelog; while (my ($author,$timehash) = each %$authorhash) { my $lasttime; my $stamptime; foreach my $time (sort {$main::a <=> $main::b} (keys %$timehash)) { my $msghash = $timehash->{$time}; while (my ($msg,$filelist) = each %$msghash) { if ((defined $lasttime) and (($time - $lasttime) < $Max_Checkin_Duration)) { push(@{$changelog{$stamptime}{$author}{$msg}}, @$filelist); } else { $changelog{$time}{$author}{$msg} = $filelist; $stamptime = $time; } $lasttime = $time; } } } undef (%$authorhash); ### Now we can write out the ChangeLog! my ($logfile_here, $logfile_bak, $tmpfile); if (! $Output_To_Stdout) { $logfile_here = $dir . $Log_File_Name; $logfile_here =~ s/^\.\/\//\//; # fix any leading ".//" problem $tmpfile = "${logfile_here}.cvs2cl$$.tmp"; $logfile_bak = "${logfile_here}.bak"; open (LOG_OUT, ">$tmpfile") or die "Unable to open \"$tmpfile\""; } else { open (LOG_OUT, ">-") or die "Unable to open stdout for writing"; } print LOG_OUT $ChangeLog_Header; foreach my $time (sort {$main::b <=> $main::a} (keys %changelog)) { my $authorhash = $changelog{$time}; while (my ($author,$mesghash) = each %$authorhash) { # todo: expand author to a full name, somehow, someday? my ($ignore,$min,$hour,$mday,$mon,$year) = gmtime($time); printf LOG_OUT "%4u-%02u-%02u %02u:%02u %s\n\n", $year+1900, $mon+1, $mday, $hour, $min, $author; while (my ($msg,$filelist) = each %$mesghash) { my $files = &pretty_file_list ($filelist); my $text = &pretty_msg_text ($msg); # See page 512 of Perl Programming, 2nd Edition, about "wrap()" print LOG_OUT wrap ("\t", " ", "$files", "$text"), "\n"; } } } close (LOG_OUT); if (! $Output_To_Stdout) { if (-f $logfile_here) { rename ($logfile_here, $logfile_bak); } rename ($tmpfile, $logfile_here); } } } sub parse_date_and_author () { # Parses the date/time and author out of a line like: # # date: 1999/02/19 23:29:05; author: apharris; state: Exp; my $line = shift; my ($year, $mon, $mday, $hours, $min, $secs, $author) = $line =~ m#(\d+)/(\d+)/(\d+)\s+(\d+):(\d+):(\d+);\s+author:\s+([^;]+);# or die "Couldn't parse date ``$line''"; die "Bad date or Y2K issues" unless ($year > 1969 and $year < 2258); # Kinda arbitrary, but useful as a sanity check my $time = timegm($secs,$min,$hours,$mday,$mon-1,$year-1900); return ($time, $author); } sub pretty_file_list () { my $filesref = shift; my @files = @$filesref; my $pretty_str = join(', ', @files); # todo: refill to standard ChangeLog proportions $pretty_str = "* $pretty_str:"; return $pretty_str; } sub pretty_msg_text () { my $text = shift; # If it *looks* like two newlines, make it *be* two newlines: $text =~ s/\n\s*\n/\n\n/g; # Strip off lone newlines, but only for lines that don't begin with # whitespace or a mail-quoting character, since we want to preserve # that kind of formatting. Also don't strip newlines that follow a # period; we handle those specially next. 1 while ($text =~ s/(^|\n)([^>\s].*[^.\n])\n([^>\n])/$1$2 $3/g); # If a newline follows a period, make sure that when we bring up the # bottom sentence, it begins with two spaces. 1 while ($text =~ s/(^|\n)([^>\s].*)\n([^>\n])/$1$2 $3/g); return $text; } sub maybe_read_user_map_file () { my %expansions; if ($User_Map_File) { open (MAPFILE, "<$User_Map_File") or die ("Unable to open $User_Map_File ($!)"); while () { my ($username, $expansion) = split ':'; chomp $expansion; $expansion =~ s/^'(.*)'$/$1/; $expansion =~ s/^"(.*)"$/$1/; # If it looks like the expansion has a real name already, then # we toss the username we got from CVS log. Otherwise, keep # it to use in combination with the email address. if ($expansion =~ /^\s*<{0,1}\S+@.*/) { # Also, add angle brackets if none present if (! ($expansion =~ /<\S+@\S+>/)) { $expansions{$username} = "$username <$expansion>"; } else { $expansions{$username} = "$username $expansion"; } } else { $expansions{$username} = $expansion; } } close (MAPFILE); } return %expansions; } sub parse_options () { # Check this internally before setting the global variable. my $output_file; # If this gets set, we encountered unknown options and will exit at # the end of this subroutine. my $exit_with_admonishment = 0; while (my $arg = shift (@ARGV)) { if ($arg =~ /^-h$|^-help$|^--help$|^--usage$|^-?$/) { $Print_Usage = 1; } elsif ($arg =~ /^--debug$/) { # unadvertised option, heh $Debug = 1; } elsif ($arg =~ /^--version$/) { $Print_Version = 1; } elsif ($arg =~ /^-g$|^--global-opts$/) { my $opts = shift (@ARGV); # Don't assume CVS is called "cvs" on the user's system: $Log_Source_Command =~ s/(^\S*)/$1 $opts/; } elsif ($arg =~ /^-l$|^--log-opts$/) { my $opts = shift (@ARGV); $Log_Source_Command .= " $opts"; } elsif ($arg =~ /^-f$|^--file$/) { $output_file = shift (@ARGV); } elsif ($arg =~ /^-U$|^--usermap$/) { $User_Map_File = shift (@ARGV); } elsif ($arg =~ /^-W$|^--window$/) { $Max_Checkin_Duration = shift (@ARGV); } elsif ($arg =~ /^-R$|^--regexp$/) { $Regexp_Gate = shift (@ARGV); } elsif ($arg =~ /^--stdout$/) { $Output_To_Stdout = 1; } elsif ($arg =~ /^--version$/) { $Print_Version = 1; } elsif ($arg =~ /^-d$|^--distributed$/) { $Distributed = 1; } elsif ($arg =~ /^-P$|^--prune$/) { $Prune_Empty_Msgs = 1; } elsif ($arg =~ /^-r$|^--revisions$/) { $Include_Revisions = 1; } elsif ($arg =~ /^-t$|^--tags$/) { $Include_Tags = 1; } elsif ($arg =~ /^-b$|^--branches$/) { $Show_Branches = 1; } elsif ($arg =~ /^--stdin$/) { $Input_From_Stdin = 1; } elsif ($arg =~ /^--header$/) { my $header_filename = shift (@ARGV); $ChangeLog_Header = &slurp_file ($header_filename); if (! defined ($ChangeLog_Header)) { $ChangeLog_Header = ""; } } else { # Just add a filename as argument to the log command $Log_Source_Command .= " $arg"; } } ## Check for contradictions... if ($Output_To_Stdout && $Distributed) { print STDERR "cannot pass both --stdout and --distributed\n"; $exit_with_admonishment = 1; } if ($Output_To_Stdout && $output_file) { print STDERR "cannot pass both --stdout and --file\n"; $exit_with_admonishment = 1; } # Or if any other error message has already been printed out, we # just leave now: if ($exit_with_admonishment) { &usage (); exit (1); } elsif ($Print_Usage) { &usage (); exit (0); } elsif ($Print_Version) { &version (); exit (0); } ## Else no problems, so proceed. if ($Output_To_Stdout) { undef $Log_File_Name; # not actually necessary } elsif ($output_file) { $Log_File_Name = $output_file; } } sub slurp_file () { my $filename = shift || die ("no filename passed to slurp_file()"); my $retstr; open (SLURPEE, "<${filename}") or die ("unable to open $filename ($!)"); my $saved_sep = $/; undef $/; $retstr = ; $/ = $saved_sep; close (SLURPEE); return $retstr; } sub debug () { if ($Debug) { my $msg = shift; print STDERR $msg; } } sub version () { print "cvs2cl.pl version ${VERSION}; distributed under the GNU GPL.\n"; } sub usage () { &version (); print <<'END_OF_INFO'; Run inside a CVS working copy to generates a GNU-style ChangeLog. Options: -h, -help, --help, or -? Show this usage and exit --version Show version and exit -d, --distributed Put ChangeLogs in subdirs --stdin Read from stdin, don't run cvs log --stdout Output to stdout not to ChangeLog -f FILE, --file FILE Use FILE instead of "ChangeLog" --header FILE Get ChangeLog header from FILE ("-" means stdin) -W SECS, --window SECS Window of time within which log entries unify -U UFILE, --usermap UFILE Expand usernames to email addresses from UFILE -R REGEXP, --regexp REGEXP Include only entries that match REGEXP -r, --revisions Include revision numbers in output -t, --tags Include tags (symbolic names) in output -b, --branches Show branch names in revisions when possible -P, --prune Don't show empty log messages -g OPTS, --global-opts OPTS Run "cvs OPTS log ..." -l OPTS, --log-opts OPTS Run "cvs ... log OPTS" FILE1 [FILE2 ...] Include log information only for named file(s) For the -U option, each line of UFILE is like "jimb:", or even with a full name, like "jimb:'Jim Blandy '". See http://www.red-bean.com/~kfogel/cvs2cl.shtml for maintenance and bug info. END_OF_INFO } __END__ =head1 NAME cvs2cl.pl - produces GNU-style ChangeLogs in CVS working copies, by running "cvs log" and parsing the output. Shared log entries are unified in an intuitive way. =head1 DESCRIPTION This script generates GNU-style ChangeLog files from CVS log information. Basic usage: just run it inside a working copy and a ChangeLog will appear. It requires repository access (i.e., 'cvs log' must work). Run "cvs2cl.pl --help" to see more advanced options. See http://www.red-bean.com/~kfogel/cvs2cl.shtml for updates, and for instructions on getting anonymous CVS access to this script. Maintainer: Karl Fogel Please report bugs to . =head1 README This script generates GNU-style ChangeLog files from CVS log information. Basic usage: just run it inside a working copy and a ChangeLog will appear. It requires repository access (i.e., 'cvs log' must work). Run "cvs2cl.pl --help" to see more advanced options. See http://www.red-bean.com/~kfogel/cvs2cl.shtml for updates, and for instructions on getting anonymous CVS access to this script. Maintainer: Karl Fogel Please report bugs to . =head1 PREREQUISITES This script requires C, C, and C. It also seems to require C or higher. =pod OSNAMES any =pod SCRIPT CATEGORIES Version_Control/CVS =cut -*- -*- -*- -*- -*- -*- -*- -*- -*- -*- -*- -*- -*- -*- -*- -*- -*- -*- Note about a bug-slash-opportunity: ----------------------------------- There's a bug in Text::Wrap, which affects cvs2cl. This script reveals it: #!/usr/bin/perl -w use Text::Wrap; my $test_text = "This script demonstrates a bug in Text::Wrap. The very long line following this paragraph will be relocated relative to the surrounding text: ==================================================================== See? When the bug happens, we'll get the line of equal signs below this paragraph, even though it should be above."; # Print out the test text with no wrapping: print "$test_text"; print "\n"; print "\n"; # Now print it out wrapped, and see the bug: print wrap ("\t", " ", "$test_text"); print "\n"; print "\n"; If the line of equal signs were one shorter, then the bug doesn't happen. Interesting. Anyway, rather than fix this in Text::Wrap, we might as well write a new wrap() which has the following much-needed features: * initial indentation, like current Text::Wrap() * subsequent line indentation, like current Text::Wrap() * user chooses among: force-break long words, leave them alone, or die()? * preserve existing indentation: chopped chunks from an indented line are indented by same (like this line, not counting the asterisk!) * optional list of things to preserve on line starts, default ">" Note that the last two are essentially the same concept, so unify in implementation and give a good interface to controlling them. And how about: Optionally, when encounter a line pre-indented by same as previous line, then strip the newline and refill, but indent by the same. Yeah...