#! @PERL@ -w # @configure_input@ # Copyright (c) 2001-2006 by Martin Kammerhofer # All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions # are met: # 1. Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # 2. Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # # THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND # ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE # ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS # OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) # HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT # LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY # OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF # SUCH DAMAGE. # Convert FreeBSD "commitlog" file(s) into HTML with links to cvsweb.cgi. # This script parses CVS commitlogs assembled by the log_accum.pl script. # @(#)$Id: commitlog2cvsweb.pl.in,v 1.29 2006/06/22 10:44:15 martin Exp $ # WARNING: The following code is very poor style. It has been hacked # a few times and needs a rewrite. However it still works fine for me. 
require 5.003 ; use strict ; use constant CONFENV => "CVSWEB_CONVERTERS_CONF" ; # ENV variable use constant CONFNAME => "cvsweb-converters.conf" ; # file basename use constant COMMON => "common" ; # default section in ini-file use Carp ; use Compress::Zlib ; use Config ; use Config::IniFiles ; use File::Basename ; use Getopt::Long ; { local $^W = 0 ; eval "use URI::Escape" ; if ($@) { # provide a dummy function if package URI::Escape is not available. sub uri_escape { return $_[0] ; } warn "$0: package URI::Escape is not available\n" ; } } my ( $prog, $prog_dir ) = fileparse( $0, "\.p(er)?l" ) ; my $version = '$Id: commitlog2cvsweb.pl.in,v 1.29 2006/06/22 10:44:15 martin Exp $' ; # '; $version =~ s/^\s*\$Id: // ; $version =~ s/ \$\s*$// ; my $debug = 0 ; # debuglevel: currently 2..5 are useful, see options -d, -debug # However using -d doesn't cover code until GetOptions() below! my $origPATH = $ENV{PATH} || "/bin:/usr/bin:@PREFIX@/bin" ; delete @ENV{qw(IFS CDPATH ENV BASH_ENV PATH)} ; # keep this comment line here! my ( $verbose, $pfx, $self_prefix, $unknown_prefix, $stdin_with_pfx, $cvsweb, $urlsuffix, $branch_wanted, $show_all_branches, $query_pr_cgi, $committer_wanted, $maximum_output, $hyperlink_committers, $and_patterns, $case_insensitive, $number_commits, $hr, $html_header_and_footer, $body_attributes, $extra_font1, $extra_font2, $branch_font1, $branch_font2, $log_font1, $log_font2, $file_headers, $commits_are_time_sequential, $sort_arguments, $start_date, $end_date, $perldoc, $outfile, $filenm_pattern, $commitlogdir, @logmsg_patterns, ) = # keep this comment line here! # DO NOT EDIT THE DEFAULTS HERE - use the configfile instead! # (Otherwise you have to start all over when you upgrade this script.) 
( 0, # verbose flag (file summary lines to STDERR) "src/", # default prefix ${pfx} for directories inside repository # the following files get their name as prefix "CVSROOT|distrib|doc|ports|www", "other", # no known prefix for these files 0, # use ${pfx} on --stdin too "http://cvsweb.FreeBSD.org/", # URL of cvsweb.cgi "", # suffix to URL e.g. "cvsroot=myproject" "", # RCS branch e.g. "MAIN" 0, # show all branches in cvsweb, even if only commits to a # single branch are requested with $branch_wanted "http://www.FreeBSD.org/cgi/query-pr.cgi", # PR cgi "", # case insensitive committer pattern, e.g. "joe|fred" 0, # max. number of commit messages to output (0 for no limit) 0, # add hyperlinks to next/previous commit by same committer 0, # ANDing of log message patterns 0, # case insensitive pattern matching 1, # number all commits 1, # rules before each commit 1, # print HTML header and footer ( and ) "bgcolor=white", # attributes for HTML tag: "", # "" tag for per committer links and commit #s "", # "" tag for per committer links and commit #s "", # "" tag for branch "", # "" tag for branch "", # "" tag for searched text in log "", # "" tag for searched text in log 1, # wether to write

file headers 1, # commit times are always increasing in input files 0, # sort @ARGV according to modification dates (-M) undef, undef, # start/end date "perldoc", # pathname of perldoc script (if not in default PATH) "-", # outputfile "", # filename pattern, e.g. "netinet6?/tcp" "", # default commitlog directory, e.g. "/home/ncvs/CVSROOT/commitlogs" (), # grep patterns ) ; # map option names to variables my %optctl = ( "and" => \$and_patterns, "body-attributes" => \$body_attributes, "branch" => \$branch_wanted, "committer" => \$committer_wanted, "commitlogdir" => \$commitlogdir, "cvsweb" => \$cvsweb, "debuglevel" => \$debug, "d" => \$debug, # set or increment "fileheaders" => \$file_headers, "filename" => \$filenm_pattern, "from" => \$start_date, "headfoot" => \$html_header_and_footer, "hr" => \$hr, "insensitive" => \$case_insensitive, "link-committers" => \$hyperlink_committers, "logmessage" => \@logmsg_patterns, "maximum" => \$maximum_output, "number-commits" => \$number_commits, "outfile" => \$outfile, "prefix" => \$pfx, "queryprcgi" => \$query_pr_cgi, "sequential" => \$commits_are_time_sequential, "showallbranches" => \$show_all_branches, "stdin-with-prefix" => \$stdin_with_pfx, "sort-arguments" => \$sort_arguments, "to" => \$end_date, "urlsuffix" => \$urlsuffix, "verbose" => \$verbose, ) ; # print short usage information (synopsis) and exit with $?=64 sub usage { print STDERR "@_", "usage: $prog [--cvsweb=URL] [--urlsuffix=SFX] [--branch=TAG]\n", " [--showallbranches] [--outfile=FILE] [--committer=PATTERN]\n", " [--and] [--insensitive] [--logmessage=PATTERN]...\n", " [--link-committers] [--queryprcgi=URL] [--number-commits]\n", " [--maximum=NUM] [--sort-arguments] [--filename=PATTERN]\n", " [--commitlogdir=DIR] commitlog-file...\n", "or: $prog --help\n", "or: $prog --version\n" ; exit 64 ; } # show the pod documentation and exit sub help () { $ENV{PATH} = $origPATH ; { exec $perldoc, $0 } ; # try 'perldoc' in the same directory as perl itself $ENV{PATH} .= ":" 
. dirname( $Config{perlpath} ) ; { exec 'perldoc', $0 } ; print STDERR "$prog: cannot exec '$perldoc'\n" ; goto &usage ; # fallback } # html escape '<', '>', '&' and '"' sub html_escape ($) { local $_ = shift or croak "Missing argument" ; s/\&/&/g ; s/\"/"/g ; s/>/>/g ; s/= 2 ; my %config ; if ( !tie %config, 'Config::IniFiles', ( -file => $configfile, -default => COMMON ) ) { $" = "\n" ; die "$0: cannot tie to config file '$configfile'\n", "@Config::IniFiles::errors\n" ; } # get my (sub)section(s) my ( @sections, %param_hash ) ; push @sections, $config{ +COMMON } if exists $config{ +COMMON } ; while ( my ( $section, $hashref ) = each %config ) { push @sections, $hashref if $section =~ m!\Q$prog\E!io ; } # get a list of all parameters from the source code text my @param_list ; open SELF, "< $0" or die "$prog: cannot read '$0': $!\n" ; while () { last if m!! ; # stop scanning $0 at _this_ very line next unless m!!i ... m!!i ; push @param_list, m![\$\@\%\*\&]\w+!g unless m!^\s*[;\#]! ; } close SELF or die ; # read and assign my parameters my $parameter_count = 0 ; foreach (@param_list) { my ( $type, $name ) = m!^(.)(\w+)! ; next unless $type =~ m![\$\@]! ; # only scalars and arrays possible $param_hash{$name} = undef ; # remember parameter names for code below foreach my $section (@sections) { if ( exists $section->{$name} ) { if ( $type eq "\$" ) { # assign scalar eval qq! \$$name = \$section->{'$name'} ! ; } elsif ( $type eq '@' ) { # split and assign to array eval qq! \@$name = \@{\$section->{'$name'}} ! ; } die "$prog: cannot assign to $type$name: $@\n" if $@ ; $parameter_count++ ; eval qq! \$\"=", \\n\\t"; print STDERR "\\$type$name = '$type$name'\\n" ! 
if $debug >= 5 ; } } } print STDERR "$prog: WARNING: no parameters read from '$configfile'\n" unless $parameter_count ; # warn about unrecognized parameters (uses %param_hash built above) my @unrecognized ; foreach my $section (@sections) { foreach my $parameter ( keys %{$section} ) { push @unrecognized, $parameter unless exists $param_hash{$parameter} ; } } if (@unrecognized) { @unrecognized = sort @unrecognized ; print STDERR "$prog: WARNING: ", "the following parameters are not recognized:\n", "@unrecognized\n" ; } # untie untie %config or die "untie failed" ; } else { print STDERR "no config file found\n" if $debug >= 2 ; } # 2nd step: process command line options if ( !GetOptions( \%optctl, "cvsweb|url=s", "branch|tag=s", "version!", "urlsuffix|suffix=s", "outfile|output=s", "committer|com=s", "showallbranches!", "debuglevel=i", "d+", "queryprcgi=s", "logmessage|logstring|logpattern|logmsg=s@", "link-committers!", "and!", "number-commits!", "insensitive!", "body-attributes=s", "headfoot!", "from|after|since=s", "to|upto|before=s", "verbose!", "hr!", "maximum|atmost=i", "prefix=s", "stdin-with-prefix!", "fileheaders!", "sequential!", "sort-arguments!", "help!", "filename|file=s", "commitlogdir=s", ) ) { usage() ; } help() if $optctl{help} ; if ( $optctl{version} ) { print "$version\n" ; exit 0 ; } $verbose = 1 if $debug ; if ($hr) { $hr = "
" ; } else { $hr = "" ; } $unknown_prefix = "-|$unknown_prefix" unless $stdin_with_pfx ; # convert the "from ... to" time interval into seconds since the epoch my $date_wanted = 0 ; if ( $start_date || $end_date ) { $date_wanted = 1 ; eval "require Time::ParseDate" ; if ($@) { die "$prog: required module Time::ParseDate not available\n" ; } else { import Time::ParseDate qw(parsedate) ; } # parse the date(s), check start <= end my $err ; my %opts = ( WHOLE => 1, PREFER_PAST => 1, VALIDATE => 1 ) ; if ($start_date) { ( $start_date, $err ) = parsedate( $start_date, %opts ) ; die "$prog: bad date: $err\n" unless $start_date ; } else { $start_date = 0 ; } if ($end_date) { ( $end_date, $err ) = parsedate( $end_date, %opts ) ; die "$prog: bad date: $err\n" unless $end_date ; } else { $end_date = time() + 86400 ; } if ( $start_date > $end_date ) { die( "$prog: negative time interval:\n\t'", scalar( localtime($start_date) ), "' ==> '", scalar( localtime($end_date) ), "'\n" ) ; } print STDERR ( "$prog: '", scalar( localtime($start_date) ), "' ==> '", scalar( localtime($end_date) ), "'\n" ) if $verbose ; } # pre compile search patterns to speed up log message grepping my @compiled_logmsg_patterns = map { $case_insensitive ? qr/$_/ims : qr/$_/ms } @logmsg_patterns ; if ( $outfile ne "-" ) { close(STDOUT) or die ; $outfile = $1 if $outfile =~ /^(.*)$/ ; # untaint open( STDOUT, "> $outfile" ) or die "$prog: redirect output to '$outfile': $!.\nStopped" ; } # VARIABLES my ( @f, $fname, $uri, $htm, $dir, $found_dir, $logmessage, $headline ) ; $cvsweb =~ s!/+$!! ; # remove trailing slashes my $cvsweb_suffix = "" ; $cvsweb_suffix = "?only_with_tag=" . $branch_wanted if $branch_wanted && !$show_all_branches ; $cvsweb_suffix .= ( $cvsweb_suffix ? "&" : "?" ) . 
$urlsuffix if $urlsuffix ; my $copy = 0 ; # copy from input flag: 0..suppress, 1..buffer, 2..copy my $buf = "" ; # output buffer (until copy/suppress decision is done) my $files = 0 ; # 1 if inside added/modified/removed files section my $RCP = 0 ; # 1 if inside "Revision Changes Path" section my $log = 0 ; # 1 if inside "Log:" section my $filenm_found = 0 ; # found --filename pattern in actual commit message my ( $commit_count_in, $commit_count_out ) = ( 0, 0 ) ; my ( $committer, $date, $time, $tz, %by_committer, $branchtag ) ; # (do not confuse the parsed $branchtag with $branch_wanted from commandline) # parse the line in $_ sub parse_line() { { print STDERR if $debug >= 5 ; if (/^\s*$/) { $files = $RCP = 0 ; # empty lines close "files" or "RCP" sections next ; } @f = split ; # check for begin of new commit message: # look for a "committer" "date" "time" "tz" line if ( ( $#f == 2 || $#f == 3 ) && $f[0] =~ /^\S+$/ && $f[1] =~ m!^(?:\d\d)?\d\d([-/])\d\d\1\d\d$! && $f[2] =~ /^\d\d:\d\d:\d\d$/ # && ($#f == 2 || $f[3] =~ /^[A-Z]{3,4}$/ ) && /^\S+/ # committer name must start in first column ) { # finish last commit? if ( $copy == 1 ) { my $rejected = "" ; # is the branch correct? if ( $branch_wanted && $branch_wanted ne $branchtag ) { # we take this code path only for "ancient" imports... $rejected = "not branch '$branch_wanted' (???)" ; } # did we find a --filename file? elsif ( $filenm_pattern && !$filenm_found ) { $rejected = "not file '$filenm_pattern'" ; } # grep the log message? elsif (@logmsg_patterns) { my $accepted = 0 ; goto no_logmessage if !$logmessage ; if ($and_patterns) { # all patterns ANDed $accepted = 1 ; foreach my $pat (@compiled_logmsg_patterns) { if ( $logmessage !~ /$pat/ ) { $accepted = 0 ; last ; } } } else { # all patterns ORed foreach my $pat (@compiled_logmsg_patterns) { if ( $logmessage =~ /$pat/ ) { $accepted = 1 ; last ; } } } no_logmessage: if ( !$accepted ) { $rejected = $logmessage ? 
"not" : "missing" ; $rejected .= " logmessage" ; } } if ($rejected) { --$by_committer{$committer} ; # adjust per committer count print STDERR "$rejected\n" if $debug >= 3 ; } else { #accepted $commit_count_out++ ; print $headline, $buf ; $headline = $buf = "" ; print STDERR "ACCEPTED $commit_count_out\n" if $debug >= 3 ; } } # finished last commit # initialize all those state variables :( $copy = $files = $log = $RCP = 0 ; # not inside any section yet $branchtag = $buf = $logmessage = "" ; # not known yet $filenm_found = 0 ; $dir = $found_dir = undef ; # output limit reached? if ( $maximum_output && $commit_count_out >= $maximum_output ) { print STDERR "LIMIT $commit_count_out reached\n" if $debug >= 3 ; print "

limit of $commit_count_out commits reached

\n" ; local $^W = 0 ; last LINE; } # PROCESS THE COMMIT MESSAGE $commit_count_in++ ; print STDERR "found commit $commit_count_in: @f\n" if $debug >= 2 ; ( $committer, $date, $time, $tz ) = @f ; # $tz may be undefined or junk if we have a "historic" commit message $tz = "" if !defined($tz) || $tz !~ /[A-Z]{3,4}/ ; # filter by committer if ( $committer_wanted && $committer !~ m!^(?i:$committer_wanted)$! ) { # we are not for this committer print STDERR "not committers '$committer_wanted'\n" if $debug >= 3 ; next ; } # filter by date (inefficient algorithm) if ($date_wanted) { my $datetime = "$date $time $tz" ; my $commit_date = parsedate($datetime) || die ; if ( $commit_date < $start_date ) { print STDERR "too early '$datetime'\n" if $debug >= 3 ; next ; } if ( $commit_date > $end_date ) { local $^W = 0 ; print STDERR "too late '$datetime'\n" if $debug >= 3 ; last LINE if $commits_are_time_sequential ; next ; } } if ( $branch_wanted || @logmsg_patterns || $filenm_pattern ) { $copy = 1 ; # buffer output until we have seen the branch / log / f print STDERR "want to see b/l/f: ", "$branch_wanted / @logmsg_patterns / $filenm_pattern\n" if $debug >= 6 ; } else { $copy = 2 ; ++$commit_count_out ; print STDERR "ACCEPTED (head) $commit_count_out\n" if $debug >= 3 ; } my $count = ++$by_committer{$committer} ; # num. of commits by committer # (The above count will have to be decremented if we decide to filter # this commit message later.) my ( $next, $prev ) = ( $count + 1, ( $count > 1 ? $count - 1 : undef ) ) ; if ($hyperlink_committers) { $uri = uri_escape($committer) ; $htm = html_escape($committer) ; # insert a link to this committer's next commit s!^\Q$committer\E!$htm$extra_font1 ($count)$extra_font2! ; if ($prev) { # insert a link to this committer's previous commit s!$! $extra_font1(previous)$extra_font2! 
; } } # make an anchor for this commit message if ($number_commits) { my ($anchor) ; if ( $ARGV =~ /^(\w+(\.\d+)?)/ ) { $anchor = $1 ; $anchor =~ tr /.//d ; } else { $anchor = "commit" ; } $anchor .= "/$commit_count_in" ; s!^!$extra_font1($anchor) $extra_font2$hr! ; } elsif ($hr) { s/^/$hr/ ; } next ; } # end of headline processing die "Bad input format: $_\nStopped" if /^\S/ # only commit headlines (see above) start in column 1 && !/^(\w+\s)+repository/i ; # except on the DragonFly repository # No need to parse further? next unless $copy ; # check for a new repository directory or imported sources if ( /^ (\S+) - New directory$/ || /^ (\S+) - Imported sources$/ ) { $found_dir = $1 ; $branchtag = "MAIN" ; # Let's assume imports go to MAIN if ( $copy == 1 ) { if ( $branch_wanted && $branch_wanted ne "MAIN" ) { --$by_committer{$committer} ; # adjust per committer count $copy = 0 ; # suppress this commit print STDERR "not branch '$branch_wanted' (new dir/import)\n" if $debug >= 3 ; next ; } else { print STDERR ( /New/ ? "NEW DIR" : "IMPORTED", " $commit_count_out\n" ) if $debug >= 3 ; } } # get the real directory name # hyperlink the directory if ( $ARGV !~ /^($unknown_prefix)/o ) { $htm = html_escape($found_dir) ; $uri = url_escape("$cvsweb/$found_dir$cvsweb_suffix") ; s!\Q$found_dir\E!$htm! ; } next ; } # check for (another) begin of "files" section # (there can be 1..3 per commit) if ( $#f >= 1 && $f[0] =~ /^(Added|Branch:|Modified|Removed):?$/ && ( $f[0] =~ /:$/ || $f[1] eq 'files:' ) ) { $files = 1 ; $log = $RCP = 0 ; $found_dir = undef ; if ( $#f == 3 && $f[2] eq "(Branch:" ) { $branchtag = $f[3] ; $branchtag =~ s/\)$// ; # remove ')' } elsif ( $f[0] eq "Branch:" ) { # old style commit message format $branchtag = $f[-1] ; } else { $branchtag = "MAIN" ; } $htm = html_escape($branchtag) ; s!\b(\Q$branchtag\E)\b!$branch_font1$htm$branch_font2! 
; # filter on branch if ( $copy == 1 ) { if ( $branch_wanted && $branch_wanted ne $branchtag ) { --$by_committer{$committer} if $copy ; # adjust committer count $copy = 0 ; print STDERR "not branch '$branch_wanted'\n" if $debug >= 3 ; } elsif ( !@logmsg_patterns && !$filenm_pattern ) { $copy = 2 ; ++$commit_count_out ; print STDERR "ACCEPTED (branch) $commit_count_out\n" if $debug >= 3 ; } } if ( $f[0] =~ /:$/ ) { # old style commit message if ( $ARGV =~ /^($unknown_prefix)/o ) { if ( $f[1] =~ m!/ncvs/(\S+)$! ) { $found_dir = $f[1] ; $dir = $1 ; } else { $dir = $found_dir = "" ; # we do not know the prefix } } else { $found_dir = $f[1] ; $dir = "${pfx}$found_dir" ; } } else { $dir = $found_dir = undef ; next ; } } # check for begin of "Log:" section if (/^ Log(?: Message)?:$/) { start_of_log: $files = $RCP = 0 ; # begin of "log" section closes "files" section $log = 1 ; print STDERR "found Log: >>$_" if $debug >= 4 ; next ; } # check for end of "Log Message" in "Imported sources" commits if ( $found_dir && /^ (Status|Vendor Tag|Release Tags):/ ) { $log = 0 ; } # check for begin of "RCP" section if ( $#f == 2 && $f[0] eq "Revision" && $f[1] eq "Changes" && $f[2] eq "Path" && /^ Revision\b/ ) { $RCP = 1 ; $log = $files = 0 ; # begin of "RCP" section closes "Log:" section warn "Zero logmessage: $committer $date $time\n" if !$logmessage && $copy == 1 && $debug >= 4 ; print STDERR "found RCP\n" if $debug >= 4 ; next ; } # handle data inside "files" section if ($files) { # we are inside the "files" section # if a string contains a slash (/) it is the directory name if (m!^(?: {4}| {15})(\S+)\s!) { # find directory depending on leading whitespace: # NEW: /^ {4}(\S+)/ OR OLD: /^ {15}(\S+)/ $dir = $found_dir = $1 ; if ( $ARGV =~ /^($self_prefix)/o ) { $dir = "$1/$dir" ; } elsif ( $ARGV =~ /^($unknown_prefix)/o ) { if ( $found_dir =~ m!/ncvs/(\S+)$! 
) { $dir = $1 ; } else { $dir = "" ; # we do not know the prefix } } else { $dir = "${pfx}$dir" ; } print STDERR "found directory: '$dir'\n" if $debug >= 4 ; } if ( !defined($dir) || !defined($found_dir) ) { # Format error (maybe a commit message pasted into the log # of another one...) goto start_of_log; } next unless $dir ; foreach my $f (@f) { if ( $f eq $branchtag || $f =~ /^(Added|Branch|Modified|Removed):$/ # old style || $f =~ /\.$/ ) { next ; } $filenm_found ||= $f =~ /$filenm_pattern/o if $filenm_pattern ; $htm = html_escape("$f") ; if ( $f eq $found_dir ) { $uri = url_escape("$cvsweb/$dir$cvsweb_suffix") ; } else { $uri = url_escape("$cvsweb/$dir/$f$cvsweb_suffix") ; } s!\b\Q$f\E(?=\s)!$htm! ; } } # end of code for files section # handle data inside "Log:" section if ($log) { # handle old style import lines if (m!^ [A-Z] (\S+/\S+)$!) { if ($dir) { $htm = html_escape($1) ; $uri = url_escape("$cvsweb/$1$cvsweb_suffix") ; s!\b\Q$1\E\b!$htm! ; } next ; # this is not part of the log message } $logmessage .= $_ if $copy == 1 ; # accumulate logmessage $_ = html_escape($_) ; # html_escape logmessage text # hyperlink http:// and ftp:// URLs s!\b((?i:https?|ftp)://\S+)! ''.html_escape($1).""!eg ; # hyperlink PR number (only one) if (m!\bPR\s*[:\#]\s*((?:\w+/)?\d+)\b!) { $uri = url_escape("$query_pr_cgi?pr=$1") ; $htm = html_escape("$1") ; s!\b$1\b!$htm! ; } # hyperlink submitter email address(es) if (/^\s+[A-Z][a-z]+.*(?i:by|from):/g) { s%\b([\w+=\-.!]+@[\w\-]+(\.[\w\-]+)+)\b% '' . html_escape($1) . ""%eg ; } else { # only within '<' '>' s%(?:<|<)([\w+=\-.!]+@[\w\-]+(\.[\w\-]+)+)(?:>|>)% '<' . html_escape($1) . '>'%eg ; } # emphasize searched text (you'd rather say '.+' than '.' 
for testing) for my $pat (@compiled_logmsg_patterns) { s!($pat)!$log_font1$1$log_font2!g ; } } # end of "Log:" section handling # handle Import message pathnames elsif ( $found_dir && /^ [A-Z] (\S+)$/ ) { my $f = $1 ; next unless $f =~ /\// ; # must contain slash $filenm_found ||= $f =~ /$filenm_pattern/o if $filenm_pattern ; $htm = html_escape("$f") ; $uri = url_escape("$cvsweb/$f$cvsweb_suffix") ; s!\b\Q$f\E(?=\s)!$htm! ; } # handle data inside "RCP" section # lines have this format: "rev" "+x" "-y" "path" ["(new)"] if ($RCP) { if ( !m%^\s+ (\d+(?:\.\d+)+) # $1: revision \s+ ([+]\d+\s[-]\d+) # $2: changes +lines -lines \s* (\S+) # $3: pathname (?:\s+ \((new|dead)\))? # $4: optional "new" or "dead" \s* $ %x && !m%^\s+ (1.1) # $1: revision 1.1 \s+ () # $2: empty (no changes) \s* (\S+) # $3: pathname (?:\s+ \((new|dead)\))? # $4: optional "new" or "dead" \s* $ %x ) { # Format error (maybe a commit message pasted into the log # of another one...) goto start_of_log; } my ( $rev, $plusminus, $path ) = ( $1, $2, $3 ) ; $path =~ s%^/.*?cvs/%%i ; # DragonFly fix for 2003/08/07 14:17 my $htm = html_escape($path) ; $filenm_found ||= $path =~ /$filenm_pattern/o if $filenm_pattern ; my $prev = $rev ; # previous revision if ( $prev =~ /\.(\d+)$/ ) { my $lsn = $1 ; # least significant number in revision $lsn -= 1 ; if ($lsn) { # delta x.y => x.(y+1) $prev =~ s/$1$/$lsn/ ; } else { # delta x.y => x.y.z.1 $prev =~ s/\.\d+\.\d+$// ; } my $suffix = ( $cvsweb_suffix ? "$cvsweb_suffix&" : "?" ) ; # order of substitutions (line end to begin) is important here! # hyperlink the source file $uri = url_escape("$cvsweb/$path$cvsweb_suffix") ; s!(?<=\d|\s)\Q$path\E\b!$htm! ; # hyperlink the delta $uri = url_escape("$cvsweb/$path.diff${suffix}r1=$prev&r2=$rev") ; s!\Q$plusminus\E!$plusminus! if $plusminus ; # hyperlink the new revision $uri = url_escape("$cvsweb/$path") . "#" . url_escape("rev$rev$cvsweb_suffix") ; s!\b\Q$rev\E\b!$rev! 
; } } } continue { if ( $copy == 2 ) { # copy out if ($headline) { print $headline; $headline = "" ; } if ($buf) { print $buf; $buf = "" ; } print ; } elsif ( $copy == 1 ) { # accumulate into $buf $buf .= $_ ; } else { # suppress output $buf = "" ; } } } # end of parse_line() ######### MAIN PROGRAM ######### if ( !@ARGV ) { usage() if -t ; # only tty input $file_headers = 0 ; unshift( @ARGV, '-' ) ; } else { my ( $argv, @new_argv, @glob ) ; foreach $argv (@ARGV) { if ($commitlogdir) { if ( -f "$commitlogdir/$argv" ) { push @new_argv, "$commitlogdir/$argv" ; next ; } @glob = grep { -f $_ } glob "$commitlogdir/$argv" ; if (@glob) { push @new_argv, @glob ; next ; } } if ( -f $argv ) { push @new_argv, $argv ; next ; } @glob = grep { -f $_ } glob $argv ; if (@glob) { push @new_argv, @glob ; next ; } print STDERR "$prog: no commitlog-file '$argv' found\n" ; } @ARGV = @new_argv ; if ($sort_arguments) { local $^W = 0 ; @ARGV = sort { -M $b <=> -M $a } @ARGV ; } } usage() unless @ARGV ; my $title = "$prog" ; foreach my $arg (@ARGV) { $title .= " " . basename($arg) ; } html_escape($title) ; print < $title
EndOfHeader

FILE:
while ( $ARGV = shift ) {
    # The input file is opened further down with a 2-argument open(), which
    # interprets mode characters embedded in the file name.  Refuse every
    # name that would make that open() write to a file or run a command:
    #   - 2-arg open() strips surrounding whitespace before it looks at the
    #     name, so leading blanks must not hide a '>', '+>' or '|' prefix;
    #   - a trailing '|' (including the special name '-|') means "read from
    #     this command" respectively "fork".
    # Directories are ignored as well.  The plain name '-' (standard input)
    # is still accepted.
    if (   $ARGV =~ /^\s*(?:\+?>|\|)/
	|| $ARGV =~ /\|\s*$/
	|| -d $ARGV )
    {
	print STDERR "$prog: ignoring '$ARGV'\n" ;
	next ;
    }

    # See if we can skip a file because of its age: when a date interval
    # was requested and the file was last modified before the start of
    # that interval, the file is not parsed at all.
    if ($date_wanted) {
	# -M yields the age in days relative to script start ($^T); it is
	# undef when the name cannot be stat'ed.
	my $age_in_days = -M $ARGV ;
	if ( $age_in_days && $start_date ) {
	    my $modified_at = $^T - $age_in_days * 86400 ;   # epoch seconds
	    if ( $modified_at < $start_date ) {

		# this input file is too old, we can skip it
		print STDERR "$prog: file '$ARGV' is too old, skipping it\n"
		  if $debug >= 2 ;
		next FILE;
	    }
	}
    }
    if ( open( ARGV, "$ARGV" ) ) {
	$. = $commit_count_in = $commit_count_out = 0 ;
	my $gz = gzopen( \*ARGV, "rb" ) or die $ARGV ;
	print STDERR "$prog: parsing '$ARGV'\n" if $debug >= 1 ;
	$headline = '' ;
	$headline .= "

" . html_escape($ARGV) . "

\n" if $file_headers ; $ARGV = basename($ARGV) ; LINE: while ( $gz->gzreadline($_) > 0 ) { $.++ ; &parse_line() ; } # There are errors returned here when closing. Since we are only # reading we don't care. There could be a memory leak in Perl or # Compress::Zlib (I didn't check) however there is no descriptor # leak (verified with lsof). $gz->gzclose() ; close(ARGV) ; if ( $commit_count_in == 0 ) { warn "$prog: no commit messages found in $ARGV\n" if $debug >= 2 ; } elsif ( $commit_count_out == 0 ) { warn "$prog: no commit messages converted from $ARGV\n" if $commit_count_in > 1 && $verbose || $debug >= 2 ; } elsif ( $commit_count_in != $commit_count_out ) { warn "$prog: $ARGV: $commit_count_out of $commit_count_in commit messages converted\n" if $verbose ; print "$extra_font1$commit_count_out of $commit_count_in commit messages displayed$extra_font2

\n" if $file_headers ; } } else { print STDERR "$prog: $ARGV: $!\n" ; } } print <<'EndOfFooter' if $html_header_and_footer ;

EndOfFooter if ( $outfile ne "-" ) { close(STDOUT) or die ; } __END__; =head1 NAME commitlog2cvsweb - convert FreeBSD commitlog files to HTML =head1 SYNOPSIS =over 4 =item . commitlog2cvsweb [--cvsweb=I] [--urlsuffix=I] [--branch=I] [--showallbranches] [--outfile=I] [--committer=I] [--and] [--insensitive] [--logmessage=I]... [--link-committers] [--queryprcgi=I] [--number-commits] [--maximum=I] [--sort-arguments] I... =item . commitlog2cvsweb --version =back =head1 DESCRIPTION The commitlog2cvsweb program takes one ore more commitlog files and converts them into HTML. The commitlog files are usually stored under F and can be retrieved by cvsup-ing the cvsroot-common and cvsroot-src collections in CVS mode. (See L, http://www.FreeBSD.org/doc/en/books/handbook/cvsup.html and L.) The generated HTML has hyperlinks to the C script. This means you can click on any of the changed files and see the CVS log (change history) and have access to all the revisions and deltas. (The CGI script C was originally written by Bill Fenner for the FreeBSD project. It allows browsing of CVS-repositories with a HTML-browser. CVS is a popular version control system. C is written by John Polstra . It is a network distribution package for CVS repositories.) Options may be abbreviated to a unique prefix. The options are as follows: =over 4 =item --cvsweb=I Specify URL of cvsweb.cgi script. Default: http://cvsweb.FreeBSD.org/ =item --urlsuffix=I Specify some extra information for appending to generated URLs. (You should not type a leading C or C<&> character because it will be added automatically.) =item --branch=I Output only commit messages from branch I. =item --showallbranches Tell C that you want to see file revisions on all branches. The default behaviour is to show only revisions on the branch selected with C<--branch>. =item --outfile=I Specify the output file. If no output file is specified standard output is used. =item --committer=I Output only commit messages from committer I. 
More than one committer is requested by separating the committer names with a bar. Committer names in I are case insensitive and must match the full committer name. =item --filename=I Output only commits affecting file I. (Note that the pattern is in Perl syntax and not in Shell syntax!) =item --logmessage=I Output only commits with matching commit log messages. This option may be specified multiple times. =item --and Output only commits matching all logmessage patterns (AND). The default is to output commits matching any pattern (OR). =item --insensitive Ignore case when matching logmessage patterns. =item --link-committers Add hyperlinks to committer's previous/next commit. =item --number-commits Sequentially number all commits. (Numbering is done on a per file basis.) =item --queryprcgi=I Use I to query problem reports. Default: http://www.FreeBSD.org/cgi/query-pr.cgi =item --maximum=I Output at most I commits per input file. =item --body-attributes=I Specify attributes for the html BODY tag. =item --noheadfoot Suppress output of HTML, HEAD and BODY tags. This is for including the output into another HTML file. =item --from=I Output only commit messages made after I. Options C<--after> and C<--since> are aliases for C<--from>. =item --to=I Output only commit messages made before I. Options C<--before> and C<--upto> are aliases for C<--to>. If I does not contain a timezone then the date is interpreted according to the local timezone. Check L for supported date/time formats. =item --nofileheaders Do not emit H1 tags for each input file. =item --commitlogdir=I Look for commitlogfiles in I. (May contain shell wildcard characters.) =item --sort-arguments Arguments (input files) are processed in order of their modification times. This is useful to process several commitlog files in chronological order. =item --version Print version information and exit. =back Input file names may contain shell wildcard characters ('C<*>', 'C'). 
The -C<--commitlogdir> is prepended if given. Input files may have been compressed with C. =head1 FILES C looks for a configuration file in three places. =over 4 =item * If the variable C is set in the environment its content is interpreted as the name of the configuration file, otherwise =item * the file F<~/.cvsweb-converters.conf> is examined, and finally =item * F<@PREFIX@/etc/cvsweb-converters.conf> is tried. =back Only the first existing file is used. =head1 EXAMPLES =head2 Step1: Retrieving the commitlog files As a first step you need to get commitlog files. Retrieve them with cvsup(1). An example supfile is: # Change the next line to use your nearest CVSup mirror site, check # out http://www.FreeBSD.org/doc/en/books/handbook/cvsup.html *default host=cvsup2.FreeBSD.org base=/usr prefix=/home/ncvs *default release=cvs delete use-rel-suffix # If you have a slow network link, uncomment the following line! #*default compress # Next two lines will fetch/sync the commitlogs of /usr/src only. # If you are interested in doc/ports/projects too then you will # need "cvsroot-all" instead of only -common and -src. #cvsroot-all cvsroot-common cvsroot-src Store this as file F and run cvsup: C This will retrieve all commitlog files. That is some 32 MiB as of June 2006. If you are not interested in old logs use a refuse file as described in L or a more selective C<-i> option in the above command line example. =head2 Step2: HTMLizing with commitlog2cvsweb To check what was going on in the area of FreeBSD kernel development since last monday run this command: C and open F. To see all kernel commits mentioning C or C dating from the year 2006 run: C Note how C<\b> (word boundary) was used to avoid matching C! =head1 BUGS This implementation is too slow. Commit messages pasted into logs of other commits may confuse the parser. Timestamps in commitlog files without a timezone are interpreted according to the local timezone. 
(These timestamps occur only in very old FreeBSD commit logs.) Option C<--link-committers> always adds a link to the next commit made by the same committer. This is wrong for the last one. (This bug is a consequence of single pass processing.) =head1 AUTHOR Martin Kammerhofer =cut # Local Variables: # mode: perl # End: #EOF#