#!/usr/bin/perl -w
# Copyright (c) 2004-2007 Matthew Seaman. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# 1. Redistributions of source code must retain the above
# copyright notice, this list of conditions and the following
# disclaimer.
#
# 2. Redistributions in binary form must reproduce the above
# copyright notice, this list of conditions and the following
# disclaimer in the documentation and/or other materials
# provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS''
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
# TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
# PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
# USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
# OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
# SUCH DAMAGE.
# @(#) $Id: cache-update,v 1.38 2007/08/05 16:35:55 matthew Exp $
#
# Generate an incremental update to the cached 'make describe' output,
# for the listed port origins. Requires (a) previously built cache of
# the whole ports tree, including a table showing which ports are
# slave ports and which are masters and (b) a method for automatically
# determining which ports were affected since the last cache update
#
# Running the make_describe method on a candidate port will chdir to
# that path and run 'make -V ...' in the given directory, from which
# it extracts the values of a number of make variables. Internal
# processing of that output generates the equivalent of running 'make
# describe' (but we have saved executing a whole perl process for each
# port) and that is converted into a FreeBSD::Portindex::Port or a
# FreeBSD::Portindex::Category object, which is frozen (serialized)
# and saved in the ports cache. It extracts the values of a couple of
# other make variables: MASTER_PORT, .MAKEFILE_LIST and SUBDIRS. That
# output is recorded in the cached object as well (MASTER_PORT only if
# it is set, and not the same as CWD, and only MAKEFILE_LIST items within
# the ports tree, SUBDIRS only for Category objects). Failing to run
# make successfully results in that port data being removed from the
# cache -- which is how deletions and port moves are handled.
#
# The Master/Slave port relationships recorded from a previous run are
# applied to force an update for all slave ports whenever the master
# port is updated. Ditto if an included Makefile is updated -- all
# ports including it are added to the list of update candidates. (If
# it happens that /usr/ports/Mk/bsd.port.mk is updated, that means do
# every port)
use strict;
use warnings;
use FreeBSD::Portindex::Config qw(read_config update_timestamp get_timestamp
compare_timestamps scrub_environment);
use FreeBSD::Portindex::Port;
use FreeBSD::Portindex::Tree;
# Package globals.  %Config is filled in by read_config() at the start
# of MAIN.  $pkgname is not referenced again in this file --
# presumably it is read by the FreeBSD::Portindex modules; confirm
# before removing.
our ( %Config, $pkgname );
$pkgname = 'portindex';

# Trim any leading directory components from $0, leaving just the
# script name for use in error messages.
$0 =~ s{.*/}{};
# Read a list of port origins, one per line, from the given file
# handle and count each of them in the %$updates hash.
#
#   $FH       file handle (or glob) to read from
#   $updates  hash reference: keys are the origins read, values are
#             incremented once per occurrence
#
# Blank and whitespace-only lines are ignored, so a stray empty line
# in the input is not turned into an update request for the empty
# path.  Returns the $updates hash reference.
sub read_plain (*$)
{
    my $FH      = shift;
    my $updates = shift;

    # Read into a lexical: a bare while (<$FH>) assigns to the global
    # $_ without localising it, clobbering the caller's value.
    while ( defined( my $origin = <$FH> ) ) {
        chomp $origin;
        next
          unless $origin =~ m/\S/;
        $updates->{$origin}++;
    }
    return $updates;
}
# Given a fully qualified path name that corresponds to one of the
# file names that would be found in a port or category directory
# (a Makefile, distinfo, pkg-* file or anything under files/), return
# the path of the enclosing port or category directory, suitable for
# passing to make_describe.
#
#   $portsdir  top of the ports tree, eg. /usr/ports
#   $name      fully qualified path of the file in question
#
# Returns undef if $name is not such a file, or if it lives under one
# of the infrastructure directories (Mk, Templates, Tools, distfiles,
# packages) at the top of the ports tree.
sub port_directory_part ($$)
{
    my $portsdir = shift;
    my $name     = shift;

    # $portsdir is quoted with \Q..\E so that regex metacharacters in
    # the path are matched literally.  No /o modifier: that would
    # freeze the pattern using the $portsdir from the very first call
    # and silently ignore a different value on later calls.
    if ( $name =~ m@^(\Q$portsdir\E\S*?)/(?:Makefile|distinfo|pkg-|files)@ ) {
        my $directory = $1;

        return $directory
          unless $name =~
              m@^\Q$portsdir\E/(?:Mk|Templates|Tools|distfiles|packages)@;
    }

    # Explicit undef keeps the single-value return the callers rely on.
    return undef;
}
# Treat the lines read from the given file handle as the output of
# cvsup(1) (which must have been run with at least -L 1 verbosity).
# Every 'ports/...' file name mentioned is turned into a full path;
# the port or category directory containing it (if any) and every
# origin that $tree->makefile_list() reports for that file are
# counted in %$updates.  Returns the $updates hash reference.
sub read_cvsup_output (*$$$)
{
    my ( $FH, $updates, $tree, $portsdir ) = @_;

    # $portsdir is usually /usr/ports, but may be any path ending in
    # .../ports (cvsup(1) requires the top-level directory to be
    # called 'ports').  Strip that last component to obtain the
    # directory the 'ports/...' names in the output are relative to.
    my $ports_parent = $portsdir;
    $ports_parent =~ s@/ports\Z@@;

    while (<$FH>) {
        chomp;
        if (m@\s(ports/\S+)@) {
            my $changed_file = "$ports_parent/$1";

            my $origin = port_directory_part( $portsdir, $changed_file );
            if ( defined $origin ) {
                $updates->{$origin}++;
            }

            # Anything recorded against this file in the tree's
            # makefile list needs rechecking as well.
            my @dependents = @{ $tree->makefile_list($changed_file) };
            for my $dependent (@dependents) {
                $updates->{$dependent}++;
            }
        }
    }
    return $updates;
}
# Scan through the cvsup checkouts file -- usually
# /var/db/sup/ports-all/checkouts.cvs:. -- and find all of the file
# entries marked with an mtime later than some cutoff point.  The
# cutoff point should be about 1 hour earlier than the last time
# cache-update or cache-init was run, as determined by the mtime of
# the DB timestamp file.
#
#   $FH          file handle to read the checkouts file from
#   $updates     hash reference counting the origins to be rechecked
#   $tree        object providing makefile_list()
#   $cutofftime  entries whose mtime is later than this are selected
#   $portsdir    top of the ports tree, eg. /usr/ports
#
# Records that are truncated, or whose attribute string does not
# decode to at least three fields, are skipped.  Returns the $updates
# hash reference.
sub read_cvsup_checkouts (*$$$$)
{
    my $FH         = shift;
    my $updates    = shift;
    my $tree       = shift;
    my $cutofftime = shift;
    my $portsdir   = shift;
    my $ports_location;

    # As for read_cvsup_output, cope with a non-standard $PORTSDIR
    ( $ports_location = $portsdir ) =~ s@/ports\Z@@;

    # Read into a lexical so the caller's $_ is left alone.
  LINE:
    while ( defined( my $line = <$FH> ) ) {
        my $name;
        my $rcs_attr;

        chomp $line;

        # Either "checkout" mode (C = currently existing, c = used to
        # be present, but now deleted), or "cvs" mode (V = existing,
        # v = now gone).
        next LINE
          unless $line =~ m/^[cCvV]/;

        # Extract the important data from either style of the
        # checkouts line: the attribute string is taken from the
        # fifth field of a c/C line and from the third of a v/V line.
        if ( $line =~ m/^[cC]/ ) {
            ( undef, $name, undef, undef, $rcs_attr, undef ) =
              split ' ', $line, 6;
        } else {
            ( undef, $name, $rcs_attr ) = split ' ', $line, 3;
        }

        # A truncated record has nothing usable in it.
        next LINE
          unless defined $name && defined $rcs_attr;

        # Can't short-circuit this checking -- any file could be a
        # Makefile from the .MAKEFILE_LIST output.  However, limit
        # the checks to just files, stripping the ',v' suffix that
        # is there due to CVS.
        next LINE
          unless $name =~ m@(ports/\S+),v@;
        $name = "$ports_location/$1";

        # Unpick the $rcs_attr record.  The third decoded field is
        # what gets compared with the cutoff time as the file's mtime.
        my @rcs_attrs = decode_attrs($rcs_attr);
        next LINE
          unless defined $rcs_attrs[2];

        if ( $rcs_attrs[2] > $cutofftime ) {
            my $pdpart = port_directory_part( $portsdir, $name );
            $updates->{$pdpart}++
              if defined $pdpart;

            for my $origin ( @{ $tree->makefile_list($name) } ) {
                $updates->{$origin}++;
            }
        }
    }
    return $updates;
}
# Decode an attribute string consisting of a sequence of
# length-prefixed fields, each written as "<length>#<value>" with the
# fields simply concatenated, eg. "2#1f1#13#abc" decodes to
# ( '1f', '1', 'abc' ).
#
#   $rcs_attr  the encoded attribute string
#
# The string is consumed from the front using the stated lengths, so
# a value may itself contain '#' characters and a zero-length field
# decodes to the empty string rather than ending the parse.  Decoding
# stops at the first point where no "<digits>#" prefix is found; a
# final value shorter than its stated length is returned as far as it
# goes.  Returns the list of decoded values, which is empty for an
# undefined or empty argument.
sub decode_attrs ($)
{
    my $rcs_attr = shift;
    my @rcs_attrs;

    return @rcs_attrs
      unless defined $rcs_attr;

    # Strip one "<length>#" prefix at a time, then cut exactly that
    # many characters off the front as the field's value.
    my $remaining = $rcs_attr;
    while ( $remaining =~ s/\A(\d+)#// ) {
        my $length = $1;
        push @rcs_attrs, substr( $remaining, 0, $length, '' );
    }
    return @rcs_attrs;
}
# Scan through the PORT_DBDIR looking for 'options' files.  Compare
# the mtime of each file with the last update time recorded for the
# ports that $tree->makefile_list() associates with it -- add a port
# to the list to be checked if the options have been modified more
# recently.
#
#   $updates     hash reference counting the origins to be rechecked
#   $tree        object providing makefile_list() and get()
#   $port_dbdir  directory holding one sub-directory per port, each
#                of which may contain an 'options' file
#
# If $port_dbdir cannot be opened a warning is printed and $updates is
# returned unchanged.  Origins for which $tree->get() returns nothing
# are skipped.  Returns the $updates hash reference.
sub check_port_options ($$$)
{
    my $updates    = shift;
    my $tree       = shift;
    my $port_dbdir = shift;

    opendir my $dbdir_handle, $port_dbdir
      or do {
        warn "$0: Error. Cannot read directory \'$port_dbdir\' -- $!\n";
        return $updates;
      };

  ENTRY:
    while ( defined( my $dir = readdir $dbdir_handle ) ) {

        # Skip '.' and '..' (and anything else without a single word
        # character or '-' in its name).  NOTE(review): the pattern
        # is unanchored, so names that merely contain a dot are NOT
        # skipped; left as is, since anchoring it could exclude real
        # port option directories.
        next ENTRY
          unless $dir =~ m/[\w-]+/;

        # The Makefile generated by and included due to OPTIONS
        # processing
        my $options = "$port_dbdir/$dir/options";
        next ENTRY
          unless -r $options;

        # stat(_) reuses the result of the -r test just above.
        my $mtime = ( stat(_) )[9];

        for my $origin ( @{ $tree->makefile_list($options) } ) {
            my $port = $tree->get($origin);

            # Nothing retrievable for this origin: calling a method
            # on undef would abort the whole run.
            next
              unless defined $port;

            $updates->{$origin}++
              if $port->can("MTIME") && $port->MTIME() < $mtime;
        }
    }
    closedir $dbdir_handle
      or warn "$0: Error. Closing directory \'$port_dbdir\' -- $!\n";
    return $updates;
}
MAIN:
{
    # Work out the effective configuration and store it in %Config.
    read_config( \%Config );

    # Some makefiles affect the description of every port; when one
    # of those has changed it is better to rebuild the cache from
    # scratch with cache-init.  The timestamp comparison is done
    # here, before any input is read.
    compare_timestamps( \%Config );

    if ( $Config{ScrubEnvironment} ) {
        scrub_environment( \%Config );
    }

    # Read from the named input file rather than the inherited
    # STDIN, unless the input setting is '-'.
    unless ( $Config{Input} eq '-' ) {
        open STDIN, '<', $Config{Input}
          or die "$0: Can't open input $Config{Input} -- $!\n";
    }

    # Attach to the stored description, etc. data.
    my $tree = FreeBSD::Portindex::Tree->new(
        -Env           => { -Home => $Config{CacheDir}, },
        -CacheFilename => $Config{CacheFilename},
    );

    # Collect the origins to re-check, according to the input format.
    # (The original author notes that option processing enforces
    # correct $Config{Format} syntax.)
    my %updates;
    if ( $Config{Format} =~ m/plain/ ) {
        read_plain( *STDIN, \%updates );
    } elsif ( $Config{Format} =~ m/cvsup-output/ ) {
        $tree->init_makefile_list();
        read_cvsup_output( *STDIN, \%updates, $tree, $Config{PortsDir} );
    } elsif ( $Config{Format} =~ m/cvsup-checkouts/ ) {

        # Look back beyond the last cache update by the propagation
        # delay.
        my $cutofftime =
          get_timestamp( \%Config ) - $Config{PropagationDelay};
        $tree->init_makefile_list();
        read_cvsup_checkouts( *STDIN, \%updates, $tree, $cutofftime,
            $Config{PortsDir} );
    }

    # 'options' may be given on its own or combined with one of the
    # formats above.
    if ( $Config{Format} =~ m/options/ ) {
        $tree->init_makefile_list();
        check_port_options( \%updates, $tree, $Config{PortDBDir} );
    }

    # Stop straight away when no origins were collected.
    if ( !%updates ) {
        if ( $Config{Verbose} ) {
            print STDERR "$0: Nothing to do!\n";
        }
        exit 0;
    }

    # Category entries are handed to category_check() together with
    # the update list.  Keep sweeping over the (possibly altered)
    # list until a complete pass finds no category at all.
    my $counter = 0;
    while (1) {
        my $checked_before = $counter;
        for my $path ( keys %updates ) {
            next
              unless $tree->category_match($path);
            $counter++;
            if ( $Config{Verbose} ) {
                print STDERR
                  "$0:$counter: Checking for category changes at $path\n";
            }
            $tree->category_check( $path, \%updates );
        }
        last
          if $counter == $checked_before;
    }

    # Every slave of a listed master port is added unconditionally.
    # This only works if the cache is largely complete.  The list of
    # slaves is built in full before %updates is touched.
    $tree->init_masterslave();
    my @slaves = map { @{ $tree->masterslave($_) } } keys %updates;
    for my $slave (@slaves) {
        $updates{$slave}++;
    }

    # Mark the time at which the updates start
    update_timestamp( \%Config );

    # Regenerate the cached data for each listed origin, in sorted
    # order.
    $counter = 0;
    for my $path ( sort keys %updates ) {
        $counter++;
        if ( $Config{Verbose} ) {
            print STDERR "$0:$counter: Updating cached data for $path\n";
        }
        $tree->make_describe($path);
    }
}
__END__
=head1 NAME
cache-update -- Incrementally update the portindex cache
=head1 SYNOPSIS
B<cache-update> [B<-hvqs>] [B<-c> F<dir>] [B<-C> F<file>] [B<-T> F<file>] [B<-p> F<dir>] [B<-d> F<dir>] [B<-f> I<format>] [B<-P> num] [B<-i> F<file>] [B<-M> F<file>]... [B<-m> F<file>]...
=head1 DESCRIPTION
B<cache-update> processes a list of port origins, regenerating the
index (C<make describe>) and F<Makefile> dependency data (C<make -V
MASTER_PORT -V .MAKEFILE_LIST>) from each of them, and updating the
record of that data held in the B<portindex> cache. If
B<cache-update> is passed a port origin that no longer exists, it will
delete any corresponding record from the cache. To handle a port that
has been moved, it is necessary to pass B<cache-update> both the old
and the new locations of the port for processing.
The list of ports to be processed by B<cache-update> may be supplied
in four formats:
=over 8
=item B<plain>
A list of port origin directories, one per line. B<find-updated> may
be used to generate a list in this format of all ports containing
files modified after a given date and time, including deleted ports,
slave ports or ports that include a Makefile modified after the given
date and time.
=item B<cvsup-output>
The output from using B<cvsup> to update a checked-out copy of the
ports tree. (Needs B<cvsup> to be run with at least B<-L1> verbosity.)
=item B<cvsup-checkouts>
B<cache-update> can parse the record B<cvsup> keeps of all of the
files known to B<cvs> within the ports tree. By comparing the mtime
file attribute encoded within that file to the mtime of the
F<portindex-timestamp> file in the cache it can determine which files
have been modified since it was last run. When comparing the mtime
values, B<cache-update> has to allow for the time it takes to
propagate updates from the master CVS repository to the world-wide
cvsup servers. This process can occasionally result in missing some
port updates, but they should be handled in a following
B<cache-update>.
=item B<options>
B<cache-update> will search the directory tree where port options
settings are stored. It will compare the timestamps on the F<options>
files it finds with the timestamps recorded in the cache of the last
time the data for that port was updated. If the port options have
been updated more recently than the cache data, the cache entry is
refreshed.
=back
The B<options> format processing may also be combined with any of the
first three formats. The following alternative format settings are
recognised:
=over 8
=item B<plain,options>
=item B<cvsup-output,options>
=item B<cvsup-checkouts,options>
=back
Where one of the ports B<cache-update> has to process is recognised to
be a master port, all known slave ports of that master are
unconditionally added to the list of ports to reprocess. This is
necessary since two of the four input formats pick out the ports
needed to be re-checked by detecting modifications to files within
them. However the C<make describe> output of a slave port may well be
quite different after an update to its master port, even if none of
the files within the slave port have been modified.
Similarly, for input formats other than plain, where an update to a
Makefile is detected, all ports where that Makefile is included will
be added to the list of ports to reprocess. Only Makefiles included
from within the ports tree (ie. under F</usr/ports>, or F<$PORTSDIR>
if it is set in the environment) and if C<OPTIONS> processing is
enabled, from within the ports DB directory (ie under
F</var/db/ports>, or F<$PORT_DBDIR>) are considered. In certain
circumstances it may be more advantageous to run B<cache-init> rather
than B<cache-update>.
=head2 When to run B<cache-init> and when to run B<cache-update>
Over time, successively updating the F<INDEX> file via B<cache-update>
can inevitably produce minor inconsistencies and an F<INDEX> file that
diverges slowly from equivalence to what starting afresh would produce.
For best results it will be necessary to occasionally re-run
B<cache-init> and rebuild the cache from scratch. Certain changes to
your system should act as warnings that this needs to be done.
=over 8
=item *
Modifications to ubiquitously included makefiles such as
F</etc/make.conf> or F</usr/ports/Mk/bsd.port.mk>. B<cache-update>
will compare timestamps on these files with the cache timestamp and
attempt to warn you when a re-initialisation might be a good idea.
The list of ubiquitous makefiles to test in this manner can be set
using the C<--ubiquitous-makefile> command line option: repeat the
option to add more makefiles to the list, or by the
C<UbiquitousMakefiles> configuration file setting. Any makefile name
given as a non-absolute path will be taken as relative to the ports
directory, F</usr/ports> or the value of C<$PORTSDIR> in the
environment, or whatever the C<PortsDir> configuration file setting
contains.
Technically there are a number of other makefiles located outside
F</usr/ports> (C<$PORTSDIR>) which can be included any time
B<make> is invoked within the ports system, but these are ignored by
B<cache-update> and B<cache-init>. Most of those makefiles rarely
change, and when they do, the changes are unlikely to have any
material effect on the generated ports F<INDEX>.
The C<--endemic-makefile> option or C<EndemicMakefiles> configuration
file setting can be used to tell B<cache-update> of any other
makefiles (ie. from within C<PortsDir>) to be considered to have no
effect on the outcome of generating the F<INDEX> at all. Again,
non-absolute paths are taken to be relative to C<PortsDir>. Repeat
the option to add several entries.
=item *
Modifying the environment between successive runs of B<cache-update>.
B<make> variables can often be set from the environment, although
using F</etc/make.conf> would generally be a better idea. There are
two things that can be done to prevent this causing problems.
Firstly, the configuration file can contain live I<Perl> code: you can
modify the environment of the processes by manipulating the global
C<%ENV> hash from within the configuration file. Secondly both
B<cache-init> and B<cache-update> obey a C<--scrub-environment>
command line flag, and the equivalent C<ScrubEnvironment>
configuration file setting, which deletes everything from the
environment except for certain standard variables. As command line
options generally override configuration files, C<--scrub-environment>
will trump modifying C<%ENV>.
=item *
Installing or updating certain software packages. For instance, the
simple presence of the Gnome libraries on the system will cause many
packages to add a C<-gnome> suffix to their names. The F<editors/vim>
port is a good example of this behaviour. Ports containing Linux
software run under emulation will automatically detect which version
of the F<linux-base> ports you have installed: changing to a different
F<linux-base> port will affect the dependency lists for all Linux
software ports. Unfortunately it is practically impossible to detect
such changes and automatically update affected ports. These are not
the only two examples of such behaviour.
=back
=head2 Configuration Files
B<cache-update> shares configuration files with B<cache-init>,
B<find-updated> and B<portindex>. Any configuration settings are
taken from the following locations, where the later items on this list
override the earlier:
=over 8
=item *
Built-in settings from the B<FreeBSD::Portindex::Config> perl module.
=item *
The system wide configuration file F</usr/local/etc/portindex.cfg>
=item *
The per-user configuration file F<${HOME}/.portindexrc>. This file is
ignored if the process is run as root.
=item *
The local configuration file, found in the current working directory
of the B<cache-update> process F<./.portindexrc>. This file is ignored
if the process is run as root.
=item *
The program command line.
=back
All of the configuration files are optional. A summary of the
resultant configuration options including the effect of any command
line settings is printed as part of the help text when B<cache-update>
is invoked with the C<-h> option.
=head1 OPTIONS
=over 8
=item B<-h>
=item B<--help>
Print a brief usage message and a summary of the configuration
settings after command line processing and then exit.
=item B<-v>
=item B<--verbose>
Turn on verbose output printed to C<STDERR>. This is the default.
=item B<-q>
=item B<--quiet>
=item B<--noverbose>
Turn off verbose output to C<STDERR>. Using both the B<-v> and B<-q>
options together does not make any sense, but neither does it generate
an error. The last mentioned of the two options will prevail.
=item B<-s>
=item B<--scrub-environment>
Delete all environment variables except for C<$USER>, C<$HOME>,
C<$PATH>, C<$SHELL>, C<$TERM> and C<$TERMCAP>. This provides a
standardized environment for C<make describe> and other sub-processes.
=item B<--noscrub-environment>
Turn off environment scrubbing. All environment variables will be
passed through intact to C<make describe> and other
sub-processes. This is the default.
=item B<-c> F<dir>
=item B<--cache-dir>=F<dir>
The location of the B<portindex> data cache, by default
F</var/db/portindex>.
=item B<-C> F<file>
=item B<--cache-file>=F<file>
Berkeley DB Btree file containing the cached and processed values of a
number of C<make> variables for all of the ports in the tree. This
file name will be relative to the cache directory (B<-c> option above)
unless an absolute path is given. Defaults to F<portindex-cache.db>.
=item B<-T> F<file>
=item B<--timestamp-file>=F<file>
A file within the cache directory whose modification time marks the
last time that data was modified in or added to the cache. Defaults
to F<portindex-timestamp>.
=item B<-p> F<dir>
=item B<--ports-dir>=F<dir>
The location of the ports tree. Almost always defaults to
F</usr/ports> unless C<$PORTSDIR> is set in the environment.
=item B<-d> F<dir>
=item B<--port-dbdir>=F<dir>
Where the C<OPTIONS> settings for ports are stored. Almost always
defaults to F</var/db/ports> unless C<$PORT_DBDIR> is set in the
environment.
=item B<-i> F<file>
=item B<--input>=F<file>
Filename to read in order to generate the list of ports for which the
C<make describe> data needs to be reprocessed. F<-> means read from
STDIN, which is the default. Where the B<-f> format is set to
I<cvsup-checkouts>, the appropriate file to read is usually
F</usr/sup/ports-all/checkouts.cvs:.> or
F</var/db/sup/ports-all/checkouts.cvs:.> depending on the FreeBSD
version.
=item B<-f> { I<plain> | I<cvsup-output> | I<cvsup-checkouts> |
I<options> | I<plain,options> | I<cvsup-output,options> |
I<cvsup-checkouts,options>}
=item B<--format>={ I<plain> | ... }
Which input formats B<cache-update> should parse to determine the list
of ports to reprocess.
=item B<-P> I<num>
=item B<--propagation-delay>=I<num>
When I<cvsup-checkouts> or I<cvsup-checkouts,options> format is in
use, assume that any file with a modification time no more than I<num>
seconds older than the F<portindex-timestamp> cache file, plus all
files younger than the cache, were updated via B<cvsup> since the
previous time B<cache-update> was run. Use those file names as the
basis on which to determine which ports B<cache-update> needs to
reprocess. Note: this does cover files deleted from the ports, as the
checkouts file keeps a record of those.
Empirically a value of 3600 (1 hour) seems to be a reasonable first
choice, but this may need to be adjusted depending on how frequently
your cvsup server updates itself.
=item B<-M> F<file>
=item B<--ubiquitous-makefile> F<file>
Compare the modification timestamp on this file with the last time
that B<cache-update> was run, and indicate that B<cache-init> should
be run instead of B<cache-update>. Also don't trigger any recheck of
a port's description due to updates to this file. Output is
suppressed if running in C<--quiet> mode. This test is meant to pick
out makefiles such as F</usr/ports/Mk/bsd.port.mk>, where any
modification could affect the majority of ports -- in which case
running B<cache-init> instead would be a good idea.
Repeat this option to add more ubiquitous makefiles to the list of
files to have their modification times compared to the last cache
update time. Default: F<Mk/bsd.port.mk>, F</etc/make.conf>.
=item B<-m> F<file>
=item B<--endemic-makefile> F<file>
Ignore this makefile as a trigger for re-checking the C<make describe>
output for any ports. Non-absolute paths will be taken as relative to
C<PortsDir>. Repeat this option to add more makefiles to the
list. Default: F<Mk/bsd.sites.mk>, F<Mk/bsd.commands.mk>,
F<Mk/bsd.destdir.mk>.
=back
=head1 FILES
=over 16
=item F</usr/ports>
The default ports directory.
=item F</var/db/portindex>
The location of the data caches.
=item F<portindex-cache.db>
Btree file containing cached C<make describe> and other output.
=item F<portindex-timestamp>
This file contains the last time and date that the cache was updated
or modified.
=item F<__db.001>, F<__db.002>, F<__db.003>, F<__db.004>
Files used as part of the internal workings of BerkeleyDB, for memory
pool management and DB locking. Will be recreated automatically if
deleted.
=item F</usr/local/etc/portindex.cfg>
System-wide configuration file.
=item F<${HOME}/.portindexrc>
Per-user configuration file
=item F<./.portindexrc>
Local configuration file
=back
=head1 SEE ALSO
L<cache-init(1)>, L<portindex(1)>, L<find-updated(1)>, L<cvsup(1)>,
L<ports(7)>
=head1 BUGS
I<cvsup-checkouts> format mode is not completely accurate. Choosing the
correct propagation delay is a matter of guesswork.
B<cache-update> should optionally parse the contents of
B</usr/local/etc/pkgtools.conf> and apply settings from the
C<MAKE_ENV> array.
Changes to some makefiles outside the ports tree can have significant
effects, which aren't detected. For instance
F</usr/local/etc/php.conf>.
=cut
#
# That's All Folks!
#
syntax highlighted by Code2HTML, v. 0.9.1