#!/usr/bin/perl -w

# Copyright (c) 2004-2007 Matthew Seaman. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
#    1.  Redistributions of source code must retain the above
#        copyright notice, this list of conditions and the following
#        disclaimer.
#
#    2.  Redistributions in binary form must reproduce the above
#        copyright notice, this list of conditions and the following
#        disclaimer in the documentation and/or other materials
#        provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS''
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
# TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
# PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
# USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
# OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
# SUCH DAMAGE.

# @(#) $Id: cache-init,v 1.26 2007/08/05 15:00:12 matthew Exp $
#

# Build the portindex cache from scratch by scanning through the whole
# ports tree.  This only needs to be done once (or at infrequent
# intervals), which is a good thing, because this implementation is
# significantly slower than 'make index'.

use strict;
use warnings;
use BerkeleyDB;

use FreeBSD::Portindex::Config qw(read_config update_timestamp
  scrub_environment);
use FreeBSD::Portindex::Port;
use FreeBSD::Portindex::Tree;

# Package globals, declared with 'our' so they live in package main.
# %Config is handed by reference to read_config(), scrub_environment()
# and update_timestamp() in the MAIN block below.
our %Config;

# Not referenced again in this script -- presumably consulted by the
# FreeBSD::Portindex modules; TODO confirm.
our $pkgname = 'portindex';

$0 =~ s@.*/@@;    # Strip any leading directory path: bare script name for error messages

MAIN:
{
    # Collect the effective settings into %Config, then optionally
    # pare the environment down before doing any real work.
    read_config( \%Config );

    if ( $Config{ScrubEnvironment} ) {
        scrub_environment( \%Config );
    }

    # Ensure the cache directory exists (creating it when it is
    # missing) and carry out the rest of the setup from inside it.
    my $cache_dir = $Config{CacheDir};

    unless ( -d $cache_dir ) {
        mkdir $cache_dir, 0755
          or die "$0: Can't create $Config{CacheDir} -- $!";
    }

    chdir $cache_dir
      or die "$0: Can't change directory to ", $Config{CacheDir}, " -- $!";

    # Stale __db.00N files belong to the previous cache and will be
    # recreated automatically as necessary, so simply remove them.
    unlink glob('__db.0??');

    # Keep any pre-existing cache and timestamp files as backups by
    # renaming them with a ".old" suffix.
    for my $setting (qw( CacheFilename TimestampFilename )) {
        my $file = $Config{$setting};
        next unless -e $file;

        rename $file, $file . ".old"
          or die "$0: Can't make backup of ", $Config{$setting}, " -- $!";
    }

    # Open (creating) a fresh cache rooted in the cache directory.
    my %env_opts = (
        -Home => $cache_dir,
        -Mode => 0644,
    );
    my $tree = FreeBSD::Portindex::Tree->new(
        -Env           => \%env_opts,
        -Flags         => DB_CREATE,
        -CacheFilename => $Config{CacheFilename},
    );

    # Record the moment this session began.
    update_timestamp( \%Config );

    # Walk the ports tree below PortsDir so that the cache is
    # populated from the port Makefiles found there.
    $tree->scan_makefiles( $Config{PortsDir} );
}

__END__

=head1 NAME

cache-init -- Generate the portindex cache by scanning the entire ports tree

=head1 SYNOPSIS

B<cache-init> [B<-hvqs>] [B<-c> F<dir>] [B<-C> F<file>] [B<-T> F<file>] [B<-p> F<dir>] [B<-M> F<file>]... [B<-m> F<file>]...

=head1 DESCRIPTION

B<cache-init> scans over the entire ports tree to initialise the
B<portindex> cache.  The cache consists of a Berkeley DB Btree file,
plus a timestamp file showing when the cache contents were last
modified.  The Btree file consists of records indexed by the port
origin directory containing:

=over 8

=item *

The one-line port descriptions generated by C<make describe>.

=item *

A list of all of the other Makefiles included by that port Makefile
from within F</usr/ports> (or F<$PORTSDIR> if it is set in the
environment).  Any Makefiles included in the C<UbiquitousMakefiles> or
C<EndemicMakefiles> lists will be omitted from the record.

=item *

A record of the master port when the port is a slave port, based on
the value of the C<MASTER_PORT> variable.

=back

B<cache-init> is used to bootstrap the B<portindex> data cache on a
new system.  It only needs to be run once, after which B<cache-update>
may be used to apply incremental updates to the data cache.  As
B<cache-init> is fairly slow to execute, dealing as it does with some
17,500 separate ports, running it repeatedly is a handy way to waste a
great deal of time.

B<cache-init> will preserve a backup copy of any pre-existing cache
files by renaming them with a F<.old> suffix.

=head2 When to run B<cache-init> and when to run B<cache-update>

Over time, successively updating the F<INDEX> file via B<cache-update>
can inevitably produce minor inconsistencies and an F<INDEX> file that
diverges slowly from equivalence to what C<make index> would produce.
For best results it will be necessary to occasionally re-run
B<cache-init> and rebuild the cache from scratch.  Certain changes to
your system should act as warning flags that this needs to be
considered.

=over 8

=item *

Modifications to ubiquitously included makefiles such as
F</etc/make.conf> or F</usr/ports/Mk/bsd.port.mk>.  B<cache-update>
will compare timestamps on these files with the cache timestamp and
attempt to warn you when a re-initialisation might be a good idea.

Technically there are several other makefiles which will always be
included any time B<make> is invoked within the ports system, but no
warnings will be generated for those files. They rarely change, and
when they do, the changes are unlikely to have any material effect on
the ports F<INDEX>.

B<cache-init> accepts the same C<UbiquitousMakefiles> and
C<EndemicMakefiles> configuration options as B<cache-update> (or the
equivalent command line options).  However, for B<cache-init> the
effect is different.  All that happens is that none of the Makefile
names mentioned in those lists will be included in the cached data.

=item *

Modifying the environment between successive runs of B<cache-update>.
B<make> variables can be set from the environment, although in most
cases using F</etc/make.conf> would generally be a better idea.  There
are two things that can be done to prevent this causing problems.
Firstly, the configuration file can contain live I<Perl> code: you can
modify the environment of the process by manipulating the global
C<%ENV> hash from within the configuration file.  Secondly both
B<cache-init> and B<cache-update> obey a C<--scrub-environment>
command line flag, and the equivalent C<ScrubEnvironment>
configuration file setting, which deletes everything from the
environment except for certain standard variables.  As command line
options generally override configuration files, C<--scrub-environment>
will trump modifying C<%ENV>.

=item *

Installing or updating certain software packages.  For instance, the
simple presence of the Gnome libraries on the system will cause many
packages to add a C<-gnome> suffix to their names.  The F<editors/vim>
port is a good example of this behaviour.  Ports containing Linux
software run under emulation will automatically detect which version
of the F<linux-base> ports you have installed: changing to a different
F<linux-base> port will affect the dependency lists for all Linux
software ports.  Unfortunately it is practically impossible to detect
such changes and automatically update affected ports.  These are not
the only two examples of such behaviour.

=back

=head2 Configuration Files

B<cache-init> shares configuration files with B<cache-update>,
B<find-updated> and B<portindex>.  Any configuration settings are
taken from the following locations, where the later items on this list
override the earlier:

=over 8

=item *

Built-in settings from the B<FreeBSD::Portindex::Config> perl module.

=item *

The system wide configuration file F</usr/local/etc/portindex.cfg>

=item *

The per-user configuration file F<${HOME}/.portindexrc>. This file is
ignored if the process is run as root.

=item *

The local configuration file, found in the current working directory
of the B<cache-init> process F<./.portindexrc>.  This file is ignored
if the process is run as root.

=item *

The program command line.

=back

All of the configuration files are optional.  A summary of the
resultant configuration options including the effect of any command
line settings is printed as part of the help text when B<cache-init>
is invoked with the C<-h> option.

=head1 OPTIONS

=over 8

=item B<-h>

=item B<--help>

Print a brief usage message and a summary of the configuration
settings after command line processing and then exit.

=item B<-v>

=item B<--verbose>

Turn on verbose output printed to C<STDERR>.  This is the default.

=item B<-q>

=item B<--quiet>

=item B<--noverbose>

Turn off verbose output to C<STDERR>.  Using both the B<-v> and B<-q>
options together does not make any sense, but neither does it generate
an error.  The last mentioned of the two options will prevail.

=item B<-s>

=item B<--scrub-environment>

Delete all environment variables except for C<$USER>, C<$HOME>,
C<$PATH>, C<$SHELL>, C<$TERM> and C<$TERMCAP>.  This provides a
standardized environment for C<make describe> and other sub-processes.

=item B<--noscrub-environment>

Turn off environment scrubbing.  All environment variables will be
passed through intact to C<make describe> and other sub-processes.
This is the default.

=item B<-c> F<dir>

=item B<--cache-dir>=F<dir>

The location of the B<portindex> data cache, by default
F</var/db/portindex>.

=item B<-C> F<file>

=item B<--cache-file>=F<file>

Berkeley DB Btree file containing the cached output of the equivalent
of running C<make describe> plus C<make -V MASTER_PORT -V
.MAKEFILE_LIST> for all of the ports in the tree -- instead of
actually running C<make describe>, this program extracts the values of
a series of C<make> variables and generates the C<make describe>
output itself, internally.  This file name will be relative to the
cache directory (B<-c> option above) unless an absolute path is given.
Defaults to F<portindex-cache.db>.

=item B<-T> F<file>

=item B<--timestamp-file>=F<file>

A file within the cache directory whose modification time marks the
last time that data was modified in or added to the cache.  Defaults
to F<portindex-timestamp>.

=item B<-p> F<dir>

=item B<--ports-dir>=F<dir>

The location of the ports tree. Almost always defaults to
F</usr/ports> unless C<$PORTSDIR> is set in the environment.

=item B<-M> F<file>

=item B<--ubiquitous-makefile> F<file>

=item B<-m> F<file>

=item B<--endemic-makefile> F<file>

These options both have exactly the same effect for B<cache-init>, and
are only provided separately for compatibility with B<cache-update>.
Do not include the named makefile in the data cache as a trigger for
re-checking the C<make describe> output for any ports.  Non absolute
paths will be taken as relative to C<PortsDir>.  Repeat the option to
add more makefiles to the list.  Default values are:
(UbiquitousMakefiles) F</etc/make.conf>, F<${PORTSDIR}/Mk/bsd.port.mk>
(EndemicMakefiles) F<${PORTSDIR}/Mk/bsd.sites.mk>,
F<${PORTSDIR}/Mk/bsd.commands.mk>, F<${PORTSDIR}/Mk/bsd.destdir.mk>.

=back

=head1 FILES

=over 16

=item F</usr/ports>

The default ports directory.

=item F</var/db/portindex>

The location of the data cache.

=item F<portindex-cache.db>

Btree file containing cached C<make describe> and other output.

=item F<__db.001>, F<__db.002>, F<__db.003>, F<__db.004>

Files used as part of the internal workings of BerkeleyDB, for memory
pool management and DB locking.  Will be recreated automatically if
deleted.

=item F<portindex-timestamp>

This file contains the last time and date that the cache was updated
or modified.

=item F</usr/local/etc/portindex.cfg>

System-wide configuration file.

=item F<${HOME}/.portindexrc>

Per-user configuration file

=item F<./.portindexrc>

Local configuration file

=back

=head1 SEE ALSO

L<portindex(1)>, L<cache-update(1)>, L<find-updated(1)>, L<cvsup(1)>,
L<ports(7)>

=head1 BUGS

B<cache-init> is quite a lot slower than C<make index>.  Possible
improvements include running several C<make describe> processes
concurrently.

B<cache-init> should optionally parse the contents of
F</usr/local/etc/pkgtools.conf> and apply settings from the
C<MAKE_ENV> array.

=cut

#
# That's All Folks!
#


syntax highlighted by Code2HTML, v. 0.9.1