# -*-perl-*-
#
# Copyright (c) 1996-1998 Kevin Johnson <kjj@pobox.com>.
#
# All rights reserved. This program is free software; you can
# redistribute it and/or modify it under the same terms as Perl
# itself.
#
# $Id: Mbox.pm,v 1.6 1998/04/05 17:21:53 kjj Exp $

require 5.00397;

package Mail::Folder::Mbox;
use strict;
use vars qw($VERSION @ISA $folder_id);

@ISA = qw(Mail::Folder);
$VERSION = "0.07";

Mail::Folder->register_type('mbox');

=head1 NAME

Mail::Folder::Mbox - A Unix mbox interface for Mail::Folder.

B<WARNING: This code is in alpha release. Expect the interface to
change.>

=head1 SYNOPSIS

C<use Mail::Folder::Mbox;>

=head1 DESCRIPTION

This module provides an interface to unix B<mbox> folders.

The B<mbox> folder format is the standard monolithic folder structure
prevalent on Unix.  A single folder is contained within a single file.
Each message starts with a line matching C</^From /> and ends with a
blank line.

The folder architecture does not provide any persistantly stored
current message variable, so the current message in this folder
interface defaults to C<1> and is not retained between C<open>s of a
folder.

If the C<Timeout> option is specified when the object is created, that
value will be used to determine the timeout for attempting to aquire a
folder lock.  The default is 10 seconds.

If the C<DotLock> option is specified when the object is created, that
value will be used to determine whether or not to use 'C<.lock>' style
folder locking.  The default value is C<1>.

If the C<Flock> option is specified when the object is created, that
value will be used to determined whether or not to use C<flock> style
folder locking.  By default, the option is not set.

If the C<NFSLock> option is specified when the object is created, that
value will be used to determine whether or not special measures are
taken when doing C<DotLock>ing.  These special measures consist of
constructing the lock file in a special manner that is more immune to
atomicity problems with NFS when creating a folder lock file.  By
default, the option is not set.  This option necessitates the ability
to use long filenames.

It is currently a fatal error to have both C<DotLock> and C<Flock>
disabled.

**NOTE** flock locking is currently disabled until I can sift out the
'right way'. **NOTE**

=cut

use Mail::Folder;
use Mail::Internet;
use Mail::Header;
use Mail::Address;
use Date::Format;
use Date::Parse;
# use File::BasicFlock;
use IO::File;
use DirHandle;
use Sys::Hostname;		# for NFSLock option
use Carp;

$folder_id = 0;			# used to generate a unique id per open folder

=head1 METHODS

=head2 open($folder_name)

=over 2

=item * Call the superclass C<open> method.

=item * Check to see if it is a valid mbox folder.

=item * Mark it as readonly if the folder is not writable.

=item * Lock the folder.

=item * Split the folder into individual messages in a temporary
working directory.

=item * Unlock the folder.

=item * Cache all the headers.

=item * Update the appropriate labels with information in the
C<Status> fields.

=item * Set C<current_message> to C<1>.

=back

=cut

sub open {
  my $self = shift;
  my $foldername = shift;
  
  return 0 unless $self->SUPER::open($foldername);
  
  is_valid_folder_format($foldername) || (-z $foldername)
    or croak "$foldername isn't an mbox folder";
  
  if (($< == 0) || ($> == 0)) {	# if we're root we have to check it by hand
    $self->set_readonly unless ((stat($foldername))[2] & 0200);
  } else {
    $self->set_readonly unless (-w $foldername);
  }
  # $self->set_readonly unless (-w $foldername);
  
  $self->_lock_folder or return 0;
  
  my $fh = new IO::File $foldername or croak "can't open $foldername: $!";
  $fh->seek(0, 2);
  $self->{MBOX_OldSeekPos} = $fh->tell;
  $fh->close;

  my $qty_new_msgs = $self->_absorb_mbox($foldername, 0);
  unless (defined($qty_new_msgs) && $self->_unlock_folder) {
    $self->_clean_working_dir;
    return 0;
  }
  $self->current_message(1);
  
  return $qty_new_msgs;
}

=head2 close

Deletes the internal working copy of the folder and calls the
superclass C<close> method.

=cut

sub close {
  my $self = shift;

  $self->_clean_working_dir;
  return $self->SUPER::close;
}

=head2 sync

=over 2

=item * Call the superclass C<sync> method.

=item * Lock the folder.

=item * Extract into the temporary working directory any new messages
that have been appended to the folder since the last time the folder
was either C<open>ed or C<sync>ed.

=item * Create a new copy of the folder and populate it with the
messages in the working copy that are not flagged for deletion and
update the C<Status> fields appropriately.

=item * Move the original folder to a temp location

=item * Move the new folder into place

=item * Delete the old original folder

=item * Unlock the folder

=back

=cut

sub sync {
  my $self = shift;

  my @statary;
  my $folder = $self->foldername;
  my $tmpfolder = "$folder.$$";
  my $infh;
  my $outfh;

  return -1 if ($self->SUPER::sync == -1);

  my $last_msgnum = $self->last_message;

  return -1 unless ($self->_lock_folder);

  unless ($infh = new IO::File($folder)) {
    $self->_unlock_folder;
    croak "can't open $folder: $!";
  }
  $infh->close;

  my $qty_new_msgs = $self->_absorb_mbox($folder, $self->{MBOX_OldSeekPos});
  unless (defined($qty_new_msgs)) {
    $self->_unlock_folder;
  }

  unless ($self->is_readonly) {
    # we need to diddle current_message if it's pointing to a deleted msg
    my $msg = $self->current_message;
    while ($msg >= $self->first_message) {
      last if (!$self->label_exists($msg, 'deleted'));
      $msg = $self->prev_message($msg);
    }
    $self->current_message($msg);

    for my $msg ($self->select_label('deleted')) {
      unlink("$self->{MBOX_WorkingDir}/$msg");
      $self->forget_message($msg);
    }
    $self->clear_label('deleted');

    unless (@statary = stat($folder)) {
      $self->_unlock_folder;
      croak "can't stat $folder: $!";
    }

    unless ($outfh = new IO::File $tmpfolder, O_CREAT|O_WRONLY, 0600) {
      $self->_unlock_folder;
      croak "can't create $tmpfolder: $!";
    }

    # match the permissions of the original folder
    unless (chmod(($statary[2] & 0777), $tmpfolder)) {
      unlink($tmpfolder);
      $self->_unlock_folder;
      croak "can't chmod $tmpfolder: $!";
    }

    for my $msg (sort { $a <=> $b } $self->message_list) {
      my $mref = $self->get_message($msg);
      my $href = $self->get_header($msg);

      unless ($self->get_option('NotMUA')) {
	my $status = 'O';
	$status = 'RO' if $self->label_exists($msg, 'seen');
	$href->replace('Status', $status, -1);
      }
      
      my $from = $href->get('Mail-From') || $href->get('From ');
      
      # we dup them cuz we're going to modify them
      my $dup_href = $href->dup;
      my $dup_mref = $mref->dup;
      $dup_href->delete('Mail-From') if ($dup_href->count('Mail-From'));
      
      $outfh->print("From $from");
      $dup_href->print($outfh);
      $outfh->print("\n");
      $dup_mref->escape_from unless $self->get_option('Content-Length');
      $dup_mref->print_body($outfh);
      $outfh->print("\n");
    }
    $outfh->close;

    # Move the original folder to a temp location

    unless (rename($folder, "$folder.tmp")) {
      $self->_unlock_folder;
      croak "can't move $folder out of the way: $!";
    }
    
    # Move the new folder into place
    
    unless (rename($tmpfolder, $folder)) {
      $self->_unlock_folder;
      croak "gack! can't rename $folder.tmp to $folder: $!"
	unless (rename("$folder.tmp", $folder));
      croak "can't move $folder to $folder.tmp: $!";
    }
    
    # Delete the old original folder
    
    unless (unlink("$folder.tmp")) {
      $self->_unlock_folder;
      croak "can't unlink $folder.tmp: $!";
    }
  }

  $self->_unlock_folder;

  return $qty_new_msgs;
}

=head2 pack

Calls the superclass C<pack> method.

Renames the message list to that there are no gaps in the numbering
sequence.

It also tweaks the current_message accordingly.

=cut

sub pack {
  my $self = shift;

  my $newmsg = 0;
  my $curmsg = $self->current_message;

  return 0 if (!$self->SUPER::pack);

  for my $msg (sort { $a <=> $b } $self->message_list) {
    $newmsg++;
    if ($msg > $newmsg) {
      $self->current_message($newmsg) if ($msg == $curmsg);
      $self->remember_message($newmsg);
      $self->cache_header($newmsg, $self->{Messages}{$msg}{Header});
      $self->forget_message($msg);
    }
  }

  return 1;
}

=item get_message ($msg_number)

Calls the superclass C<get_message> method.

Retrieves the given mail message file into a B<Mail::Internet> object
reference, sets the 'C<seen>' label, and returns the reference.

If the 'Content-Length' option is not set, then C<get_message> will
unescape 'From ' lines in the body of the message.

=cut

sub get_message {
  my $self = shift;
  my $key = shift;

  return undef unless $self->SUPER::get_message($key);

  my $file = "$self->{MBOX_WorkingDir}/$key";

  my $fh = new IO::File $file or croak "whoa! can't open $file: $!";
  my $mref = new Mail::Internet($fh,
				Modify => 0,
				MailFrom => 'COERCE');
  $mref->unescape_from unless $self->get_option('Content-Length');
  $fh->close;

  my $href = $mref->head;
  $self->cache_header($key, $href);

  $self->add_label($key, 'seen');

  return $mref;
}

=item get_message_file ($msg_number)

Calls the superclass C<get_message_file> method.

Retrieves the given mail message file and returns the name of the file.

Returns C<undef> on failure.

This method does NOT currently do any 'From ' unescaping.

=cut

sub get_message_file {
  my $self = shift;
  my $key = shift;

  return undef unless $self->SUPER::get_message($key);

  return "$self->{MBOX_WorkingDir}/$key";
}

=head2 get_header($msg_number)

If the particular header has never been retrieved then C<get_header>
loads (in a manner similar to C<get_message>) the header of the given
mail message into C<$self-E<gt>{Messages}{$msg_number}{Header}> and
returns the object reference.

If the header for the given mail message has already been retrieved in
a prior call to C<get_header>, then the cached entry is returned.

It also calls the superclass C<get_header> method.

=cut

sub get_header {
  my $self = shift;
  my $key = shift;

  my $hdr = $self->SUPER::get_header($key);
  return $hdr if defined($hdr);
  
  # return undef unless ($self->SUPER::get_header($key));

  # return $self->{Messages}{$key}{Header} if ($self->{Messages}{$key}{Header});

  my $file = "$self->{MBOX_WorkingDir}/$key";

  my $fh = new IO::File $file or croak "can't open $file: $!";
  my $href = new Mail::Header($fh,
			      Modify => 0,
			      MailFrom => 'COERCE');
  $fh->close;

  $self->cache_header($key, $href);

  return $href;
}

=head2 append_message($mref)

=over 2

Calls the superclass C<append_message> method.

Creates a new mail message file, in the temporary working directory,
with the contents of the mail message contained in C<$mref>.
It will synthesize a 'From ' line if one is not present in
C<$mref>.

If the 'Content-Length' option is not set, then C<get_message> will
escape 'From ' lines in the body of the message.

=cut

sub append_message {
  my $self = shift;
  my $mref = shift;
  
  my $msgnum = $self->last_message;
  
  my $dup_mref = $mref->dup;

  return 0 unless $self->SUPER::append_message($dup_mref);

  my $dup_href = $mref->head->dup;
  $dup_mref->escape_from unless ($self->get_option('Content-Length'));
  
  $msgnum++;
  my $fh = new IO::File("$self->{MBOX_WorkingDir}/$msgnum",
			O_CREAT|O_WRONLY, 0600)
    or croak "can't create $self->{MBOX_WorkingDir}/$msgnum: $!";
  _coerce_header($dup_href);
  $dup_href->print($fh);
  $fh->print("\n");
  $dup_mref->print_body($fh);
  $fh->close;

  $self->remember_message($msgnum);
  
  return 1;
}

=head2 update_message($msg_number, $mref)

Calls the superclass C<update_message> method.

Replaces the message pointed to by C<$msg_number> with the contents of
the C<Mail::Internet> object reference C<$mref>.

It will synthesize a 'From ' line if one is not present in
$mref.

If the 'Content-Length' option is not set, then C<get_message> will
escape 'From ' lines in the body of the message.

=cut

sub update_message {
  my $self = shift;
  my $key = shift;
  my $mref = shift;
  
  my $file_pos = 0;
  my $filename = "$self->{MBOX_WorkingDir}/$key";
  
  my $dup_mref = $mref->dup;
  my $dup_href = $dup_mref->head->dup;

  return 0 unless $self->SUPER::update_message($key, $dup_mref);

  $dup_mref->escape_from unless $self->get_option('Content-Length');

  my $fh = new IO::File "$filename.new", O_CREAT|O_WRONLY, 0600
    or croak "can't create $filename.new: $!";
  _coerce_header($dup_href);
  $dup_href->print($fh);
  $fh->print("\n");
  $dup_mref->print_body($fh);
  $fh->close;

  rename("$filename.new", $filename) or
    croak "can't rename $filename.new to $filename: $!";
  
  return 1;
}

=head2 init

Initializes various items specific to B<Mbox>.

=over 2

=item * Determines an appropriate temporary directory.  If the
C<TMPDIR> environment variable is set, it uses that, otherwise it uses
C</tmp>.  The working directory will be a subdirectory in that
directory.

=item * Bumps a sequence number used for unique temporary filenames.

=item * Initializes C<$self-E<gt>{WorkingDir}> to the name of a
directory that will be used to hold the working copies of the messages
in the folder.

=back

=cut

sub init {
  my $self = shift;

  my $tmpdir = $ENV{TMPDIR} ? $ENV{TMPDIR} : "/tmp";

  $self->{MBOX_WorkingDir} = undef;
  $folder_id++;
  for my $i ($folder_id .. ($folder_id + 10)) {
    if (! -e "$tmpdir/mbox$folder_id.$$") {
      $self->{MBOX_WorkingDir} = "$tmpdir/mbox.$folder_id.$$";
      last;
    }
    $folder_id++;
  }
  croak "can't seem to be able to create a working directory\n"
    unless (defined($self->{MBOX_WorkingDir}));
  $self->set_option('DotLock', 1)
    unless defined($self->get_option('DotLock'));

  croak "flock locking currently not implemented - sorry..."
    if ($self->get_option('Flock'));

  return 1;
}

=head2 is_valid_folder_format($foldername)

Returns C<1> if the folder is a plain file and starts with the string
'C<From >', otherwise it returns C<0>.

Returns C<1> if the folder is a zero-length file and the
C<$Mail::Format::DefaultEmptyFileFormat> class variable is set to
'C<mbox>'.

Otherwise it returns C<0>.

=cut

sub is_valid_folder_format {
  my $foldername = shift;

  return 0 if (! -f $foldername);
  if (-z $foldername) {
    return 1 if ($Mail::Folder::DefaultEmptyFileFormat eq 'mbox');
    return 0;
  }

  my $fh = new IO::File $foldername or return 0;
  my $line = <$fh>;
  $fh->close;
  return($line =~ /^From /);
}

=head2 create($foldername)

Creates a new folder named C<$foldername>.  Returns C<0> if the folder
already exists, otherwise returns C<1>.

=cut

sub create {
  my $self = shift;
  my $foldername = shift;

  return 0 if (-e $foldername);
  my $fh = new IO::File $foldername, O_CREAT|O_WRONLY, 0600
    or croak "can't create $foldername: $!";
  $fh->close;
  return 1;
}
###############################################################################
sub DESTROY {
  my $self = shift;

  # all of these are just in case...
  # the appropriate methods should have removed them already...
  if ($self->{Creator} == $$) {
    $self->_unlock_folder;
    $self->_clean_working_dir;
  }
}
###############################################################################
sub _absorb_mbox {
  my $self = shift;
  my $folder = shift;
  my $seek_pos = shift;

  my $qty_new_msgs = 0;
  my $last_was_blank = 0;
  my $is_blank = 0;
  my $last_msgnum = $self->last_message;
  my $new_msgnum = $last_msgnum;
  my $outfile_is_open = 0;
  my $outfh;

  if (! -e $self->{MBOX_WorkingDir}) {
    mkdir($self->{MBOX_WorkingDir}, 0700)
      or (carp "can't create $self->{MBOX_WorkingDir}: $!" and return undef);
  } elsif (! -d $self->{MBOX_WorkingDir}) {
    carp "$self->{MBOX_WorkingDir} isn't a directory!";
    return undef;
  }

  my $infh = new IO::File $folder or croak "can't open $folder: $!";
  $infh->seek($seek_pos, 0)
    or (carp "can't seek to $seek_pos in $folder: $!" and return undef);
  while (<$infh>) {
    $is_blank = /^$/ ? 1 : 0;
    if (/^From /) {
      $outfh->close if ($outfile_is_open);
      $outfile_is_open = 0;
      $new_msgnum++;
      $qty_new_msgs++;
      $self->remember_message($new_msgnum);
      $outfh = new IO::File("$self->{MBOX_WorkingDir}/$new_msgnum",
			    O_CREAT|O_WRONLY, 0600)
	or (carp "can't create $self->{MBOX_WorkingDir}/$new_msgnum: $!"
	    and return undef);
      $outfile_is_open++;
    } else {
      $outfh->print("\n") if ($last_was_blank);
    }
    $last_was_blank = $is_blank ? 1 : 0;
    $outfh->print($_) if !$is_blank;
  }
  $outfh->close if ($outfile_is_open);
  $self->{MBOX_OldSeekPos} = $infh->tell;
  $infh->close;

  for my $msg (($last_msgnum + 1) .. $self->last_message) {
    my $href = $self->get_header($msg);
    my $status = $href->get('Status') or next;
    $self->add_label($msg, 'seen') if ($status =~ /R/);
  }

  return $qty_new_msgs;
}

# Mbox files must have a 'From ' line at the beginning of each
# message.  This routine will synthesize one from the 'From:' and
# 'Date:' fields.  Original solution and code of the following
# subroutine provided by Andreas Koenig

# Since Mail::Header could have been told to coerce the 'From ' into a
# Mail-From field, we look for both, and neither is found then
# synthesize one.  In either case, a 'From ' string is returned.

sub _coerce_header {
  my $href = shift;
  my $from = '';
  my $date = '';
  
  my $mailfrom = $href->get('From ') || $href->get('Mail-From');
  
  unless ($mailfrom) {
    if ($from =
	$href->get('Reply-To') ||
	$href->get('From') ||
	$href->get('Sender') ||
	$href->get('Return-Path')) { # this is dubious
      my @addrs = Mail::Address->parse($from);
      $from = $addrs[0]->address();
    } else {
      $from = 'NOFROM';
    }
    
    if ($date = $href->get('Date')) {
      chomp($date);
      $date = gmtime(str2time($date));
    } else {
      # There was no date field. Let's just stuff today's date in there
      # for lack of a better value. I think it should be gmtime - someone
      # correct me if this is wrong.
      $date = gmtime;
    }
    chomp($date);
    $mailfrom = "$from $date\n";
  }
  
  $href->delete('From ');
  $href->delete('Mail-From');
  
  $href->mail_from('KEEP');
  $href->add('From ', $mailfrom, 0);
  $href->mail_from('COERCE');
  
  return $href;
}

sub _clean_working_dir {
  my $self = shift;
  # unlink(glob("$self->{MBOX_WorkingDir}/*"));
  # maybe this should filter out directories, just to be safe...
  my $dir = DirHandle->new($self->{MBOX_WorkingDir})
    or croak "yeep! can't read $self->{MBOX_WorkingDir} disappeared: $!\n";
  for my $file ($dir->read) {
    next if (($file eq '.') || ($file eq '..'));
    next if (-d "$self->{MBOX_WorkingDir}/$file");
    unlink "$self->{MBOX_WorkingDir}/$file";
  }
  $dir->close;
  rmdir($self->{MBOX_WorkingDir});
}

sub _lock_folder {
  my $self = shift;
  my $folder = $self->foldername;

  croak "DotLock and Flock are both disabled\n"
    unless ($self->get_option('DotLock') || $self->get_option('Flock'));

  my $timeout = $self->get_option('Timeout');
  $timeout ||= 10;
  my $sleep = 1.0;		# maybe this should be configurable

  if ($self->get_option('DotLock')) {
    my $nfshack = 0;
    my $lockfile = "$folder.lock";
    if ($self->get_option('NFSLock')) {
      my $host = hostname;
      $nfshack++;
      my $time = time;
      $lockfile .= ".$time.$$.$host";
    }
    for my $num (1 .. int($timeout / $sleep)) {
      my $fh;
      if ($fh = new IO::File $lockfile, O_CREAT|O_EXCL|O_WRONLY, 0600) {
	$fh->close;
	if ($nfshack) {
	  # Whhheeeee!!!!!
	  # In NFS, the O_CREAT|O_EXCL isn't guaranteed to be atomic.
	  # So we create a temp file that is probably unique in space
	  # and time ($folder.lock.$time.$pid.$host).
	  # Then we use link to create the real lock file. Since link
	  # is atomic across nfs, this works.
	  # It loses if it's on a filesystem that doesn't do long filenames.
	  link $lockfile, "$folder.lock"
	    or carp "link return: $!\n";
	  my @statary = stat($lockfile);
	  unlink $lockfile;
	  if (!defined(@statary) || $statary[3] != 2) { # failed to link?
	    goto RETRY;
	  }
	}
	return 1;
      }
    RETRY:
      last if ($! =~ /denied/);	# failure due to permissions
      select(undef, undef, undef, $sleep);
    }
    return 0;
  }

  # return lock($folder) if ($self->get_option('Flock'));
  return 0;
}

sub _unlock_folder {
  my $self = shift;
  my $folder = $self->foldername;

  if ($self->get_option('DotLock')) {
    return unlink("$folder.lock") if (-e "$folder.lock");
    return 1;
  }

  # return unlock($folder) if ($self->get_option('Flock'));
  return 0;
}

=head1 AUTHOR

Kevin Johnson E<lt>F<kjj@pobox.com>E<gt>

=head1 COPYRIGHT

Copyright (c) 1996-1998 Kevin Johnson <kjj@pobox.com>.

All rights reserved. This program is free software; you can
redistribute it and/or modify it under the same terms as Perl itself.

=cut

1;


syntax highlighted by Code2HTML, v. 0.9.1