# $Revision: 1.20 $
# $Id: Freshmeat.pm,v 1.20 2002/07/22 08:35:42 afoxson Exp $
# Mail::Freshmeat - parses daily newsletters from http://freshmeat.net/
# Copyright (c) 2002 Adam J. Foxson. All rights reserved.
# Copyright (c) 1999-2000 Adam Spiers. All rights reserved.
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
package Mail::Freshmeat;
use strict;
use 5.005;
use Carp;
use vars qw($VERSION @ISA $AUTOLOAD);
use Mail::Internet;
use Mail::Freshmeat::Entry;
use Mail::Freshmeat::Article;
use Mail::Freshmeat::Utils;
local $^W;
@ISA = qw(Mail::Internet);
($VERSION) = '$Revision: 1.20 $' =~ /\s+(\d+\.\d+)\s+/;
sub new
{
my $self = shift;
my $parser = $self->SUPER::new(@_);
# these are the allowed newsletter accessors
$parser->{_fm_is_attr} =
{
map {$_ => 1} qw
(
_date
_links_header _links
_ad_header _ad
_headlines_header _headlines
_entries_header _entries_payload _entries_total
_articles_header _articles_payload _articles_total
_footer
_full
)
};
return $parser->_parse();
}
sub _parse
{
my $self = shift;
$self->_parse_structure();
$self->_parse_articles();
$self->_parse_entries();
$self->_fix_headlines();
return $self;
}
sub _parse_structure
{
my $self = shift;
my $body = join '', @{$self->body()};
$body =~ s/\n{2,}/\n\n/g;
$body =~
m!
^ (:::\ L\ I\ N\ K\ S\ \ \ F\ O\ R\ \ \ T\ H\ E\ \ \ D\ A\ Y\ :::) \s* $ \n
$_blank_line
((?s: .+? (\d{4}\/\d{2}\/\d{2}) .+? )) \n?
$_blank_line
$_sep
(?:
$_blank_line
^ (:::\ A\ D\ V\ E\ R\ T\ I\ S\ I\ N\ G\ :::) \s* $ \n
$_blank_line
((?s:.+?)) \n?
$_blank_line
$_sep
)?
(?:
$_blank_line
^ (:::\ A\ R\ T\ I\ C\ L\ E\ S\ \((\d+)\)\ :::) \s* $ \n
$_blank_line
((?s:.+?)) \n?
$_blank_line
$_sep
)?
$_blank_line
^ (:::\ R\ E\ L\ E\ A\ S\ E\ \ \ H\ E\ A\ D\ L\ I\ N\ E\ S\ \((\d+)\)\ :::) \s* $ \n
$_blank_line
((?: ^ \[\d+\]\ .* $ \n | ^ [^\)]+ \) $ \n)+)
$_blank_line
$_sep
$_blank_line
^ (:::\ R\ E\ L\ E\ A\ S\ E\ \ \ D\ E\ T\ A\ I\ L\ S\ :::) \s* $ \n
$_blank_line
((?s:.+?)) \n
$_blank_line
$_sep
$_blank_line
^ _+ \s* $ \n
((?s:.+)) \n{2}
!mx or _fatal_bug("Couldn't parse newsletter structure (body).");
$self->{_fm_links_header} = $1;
$self->{_fm_links} = $2;
$self->{_fm_date} = $3;
$self->{_fm_ad_header} = $4;
$self->{_fm_ad} = $5;
$self->{_fm_articles_header} = $6;
$self->{_fm_articles_total} = $7;
$self->{_fm_articles_payload} = $8;
$self->{_fm_headlines_header} = $9;
$self->{_fm_entries_total} = $10;
$self->{_fm_headlines} = $11;
$self->{_fm_entries_header} = $12;
$self->{_fm_entries_payload} = $13;
$self->{_fm_footer} = $14;
$self->{_fm_full} = $body;
chomp $self->{_fm_headlines};
for my $key (keys %$self)
{
$self->{$key} = '' if not defined $self->{$key};
}
}
sub _parse_articles
{
my $self = shift;
my $count = 1;
my @articles;
return if not $self->articles_payload();
for my $article
(
split
m/
\/ $ \n
/mx,
$self->articles_payload()
)
{
my $new_article = Mail::Freshmeat::Article->new($article, $count);
push @articles, $new_article;
$count++;
}
my $total_articles = scalar @articles;
if ($total_articles != $self->articles_total())
{
_fatal_bug("Counted articles don't match what the newsletter claims " .
"($total_articles/${\($self->articles_total())}).");
}
$self->{_fm_articles} = \@articles;
}
sub _parse_entries
{
my $self = shift;
my $count = 1;
my @entries;
for my $entry
(
split
m/
$_blank_line
^ \s* -\ %\ \ -\ %\ \ -\ %\ -\ %\ - \s* $ \n
$_blank_line
/mx,
$self->entries_payload()
)
{
my $new_entry = Mail::Freshmeat::Entry->new($entry, $count);
push @entries, $new_entry;
$count++;
}
my $total_entries = scalar @entries;
if ($total_entries != $self->entries_total())
{
_fatal_bug("Counted entries don't match what the newsletter claims " .
"($total_entries/${\($self->entries_total())}).");
}
$self->{_fm_entries} = \@entries;
}
# This unfortunately is need since some of the individual one-line headline
# entries wrap over to the second line
sub _fix_headlines
{
my $self = shift;
my $buffer = '';
for my $entry (split /\n/, $self->headlines())
{
if ($entry =~ /^\[\d{3}/)
{
$buffer .= "$entry\n";
}
else
{
chop $buffer;
$buffer .= " $entry\n";
}
}
chomp $buffer;
$self->{_fm_headlines} = $buffer;
}
sub articles
{
my $self = shift;
croak "articles is not a class method" if not ref $self;
return if not exists $self->{_fm_articles};
wantarray ? @{$self->{_fm_articles}} : $self->{_fm_articles};
}
sub entries
{
my $self = shift;
croak "entries is not a class method" if not ref $self;
return if not exists $self->{_fm_entries};
wantarray ? @{$self->{_fm_entries}} : $self->{_fm_entries};
}
sub AUTOLOAD
{
my $self = $_[0];
my ($package, $method) = ($AUTOLOAD =~ /(.*)::(.*)/);
return if $method =~ /^DESTROY$/;
croak "$method is not a class method or does not exist" if not ref $self;
unless ($self->{_fm_is_attr}->{"_$method"})
{
croak "No such newsletter accessor: $method; aborting";
}
my $code = q
{
sub
{
my $self = shift;
return $self->{_fm_METHOD};
}
};
$code =~ s/METHOD/$method/g;
{
no strict 'refs';
*$AUTOLOAD = eval $code;
}
goto &$AUTOLOAD;
}
1;
__END__
=head1 NAME
Mail::Freshmeat - parses daily newsletters from http://freshmeat.net/
=head1 SYNOPSIS
use Mail::Freshmeat;
my $newsletter = Mail::Freshmeat->new(\*STDIN);
print "Date: ", $newsletter->date(), "\n";
print "Total entries: ", $newsletter->entries_total(), "\n";
for my $entry ($newsletter->entries())
{
print "Position: ", $entry->position(), "\n";
print "Name and version: ", $entry->name_and_version(), "\n";
}
=head1 DESCRIPTION
IMPORTANT: DUE TO FRESHMEAT.NET CHANGING THE STRUCTURE OF THEIR
NEWSLETTERS, THE INTERFACE FOR THIS PACKAGE HAS CHANGED
SINCE Mail::Freshmeat 0.94.
Mail::Freshmeat is a subclass of B<Mail::Internet>.
This package provides parsing of the daily e-mail newsletters which
are sent out from F<http://freshmeat.net/> to any individual who
requests them.
=head1 NEWSLETTER METHODS
=over 4
=item * B<new>
This is the constructor. Pass it something that Mail::Internet will like
such as a file descriptor (reference to a GLOB) or a reference to an
array and you will get back a newsletter object.
=item * B<entries>
This object method will return an array or an array reference (depending
on context) of entry objects for all of the entries in the newsletter.
=item * B<articles>
This object method will return an array or an array reference (depending
on context) of article objects for all of the articles in the newsletter.
=back
=head1 ENTRY METHODS
=over 4
=item * B<entry_keys>
This object method will return an array or an array reference (depending
on context) of all the attribute names of an entry (e.g.: position, name,
license, url) in the order that they appeared.
=item * B<short_entry>
This object method will return the short description of the entry as it
appeared in the newsletter headlines section (eg: Linux 2.4.9-ac15 (2.4-ac))
=back
=head1 ARTICLE METHODS
=over 4
=item * B<article_keys>
This object method will return an array or an array reference (depending
on context) of all the attribute names of an article (e.g.: title,
description, url) in the order that they appeared.
=back
=head1 NEWSLETTER ACCESSORS
=over 4
=item * B<ad>
=item * B<ad_header>
=item * B<articles_header>
=item * B<articles_payload>
=item * B<articles_total>
=item * B<date>
=item * B<entries_header>
=item * B<entries_payload>
=item * B<entries_total>
=item * B<footer>
=item * B<full>
=item * B<headlines>
=item * B<headlines_header>
=item * B<links>
=item * B<links_header>
=back
=head1 ENTRY ACCESSORS
=over 4
=item * B<about>
=item * B<changes>
=item * B<full>
=item * B<license>
=item * B<name>
=item * B<name_and_version>
=item * B<position>
=item * B<posted_by_name>
=item * B<posted_by_url>
=item * B<posted_on>
=item * B<trove>
=item * B<url>
=item * B<version>
=back
=head1 ARTICLE ACCESSORS
=over 4
=item * B<description>
=item * B<full>
=item * B<posted_by_name>
=item * B<posted_by_url>
=item * B<posted_on>
=item * B<section>
=item * B<title>
=item * B<url>
=back
=head1 AUTHORS
=item Adam J. Foxson B<afoxson@pobox.com>, 2002-
=item Adam Spiers B<adam@spiers.net>, 1999-2000
=head1 LICENSE
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
=head1 VERSION
This is release 1.20.
=head1 SEE ALSO
perl(1).
=cut
syntax highlighted by Code2HTML, v. 0.9.1