##################################################################
package AnyData;
###################################################################
#
# This module is copyright (c), 2000 by Jeff Zucker
# All rights reserved.
#
###################################################################
use strict;
require Exporter;
use AnyData::Storage::TiedHash;
use vars qw( @ISA @EXPORT $VERSION );
@ISA = qw(Exporter);
@EXPORT = qw( adConvert adTie adRows adColumn adExport adDump adNames adFormats);
#@EXPORT = qw( ad_fields adTable adErr adArray);
$VERSION = '0.10';
sub new {
my $class = shift;
my $format = shift;
my $flags = shift || {};
my $del_marker = "\0";
$format = 'CSV' if $format eq 'ARRAY';
my $parser_name = 'AnyData/Format/' . $format . '.pm';
eval { require $parser_name; };
die "Error Opening File-Parser: $@" if $@;
$parser_name =~ s#/#::#g;
$parser_name =~ s#\.pm$##g;
my $col_names = $flags->{col_names} || undef;
if ($col_names) {
my @cols;
@cols = ref $col_names eq 'ARRAY'
? @$col_names
: split ',',$col_names;
$flags->{col_names} = \@cols;
}
$flags->{del_marker} = $del_marker;
$flags->{records} ||= $flags->{data};
$flags->{field_sep} ||= $flags->{sep_char} ||= $flags->{ad_sep_char};
$flags->{quote} ||= $flags->{quote_char} ||= $flags->{ad_quote_char};
$flags->{escape} ||= $flags->{escape_char}||= $flags->{ad_escape_char};
$flags->{record_sep}||= $flags->{eol} ||= $flags->{ad_eol};
# $flags->{skip_first_row}
my $parser = $parser_name->new ($flags);
if ($parser->{col_names} && !$col_names) {
my @cols;
@cols = ref $parser->{col_names} eq 'ARRAY'
? @{$parser->{col_names}}
: split ',',$parser->{col_names};
$flags->{col_names} = \@cols;
$parser->{col_names} = \@cols;
}
my $storage_name = $flags->{storage}
|| $parser->storage_type()
|| 'File';
$storage_name = "AnyData/Storage/$storage_name.pm";
eval { require $storage_name; };
die "Error Opening Storage Module: $@" if $@;
$storage_name =~ s#/#::#g;
$storage_name =~ s#\.pm$##g;
my $storage = new $storage_name({del_marker=>$del_marker,%$flags});
if ($storage_name =~ 'PassThru') {
$storage->{parser} = $parser;
$parser->{del_marker} = "\0";
$parser->{url} = $flags->{file}
if $flags->{file} and $flags->{file} =~ /http:|ftp:/;
}
my $self = {
storage => $storage,
parser => $parser,
};
return( bless($self,$class) );
}
sub adFormats {
my @formats;
for my $dir(@INC) {
my $format_dir = "$dir/AnyData/Format";
if ( -d $format_dir ) {
local *D;
opendir(D,$format_dir);
@formats = grep {/\.pm$/} readdir(D);
last;
}
}
unshift @formats,'ARRAY';
@formats = map {s/^(.*)\.pm$/$1/;$_} @formats;
return @formats;
}
sub export {
my $self=shift;
my $fh = $self->{storage}->{fh};
my $mode = $self->{storage}->{open_mode} || 'r';
# if ( $self->{parser}->{export_on_close}
# && $self->{storage}->{fh}
# && $mode ne 'r'
# ){
return $self->{parser}->export( $self->{storage}, @_ );
# }
}
sub DESTROY {
my $self=shift;
# $self->export;
$self->zpack;
#print "AD DESTROYED ";
}
##########################################
# DBD STUFF
##########################################
# required only for DBD-AnyData
##########################################
sub prep_dbd_table {
my $self = shift;
my $tname = shift;
my $createMode = shift;
my $col_names;
my $col_nums;
my $first_row_pos;
if (!$createMode) {
$col_names = $self->{storage}->get_col_names($self->{parser});
$col_nums = $self->{storage}->set_col_nums();
$first_row_pos = $self->{storage}->{first_row_pos};
}
die "ERROR: No Column Names!:", $self->{storage}->{open_mode}
if (!$col_names || !scalar @$col_names)
&& 'ru' =~ $self->{storage}->{open_mode}
&& !$createMode eq 'o';
my $table = {
NAME => $tname,
DATA => [],
CURRENT_ROW => 0,
col_names => $col_names,
col_nums => $col_nums,
first_row_pos => $first_row_pos,
fh => $self->{storage}->get_file_handle,
file => $self->{storage}->get_file_name,
ad => $self,
};
#use Data::Dumper; print Dumper $table;
return $table;
}
sub fetch_row {
my $self = shift;
my $requested_cols = shift || [];
my $rec;
if ( $self->{parser}->{skip_pattern} ) {
my $found;
while (!$found) {
$rec = $self->{storage}->file2str($self->{parser},$requested_cols);
last if !defined $rec;
next if $rec =~ $self->{parser}->{skip_pattern};
last;
}
}
else {
$rec = $self->{storage}->file2str($self->{parser},$requested_cols);
}
return $rec if ref $rec eq 'ARRAY';
return unless $rec;
my @fields = $self->{parser}->read_fields($rec);
return undef if scalar @fields == 1 and !defined $fields[0];
return \@fields;
}
sub fetch_rowNEW {
my $self = shift;
my $requested_cols = shift || [];
my $rec = $self->{storage}->file2str($self->{parser},$requested_cols);
my @fields;
if (ref $rec eq 'ARRAY') {
@fields = @$rec;
}
else {
return unless defined $rec;
my @fields = $self->{parser}->read_fields($rec);
return undef if scalar @fields == 1 and !defined $fields[0];
}
if ( my $subs = $self->{parser}->{read_sub} ) {
for (@$subs) {
my($col,$sub) = @$_;
next unless defined $col;
my $col_num = $self->{storage}->{col_nums}->{$col};
next unless defined $col_num;
$fields[$col_num] = &$sub($fields[$col_num]);
}
}
return \@fields;
}
sub push_names {
my $self = shift;
my $col_names = shift || undef;
#print "Can't find column names!" unless scalar @$col_names;
$self->{storage}->print_col_names( $self->{parser}, $col_names )
unless $self->{parser}->{col_names} && $self->parser_type ne 'XML';
# $self->set_col_nums;
$self->{parser}->{key} ||= $col_names->[0];
#use Data::Dumper; print Dumper $self; exit;
}
sub drop { shift->{storage}->drop(@_); }
sub truncate { shift->{storage}->truncate(@_) }
##################################################################
# END OF DBD STUFF
##################################################################
##################################################################
# REQUIRED BY BOTH DBD AND TIEDHASH
##################################################################
sub push_row {
my $self = shift;
die "ERROR: No Column Names!" unless scalar @{$self->col_names};
my $requested_cols = [];
my @row = @_;
if (ref($row[0]) eq 'ARRAY') {
$requested_cols = shift @row;
}
my $rec = $self->{parser}->write_fields(@row) or return undef;
return $self->{storage}->push_row( $rec, $self->{parser}, $requested_cols);
}
sub push_rowNEW {
my $self = shift;
#print "PUSHING... ";
die "ERROR: No Column Names!" unless scalar @{$self->col_names};
my $requested_cols = [];
my @row = @_;
use Data::Dumper;
#print "PUSHING ", Dumper \@row;
if (ref($row[0]) eq 'ARRAY') {
$requested_cols = shift @row;
}
my $rec = $self->{parser}->write_fields(@row) or return undef;
return $self->{storage}->push_row( $rec, $self->{parser}, $requested_cols);
}
sub seek { shift->{storage}->seek(@_); }
sub seek_first_record {
my $self=shift;
$self->{storage}->seek_first_record($self->{parser});
}
sub col_names {
my $self = shift;
my $c = $self->{storage}->{col_names};
$c = $self->{parser}->{col_names} unless (ref $c eq 'ARRAY') and scalar @$c;
$c ||= [];
}
sub is_url {
my $file = shift;
return $file if $file and $file =~ m"^http://|ftp://";
}
sub adTable {
###########################################################
# Patch from Wes Hardaker
###########################################################
# my($formatref,$file,$read_mode,$lockMode,$othflags)=@_;
my($formatref,$file,$read_mode,$lockMode,$othflags,$tname)=@_;
###########################################################
#use Data::Dumper; print Dumper \@_;
my($format,$flags);
$file ||= '';
my $url = is_url($file);
$flags = {};
$othflags ||= {};
if ( ref $formatref eq 'HASH' or $othflags->{data}) {
$format = 'Base';
$flags = $othflags;
if (ref $formatref eq 'HASH') {
%$flags = (%$formatref,%$othflags);
}
}
else {
($format,$flags) = split_params($formatref);
$othflags ||= {};
%$flags = (%$flags,%$othflags);
}
if ( $flags->{cols} ) {
$flags->{col_names} = $flags->{cols};
delete $flags->{cols};
}
if (ref($file) eq 'ARRAY') {
if ($format eq 'Mp3' or $format eq 'FileSys') {
$flags->{dirs} = $file;
}
else {
$flags->{recs} = join '',@$file;
$flags->{recs} = $file if $format =~ /ARRAY/i;
$flags->{storage} = 'RAM' unless $format eq 'XML';
$read_mode = 'u';
}
}
else {
$flags->{file} = $file;
}
if ($format ne 'XML' and ($format eq 'Base' or $url) ) {
my $x;
$flags->{storage} = 'RAM';
delete $flags->{recs};
my $ad = AnyData->new( $format, $flags);
$format eq 'Base'
? $ad->open_table( $file )
: $ad->open_table( $file, 'r',
$ad->{storage}->get_remote_data($file)
);
return $ad;
}
my $ad = AnyData->new( $format, $flags);
my $createMode = 0;
$createMode = $read_mode if defined $lockMode;
$read_mode = 'c' if $createMode and $lockMode;
$read_mode = 'u' if !$createMode and $lockMode;
$read_mode ||= 'r';
$ad->{parser}->{keep_first_line} = 1
if $flags->{col_names} and 'ru' =~ /$read_mode/;
#####################################################
# Patch from Wes Hardaker
#####################################################
# $ad->open_table( $file, $read_mode );
## $ad->open_table( $file, $read_mode, $tname );
$ad->open_table( $file, $read_mode, $tname );
# use Data::Dumper; my $x = $ad; delete $x->{parser}->{twig}; delete $x->{parser}->{record_tag}; delete $x->{parser}->{current_element}; print Dumper $x;
#####################################################
return $ad;
}
sub open_table {
my $self = shift;
$self->{storage}->open_table( $self->{parser}, @_ );
my $col_names = $self->col_names();
$self->{parser}->{key} ||= '';
$self->{parser}->{key} ||= $col_names->[0] if $col_names->[0];
}
##################################################################
##################################################################
# TIEDHASH STUFF
##################################################################
sub key_col { shift->{parser}->{key} }
sub fetchrow_hashref {
my $self = shift;
my $rec = $self->get_undeleted_record or return undef;
my @fields = ref $rec eq 'ARRAY'
? @$rec
: $self->{parser}->read_fields($rec);
my $col_names = $self->col_names();
return undef unless scalar @fields;
return undef if scalar @fields == 1 and !defined $fields[0];
my $rowhash;
@{$rowhash}{@$col_names} = @fields;
return ( $rowhash );
}
sub get_undeleted_record {
my $self = shift;
my $rec;
my $found=0;
return $self->fetch_row if $self->parser_type eq 'XML';
while (!$found) {
my $test = $rec = $self->{storage}->file2str($self->{parser});
return if !defined $rec;
next if $self->{storage}->is_deleted($self->{parser});
next if $self->{parser}->{skip_pattern}
and $rec =~ $self->{parser}->{skip_pattern};
last;
}
return $rec;
# return $rec if ref $rec eq 'ARRAY';
# return unless $rec;
# my @fields = $self->{parser}->read_fields($rec);
# return undef if scalar @fields == 1 and !defined $fields[0];
# return \@fields;
}
sub update_single_row {
my $self = shift;
my $oldrow = shift;
my $newvals = shift;
my @colnames = @{ $self->col_names };
my @newrow;
my $requested_cols = [];
for my $i(0..$#colnames) {
push @$requested_cols, $colnames[$i] if defined $newvals->{$colnames[$i]};
$newrow[$i] = $newvals->{$colnames[$i]};
$newrow[$i] = $oldrow->{$colnames[$i]} unless defined $newrow[$i];
}
unshift @newrow, $requested_cols;
$self->{storage}->seek(0,2);
$self->push_row( @newrow );
return \@newrow;
}
sub update_multiple_rows {
my $self = shift;
my $key = shift;
my $values = shift;
$self->seek_first_record;
my @rows_to_update;
while (my $row = $self->fetchrow_hashref) {
next unless $self->match($row,$key);
$self->{parser}->{has_update_function}
? $self->update_single_row($row,$values)
: $self->delete_single_row();
$self->{parser}->{has_update_function}
? push @rows_to_update,1
: push @rows_to_update,$row;
}
if (!$self->{parser}->{has_update_function}) {
for (@rows_to_update) {
$self->update_single_row($_,$values);
}
}
return scalar @rows_to_update;
}
sub match {
my($self,$row,$key) = @_;
if ( ref $key ne 'HASH') {
return 0 if !$row->{$self->key_col}
or $row->{$self->key_col} ne $key;
return 1;
}
my $found = 0;
while (my($col,$re)=each %$key) {
next unless defined $row->{$col} and is_matched($row->{$col},$re);
$found++;
}
return 1 if $found == scalar keys %$key;
}
sub is_matched {
my($str,$re)=@_;
if (ref $re eq 'Regexp') {
return $str =~ /$re/ ? 1 : 0;
}
my($op,$val);
if ( $re and $re =~/^(\S*)\s+(.*)/ ) {
$op = $1;
$val = $2;
}
elsif ($re) {
return $str =~ /$re/ ? 1 : 0;
}
else {
return $str eq '' ? 1 : 0;
}
my $numop = '< > == != <= >=';
my $chrop = 'lt gt eq ne le ge';
if (!($numop =~ /$op/) and !($chrop =~ /$op/)) {
return $str =~ /$re/ ? 1 : 0;
}
if ($op eq '<' ) { return $str < $val; }
if ($op eq '>' ) { return $str > $val; }
if ($op eq '==') { return $str == $val; }
if ($op eq '!=') { return $str != $val; }
if ($op eq '<=') { return $str <= $val; }
if ($op eq '>=') { return $str >= $val; }
if ($op eq 'lt') { return $str lt $val; }
if ($op eq 'gt') { return $str gt $val; }
if ($op eq 'eq') { return $str eq $val; }
if ($op eq 'ne') { return $str ne $val; }
if ($op eq 'le') { return $str le $val; }
if ($op eq 'ge') { return $str ge $val; }
}
sub delete_single_row {
my $self = shift;
# my $curpos = $self->{storage}->get_pos;
$self->{storage}->delete_record($self->{parser});
# $self->{storage}->go_pos($curpos);
$self->{needs_packing}++;
}
sub delete_multiple_rows {
my $self = shift;
my $key = shift;
$self->seek_first_record;
my $rows_deleted =0;
while (my $row = $self->fetchrow_hashref) {
next unless $self->match($row,$key);
$self->delete_single_row;
$rows_deleted++;
}
return $rows_deleted;
}
sub adNames { @{ shift->{__colnames}} }
sub adDump {
my $table = shift;
my $pat = shift;
die "No table defined" unless $table;
my $ad = tied(%$table)->{ad};
my @cols = @{ $ad->col_names };
print "<",join(":", @cols), ">\n";
while (my $row = each %$table) {
my @row = map {$row->{$_} || ''} @cols;
for (@row) { print "[$_]"; }
print "\n";
}
}
sub adRows {
my $thash = shift;
my %keys = @_;
my $obj = tied(%$thash);
return $obj->adRows(\%keys)
}
sub adColumn {
my $thash = shift;
my $column = shift;
my $obj = tied(%$thash);
return $obj->adColumn($column)
}
sub adArray {
my($format,$data)=@_;
my $t = adTie( $format, $data );
my $t1 = tied(%$t);
my $ad = $t1->{ad};
my $arrayref = $ad->{storage}->{records};
unshift @$arrayref, $ad->{storage}->{col_names};
return $arrayref;
}
##################################################################
# END OF TIEDHASH STUFF
##################################################################
sub parser_type {
my $type = ref shift->{parser};
$type =~ s/AnyData::Format::(.*)/$1/;
return $type;
}
sub zpack {
my $self = shift;
return if $self->{storage}->{no_pack};
return if (ref $self->{storage} ) !~ /File$/;
# return unless $self->{needs_packing};
# $self->{needs_packing} = 0;
return unless scalar(keys %{ $self->{storage}->{deleted} } );
$self->{needs_packing} = 0;
# my @callA = caller 2;
# my @callB = caller 3;
# return if $callA[3] =~ /DBD/;
# return if $callB[3] and $callB[3] =~ /SQL::Statement/;
# return if $self->{parser}->{export_on_close};
#print "PACKING";
my $bak_file = $self->{storage}->get_file_name . '.bak';
my $bak = adTable( 'Text', $bak_file, 'o' );
my $bak_fh = $bak->{storage}->get_file_handle;
my $fh = $self->{storage}->get_file_handle;
die "Can't pack to backup $!" unless $fh and $bak_fh;
# $self->seek_first_record;
$fh->seek(0,0) || die $!;
#$bak_fh->seek(0,0) || die $!;
# while (my $line = $self->get_record) {
# next if $self->is_deleted($line);
while (my $line = $self->get_undeleted_record) {
my $tmpstr = $bak->{parser}->write_fields($line)
. $self->{parser}->{record_sep};
$bak_fh->write($tmpstr,length $tmpstr);
}
$fh->seek(0,0);
$fh->truncate(0) || die $!;
$bak->seek_first_record;
while (<$bak_fh>) {
$fh->write($_,length $_);
}
$fh->close;
$bak_fh->close;
$self->{doing_pack} = 0;
undef $self->{storage}->{deleted};
}
##########################################################
# FUNCTION CALL INTERFACE
##########################################################
sub adTie {
my($format,$file,$read_mode,$flags)=@_;
my $data;
if (ref $file eq 'ARRAY' && !$read_mode ) { $read_mode = 'u'; }
# ARRAY only {data=>[]};
if (scalar @_ == 1){
$read_mode = 'o';
tie %$data,
'AnyData::Storage::TiedHash',
adTable($format),
$read_mode;
return $data;
}
tie %$data,
'AnyData::Storage::TiedHash',
adTable($format,$file,$read_mode,undef,$flags),
$read_mode;
return $data;
}
sub adErr {
my $hash = shift;
my $t = tied(%$hash);
my $errstr = $t->{ad}->{parser}->{errstr}
|| $t->{ad}->{storage}->{errstr};
print $errstr if $errstr;
return $errstr;
}
sub adExport {
my $tiedhash = shift;
my($tformat,$tfile,$tflags)=@_;
my $ad = tied(%$tiedhash)->{ad};
my $sformat = ref $ad->{parser};
$sformat =~ s/AnyData::Format:://;
$tformat ||= $sformat;
if ($tformat eq $sformat and $tformat eq 'XML') {
return $ad->{parser}->export($ad->{storage},$tfile,$tflags);
}
return adConvert('adHash',$ad,$tformat,$tfile,undef,$tflags);
}
sub adConvert {
my( $source_format, $source_data,
$target_format,$target_file_name,
$source_flags,$target_flags )=@_;
my $target_type = 'STRING';
$target_type = 'FILE' if defined $target_file_name;
$target_type = 'ARRAY' if $target_format eq 'ARRAY';
my $data_type = 'AD-OBJECT';
$data_type = 'ARRAY' if ref $source_data eq 'ARRAY'
and ref $source_data->[0] eq 'ARRAY';
# INIT SOURCE OBJECT
my $source_ad;
if ($source_format eq 'adHash') {
$source_ad = $source_data;
undef $source_data;
}
else {
$source_format = 'CSV' if $source_format =~ /ARRAY/i;
$source_ad = adTable(
$source_format,$source_data,'r',undef,$source_flags
);
}
# GET COLUMN NAMES
my @cols;
if ( $data_type eq 'ARRAY') {
@cols = @{ shift @{ $source_data } };
}
else {
@cols = @{ $source_ad->col_names };
}
# insert storable here
if ('XML HTMLtable' =~ /$target_format/) {
$target_flags->{col_names} = join ',',@cols;
my $target_ad = adTable(
$target_format,$target_file_name,'o',undef,$target_flags
);
if ($data_type eq 'ARRAY' ) {
for my $row(@$source_data) {
my @fields=$source_ad->str2ary($row);
$target_ad->push_row( $source_ad->str2ary(\@fields) );
}
unshift @$source_data, \@cols;
return $target_ad->export($target_file_name);
}
$source_ad->seek_first_record;
while (my $row = $source_ad->get_undeleted_record) {
$target_ad->push_row( $source_ad->str2ary($row) );
}
return $target_ad->export($target_file_name);
}
my($target_ad,$fh);
### INIT TARGET OBJECT
if ($target_type eq 'FILE') {
$target_ad = adTable(
$target_format,$target_file_name,'c',undef,$target_flags
);
$fh = $target_ad->{storage}->get_file_handle;
}
elsif ($target_type eq 'STRING') {
$target_ad = AnyData->new( $target_format,$target_flags);
}
my($str,$aryref);
### GET COLUMN NAMES
if ( !$target_ad->{parser}->{no_col_print} ) {
if ($target_type eq 'ARRAY') {
push @$aryref, \@cols;
}
else {
$str = $target_ad->{parser}->write_fields(@cols);
$str =~ s/ /,/g if $target_format eq 'Fixed';
if ($target_type eq 'FILE') {
$fh->write($str,length $str);
}
if ($target_type eq 'STRING') {
$str = $target_ad->{parser}->write_fields(@cols);
}
}
}
# GET DATA
if ($data_type eq 'ARRAY') {
for my $row(@$source_data) {
my @fields = $source_ad->str2ary($row);
my $tmpstr = $target_ad->{parser}->write_fields(@fields);
# print $tmpstr if $check;
$fh->write($tmpstr,length $tmpstr) if $target_type eq 'FILE';
$str .= $tmpstr if $target_type eq 'STRING';
}
unshift @$source_data, \@cols;
return $str if $target_format ne 'ARRAY';
return $aryref;
}
$source_ad->seek_first_record; # unless $source_format eq 'XML';
while (my $row = $source_ad->get_undeleted_record) {
if ($target_format eq 'ARRAY') {
push @$aryref,$row if $target_format eq 'ARRAY';
next;
}
my @fields = $source_ad->str2ary($row);
my $tmpstr = $target_ad->{parser}->write_fields(@fields);
$str .= $target_type eq 'FILE'
? $fh->write($tmpstr,length $tmpstr)
: $tmpstr;
}
return $str if $target_format ne 'ARRAY';
return $aryref;
}
# if ('Storable' =~ /$target_format/) {
# $target_flags->{col_names} = join ',',@cols;
# $target_ad = adTable(
# $target_format,$target_file_name,'c',undef,$target_flags
# );
# if (ref $source_data && !$data) {
# for my $row(@$source_data) {
# push @$data,$row;
# }
# }
# elsif (!$data) {
# $source_ad->seek_first_record;
# while (my $row = $source_ad->fetch_row) {
# push @$data, $row;
# }
# }
# unshift @$data, \@cols;
# return $target_ad->{parser}->export($data,$target_file_name);
# }
sub str2ary {
my($ad,$row) = @_;
return @$row if ref $row eq 'ARRAY';
return $ad->{parser}->read_fields($row);
}
sub ad_string {
my($formatref,@fields) = @_;
my($format,$flags) = split_params($formatref);
# &dump($formatref); print "<$format>"; &dump($flags) if $flags;
#$formatref =~ s/(.*)/$1/;
my $ad = AnyData->new( $format, $flags );
return $ad->{parser}->write_fields(@fields);
# return $ad->write_fields(@fields);
}
sub ad_fields {
my($formatref,$str,$flags) = @_;
# my($format,$flags) = split_params($formatref);
# my $ad = AnyData::new( $format, $flags );
my $ad = AnyData->new( $formatref, $flags );
return $ad->{parser}->read_fields($str);
}
sub ad_convert_str {
my($source_formatref,$target_formatref,$str) = @_;
my($source_format,$source_flags) = split_params($source_formatref);
my($target_format,$target_flags) = split_params($target_formatref);
my $source_ad = AnyData->new( $source_format,$source_flags);
my $target_ad = AnyData->new( $target_format,$target_flags);
my @fields = $source_ad->read_fields($str);
return $target_ad->write_fields( @fields );
}
#########################################################
# UTILITY METHODS
#########################################################
#
# For all methods that have $format as a parameter,
# $format can be either a string name of a format e.g. 'CSV'
# or a hashref of the format and flags for that format e.g.
# { format => 'FixedWidth', pattern=>'A1 A3 A2' }
#
# given this parameter, this method returns $format and $flags
# setting $flags to {} if none are given
#
sub split_params {
my $source_formatref = shift;
my $source_flags = {};
my $source_format = $source_formatref;
if (ref $source_formatref eq 'HASH') {
while (my($k,$v)=each %$source_formatref) {
($source_format,$source_flags) = ($k,$v);
}
}
#use Data::Dumper;
return( $source_format, $source_flags);
}
sub dump {
my $var = shift;
my $name = ref($var);
#use Data::Dumper;
$Data::Dumper::Indent = 1;
$Data::Dumper::Useqq = 0;
print Data::Dumper->new([$var],[$name])->Dump();
}
###########################################################################
# START OF DOCUMENTATION
###########################################################################
=pod
=head1 NAME
AnyData -- easy access to data in many formats
=head1 SYNOPSIS
$table = adTie( 'CSV','my_db.csv','o', # create a table
{col_names=>'name,country,sex'}
);
$table->{Sue} = {country=>'de',sex=>'f'}; # insert a row
delete $table->{Tom}; # delete a single row
$str = $table->{Sue}->{country}; # select a single value
while ( my $row = each %$table ) { # loop through table
print $row->{name} if $row->{sex} eq 'f';
}
$rows = $table->{{age=>'> 25'}} # select multiple rows
delete $table->{{country=>qr/us|mx|ca/}}; # delete multiple rows
$table->{{country=>'Nz'}}={country=>'nz'}; # update multiple rows
my $num = adRows( $table, age=>'< 25' ); # count matching rows
my @names = adNames( $table ); # get column names
my @cars = adColumn( $table, 'cars' ); # group a column
my @formats = adFormats(); # list available parsers
adExport( $table, $format, $file, $flags ); # save in specified format
print adExport( $table, $format, $flags ); # print to screen in format
print adDump($table); # dump table to screen
undef $table; # close the table
adConvert( $format1, $file1, $format2, $file2 ); # convert btwn formats
print adConvert( $format1, $file1, $format2 ); # convert to screen
=head1 DESCRIPTION
The rather wacky idea behind this module and its sister module
DBD::AnyData is that any data, regardless of source or format should
be accessable and modifiable with the same simple set of methods.
This module provides a multi-dimensional tied hash interface to data
in a dozen different formats. The DBD::AnyData module adds a DBI/SQL
interface for those same formats.
Both modules provide built-in protections including appropriate
flocking() for all I/O and (in most cases) record-at-a-time access to
files rather than slurping of entire files.
Currently supported formats include general format flatfiles (CSV,
Fixed Length, etc.), specific formats (passwd files, httpd logs,
etc.), and a variety of other kinds of formats (XML, Mp3, HTML
tables). The number of supported formats will continue to grow
rapidly since there is an open API making it easy for any author to
create additional format parsers which can be plugged in to AnyData
itself and thereby be accessible by either the tiedhash or DBI/SQL
interface.
=head1 PREREQUISITES
The AnyData.pm module itself is pure Perl and does not depend on
anything other than modules that come standard with Perl. Some
formats and some advanced features require additional modules: to use
the remote ftp/http features, you must have the LWP bundle installed;
to use the XML format, you must have XML::Parser and XML::Twig installed;
to use the HTMLtable format for reading, you must have HTML::Parser and
HTML::TableExtract installed but you can use the HTMLtable for writing
with just the standard CGI module. To use DBI/SQL commands, you must have
DBI, DBD::AnyData, SQL::Statement and DBD::File installed.
=head1 USAGE
The AnyData module imports eight methods (functions):
adTie() -- create a new table or open an existing table
adExport() -- save an existing table in a specified format
adConvert() -- convert data in one format into another format
adFormats() -- list available formats
adNames() -- get the column names of a table
adRows() -- get the number of rows in a table or query
adDump() -- display the data formatted as an array of rows
adColumn() -- group values in a single column
The adTie() command returns a special tied hash. The tied hash can
then be used to access and/or modify data. See below for details
With the exception of the XML, HTMLtable, and ARRAY formats, the
adTie() command saves all modifications of the data directly to file
as they are made. With XML and HTMLtable, you must make your
modifications in memory and then explicitly save them to file with
adExport().
=head2 adTie()
my $table = adTie( $format, $data, $open_mode, $flags );
The adTie() command creates a reference to a multi-dimensional tied hash. In its simplest form, it simply reads a file in a specified format into the tied hash:
my $table = adTie( $format, $file );
$format is the name of any supported format 'CSV','Fixed','Passwd', etc.
$file is the name of a relative or absolute path to a local file
e.g. my $table = adTie( 'CSV', '/usr/me/myfile.csv' );
this creates a tied hash called $table by reading data in the
CSV (comma separated values) format from the file 'myfile.csv'.
The hash reference resulting from adTie() can be accessed and modified as follows:
use AnyData;
my $table = adTie( $format, $file );
$table->{$key}->{$column} # select a value
$table->{$key} = {$col1=>$val1,$col2=>$val2...} # update a row
delete $table->{$key} # delete a row
while(my $row = each %$table) { # loop through rows
print $row->{$col1} if $row->{$col2} ne 'baz';
}
The thing returned by adTie ($table in the example) is not an object,
it is a reference to a tied hash. This means that hash operations
such as exists, values, keys, may be used, keeping in mind that this
is a *reference* to a tied hash so the syntax would be
for( keys %$table ) {...}
for( values %$table ) {...}
Also keep in mind that if the table is really large, you probably do
not want to use keys and values because they create arrays in memory
containng data from every row in the table. Instead use 'each' as
shown above since that cycles through the file one record at a time
and never puts the entire table into memory.
It is also possible to use more advanced searching on the hash, see "Multiple Row Operations" below.
In addition to the simple adTie($format,$file), there are other ways to specify additional information in the adTie() command. The full syntax is:
my $table = adTie( $format, $data, $open_mode, $flags );
The $data parameter allows you to read data from remote files accessible by
http or ftp, see "Using Remote Files" below. It also allows you to treat
strings and arrays as data sources without needing a file at all, see
"Working with Strings and Arrays" below.
The optional $mode parameter defaults to 'r' if none is supplied or must be
one of
'r' read # read only access
'u' update # read/write access
'c' create # create a new file unless it already exists
'o' overwrite # create a new file, overwriting any that already exist
The $flags parameter allows you to specify additional information such as column names. See the sections in "Further Details" below.
With the exception of the XML, HTMLtable, and ARRAY formats, the
adTie() command saves all modifications of the data directly to file
as they are made. With XML and HTMLtable, you must make your
modifications in memory and then explicitly save them to file with
adExport().
=head2 adConvert()
adConvert( $format1, $data1, $format2, $file2, $flags1, $flags2 );
or
print adConvert( $format1, $data1, $format2, undef, $flags1, $flags2 );
or
my $aryref = adConvert( $format1, $data1, 'ARRAY', undef, $flags1 );
This method converts data in any supported format into any other supported
format. The resulting data may either be saved to a file (if $file2 is
supplied as a parameter) or sent back as a string to e.g. print the data
to the screen in the new format (if no $file2 is supplied), or sent back
as an array reference if $format2 is 'ARRAY'.
Some examples:
# convert a CSV file into an XML file
#
adConvert('CSV','foo.csv','XML','foo.xml');
# convert a CSV file into an HTML table and print it to the screen
#
print adConvert('CSV','foo.csv','HTMLtable');
# convert an XML string into a CSV file
#
adConvert('XML', ["