#!/usr/bin/perl # # dblib.pl # Copyright (C) 1991-2002 by John Heidemann # $Id: dblib.pl,v 1.41 2003/05/23 04:15:47 johnh Exp $ # # This program is distributed under terms of the GNU general # public license, version 2. See the file COPYING # in $dblibdir for details. # $col_headertag = "#h"; $list_headertag = "#L"; $headertag_regexp = "#[hL]"; $fs_code = 'D'; $header_fsre = "[ \t\n]+"; $fsre = "[ \t\n]+"; $outfs = "\t"; $header_outfs = " "; $codify_code = ""; $default_format = "%.5g"; sub col_mapping { local ($key, $n) = @_; die("dblib col_mapping: column name ``$key'' cannot begin with underscore.\n") if ($key =~ /^\_/); die("dblib col_mapping: duplicate column name ``$key''\n") if (defined($colnametonum{$key})); die ("dblib col_mapping: bad n.\n") if (!defined($n)); $colnames[$n] = $key; $colnametonum{$key} = $n; $colnametonum{"_$key"} = $n; $colnametonum{"$n"} = $n; # numeric synonyms } sub col_unmapping { local ($key) = @_; local ($n); $n = $colnametonum{$key}; $colnames[$n] = undef if (defined($n)); delete $colnametonum{$key}; delete $colnametonum{"_$key"}; } # Create a new column. # Insert it before column $desired_n. sub col_create { local ($key, $desired_n) = @_; local ($n, $i); die ("dblib col_create: called with duplicate column name ``$key''.\n") if (defined($colnametonum{$key})); if (defined($desired_n)) { # Shift columns over as necessary. $n = $colnametonum{$desired_n}; for ($i = $#colnames; $i >= $n; $i--) { $tmp_key = $colnames[$i]; &col_unmapping($tmp_key); &col_mapping($tmp_key, $i+1); }; } else { $n = $#colnames+1; }; $colnames[$n] = $key; &col_mapping ($colnames[$n], $n); return $n; } sub fs_code_to_fsre_outfs { my($value) = @_; my($fsre, $outfs); if (!defined($value) || $value eq 'D') { # default $fsre = "[ \t\n]+"; $outfs = "\t"; } elsif ($value eq 'S') { # double space $fsre = '\s\s+'; $outfs = " "; } elsif ($value eq 't') { # single tab $fsre = "\t"; $outfs = "\t"; } else { # anything else $value = eval "qq{$value}"; # handle backslash expansion $fsre = "[$value]+"; $outfs = $value; } return ($fsre, $outfs); } sub process_header { my($line, $headertag) = @_; $regexp = (defined($headertag) ? $headertag : $headertag_regexp); die ("dblib process_header: undefined header.\n\t$line\n") if (!defined($line)); die ("dblib process_header: invalid header format: ``$line''.\n") if ($line !~ /^$regexp/); # perl-5.8.0 on a FRESH redhat 8.0 system give the error: # "Split loop, line 1." # on the next line (with the split) # if we directly assign to @colnames. # @colnames = split(/$header_fsre/, $line); # The problem does NOT happen if we instead # go through a temp variable, so we do that. # (It also does not happen if we use an upgraded RH8 system!) # Strange. my @c = split(/$header_fsre/, $line); @colnames = @c; shift @colnames; # toss headertag @coloptions = (); # # handle options # while ($#colnames >= 0 && $colnames[0] =~ /^-(.)(.*)/) { push(@coloptions, shift @colnames); my($key, $value) = ($1, $2); if ($key eq 'F') { ($fsre, $outfs) = fs_code_to_fsre_outfs($value); $fs_code = $value; }; }; %colnametonum = (); foreach $i (0..$#colnames) { &col_mapping ($colnames[$i], $i); }; } sub readprocess_header { my($headertag) = @_; my($line); $line = ; &process_header($line, $headertag); } sub write_header_fh_tag { my($FH) = shift; my($headertag) = shift; my(@cols) = @_; @cols = @colnames if ($#cols == -1); print $FH "$headertag$header_outfs" . ($#coloptions != -1 ? join($header_outfs, @coloptions, '') : "") . join($header_outfs, @cols) . "\n"; } sub write_header { write_header_fh_tag('STDOUT', $col_headertag, @_); } # listized sub write_list_header { write_header_fh_tag('STDOUT', $list_headertag, @_); } sub escape_blanks { my($line) = @_; $line =~ s/[ \t]/_/g; return $line; } sub unescape_blanks { my($line) = @_; $line =~ s/_/ /g; return $line; } # # codify: convert db-code into perl code # # The conversion is a rename of all _foo's into # database fields. # For more perverse needs, _foo(N) means the Nth field after _foo. # Also, as of 29-Jan-00, _last_foo gives the last row's value # (_last_foo(N) is not supported). # To convert we eval $codify_code. # # NEEDSWORK: Should make some attempt to catch misspellings of column # names. # sub codify { if ($codify_code eq "") { foreach (@colnames) { $codify_code .= '$f =~ s/\b\_' . quotemeta($_) . '(\(.*\))/\$f\[' . $colnametonum{$_} . '+$1\]/g;' . "\n"; $codify_code .= '$f =~ s/\b\_' . quotemeta($_) . '\b/\$f\[' . $colnametonum{$_} . '\]/g;' . "\n"; $codify_code .= '$f =~ s/\b\_last\_' . quotemeta($_) . '\b/\$lf\[' . $colnametonum{$_} . '\]/g;' . "\n"; }; }; my($f) = join(";", @_); eval $codify_code; return $f; } # # code_prettify: Convert db-code into "pretty code". # sub code_prettify { local($prettycode) = join(";", @_); $prettycode =~ s/\n/ /g; # newlines will break commenting return $prettycode; } sub is_comment { return ($_ =~ /^\#/) || ($_ =~ /^\s*$/); } sub pass_comments { if (&is_comment) { print $_; return 1; }; return 0; } sub delayed_pass_comments { if (&is_comment) { $delayed_comments = '' if (!defined($delayed_comments)); $delayed_comments .= $_; return 1; }; return 0; } sub delayed_write_comments { my($FH) = @_; $FH = 'STDOUT' if (!defined($FH)); print $FH $delayed_comments if (defined($delayed_comments)); } sub delayed_flush_comments { delayed_write_comments(@_); $delayed_comments = undef; } sub split_cols { chomp $_; @lf = @f; # save the last columns @f = split(/$fsre/, $_); } sub write_cols { &write_these_cols_fh('STDOUT', @f); }; sub write_these_cols { &write_these_cols_fh('STDOUT', @_); }; sub write_these_cols_fh { my($FH) = shift; print $FH join($outfs, @_), "\n"; }; # # output compare/entry code based on ARGV # first entry is a sub: # sub row_col_fn { # my($row, $colname, $n) = @_; # # row is either a or b which we're comparing, or i for entries # # colname is the user-given column name # # n is 0..N of the cols to be sorted # } # See the code in dbjoin and dbsort for implementations. # sub generate_compare_code { my($compare_function_name) = shift @_; my($row_col_fn) = shift @_; my(@args) = @_; my ($compare_code, $enter_code, $reverse, $numeric, $insensitive, $i); $compare_code = "sub $compare_function_name {\n"; $enter_code = ""; $reverse = 0; $numeric = 0; $insensitive = 0; $i = 0; foreach (@args) { if (/^-/) { s/^-//; my($options) = $_; while ($options ne '') { $options =~ s/(.)//; ($ch) = $1; if ($ch eq 'r') { $reverse = 1; } elsif ($ch eq 'R') { $reverse = 0; } elsif ($ch eq 'n') { $numeric = 1; } elsif ($ch eq 'N') { $numeric = 0; } elsif ($ch eq 'i') { $insensitive = 1; die "$0: -i not yet supported\n"; } elsif ($ch eq 'I') { $insensitive = 0; die "$0: -I not yet supported\n"; } else { die "dblib generate_compare_code: unknown option $ch.\n"; }; }; next; }; die ("dblib generate_compare_code: unknown column $_.\n") if (!defined($colnametonum{$_})); if ($reverse) { $first = 'b'; $second = 'a'; } else { $first = 'a'; $second = 'b'; }; $compare_code .= '$r = (' . &$row_col_fn($first, $_, $i) . ' ' . ($numeric ? "<=>" : "cmp") . ' ' . &$row_col_fn($second, $_, $i) . '); ' . 'return $r if ($r);' . " # $_" . ($reverse && $numeric ? " (reversed, numeric)" : $reverse ? " (reversed)" : $numeric ? " (numeric)" : "") . "\n"; $enter_code .= &$row_col_fn('i', $_, $i) . ' = $f[' . $colnametonum{$_} . '];' . "\n"; $i++; } $compare_code .= "return 0;\n}"; # Create the comparison function. eval $compare_code; $@ && die("dblib generate_compare_code: error ``$@ in'' eval of compare_code.\n$compare_code"); return ($compare_code, $enter_code, $i-1); } sub abs { return $_[0] > 0 ? $_[0] : -$_[0]; } sub progname { my($prog) = ($0); $prog =~ s@^.*/@@g; return $prog; } sub force_numeric { my($value, $ignore_non_numeric) = @_; if ($value =~ /^\s*[-+]?[0-9]+(.[0-9]+)?(e[-+0-9]+)?\s*$/) { return $value + 0.0; # force numeric } else { if ($ignore_non_numeric) { return undef; next; } else { return 0.0; }; }; } my($tmpfile_counter) = 0; my(@tmpfiles) = (); # call as tmpfile(FH) sub db_tmpfile { my($fh) = @_; my($i) = $tmpfile_counter++; if ($i == 0) { # install signals on first time foreach (qw(HUP INT TERM)) { $SIG{$_} = \&db_tmpfile_cleanup_signal; }; }; my($fn) = &db_tmpdir . "/jdb.$$.$i"; push(@tmpfiles, $fn); open($fh, "+>$fn") || die "$0: tmpfile open failed.\n"; chmod 0600, $fn || die "$0: tmpfile chmod failed.\n"; return $fn; } sub db_tmpfile_cleanup { my($fn) = @_; my(@new_tmpfiles); foreach (@tmpfiles) { push(@new_tmpfiles, $_) if ($_ ne $fn); }; unlink($fn) == 1 or die "$0: cannot unlink tmpfiles.\n"; @tmpfiles = @new_tmpfiles; } sub db_tmpfile_cleanup_all { foreach (@tmpfiles) { db_tmpfile_cleanup($_) if (-f $_); }; } sub db_tmpfile_cleanup_signal { my($sig) = @_; # print "terminated with signal $sig, cleaning up ". join(',', @tmpfiles) . "\n"; exit(1); } sub db_tmpdir { $ENV{'TMPDIR'} = '/tmp' if (!defined($ENV{'TMPDIR'})); return $ENV{'TMPDIR'}; } my($dblib_date_inited) = undef; sub dblib_date_init { eval "use HTTP::Date; use POSIX"; } sub date_to_epoch { my($date) = @_; &dblib_date_init if (!$dblib_date_inited); return str2time($date); } sub epoch_to_date { my($epoch) = @_; &dblib_date_init if (!$dblib_date_inited); my($d) = strftime("%d-%b-%y", gmtime($epoch)); $d =~ s/^0//; return $d; } sub epoch_to_fractional_year { my($epoch) = @_; &dblib_date_init if (!$dblib_date_inited); my($year) = strftime("%Y", gmtime($epoch)); my($year_beg_epoch) = date_to_epoch("${year}0101"); my($year_end_epoch) = date_to_epoch(($year+1) . "0101"); my($year_secs) = $year_end_epoch - $year_beg_epoch; my($fraction) = ($epoch - $year_beg_epoch) / (1.0 * $year_secs); $fraction =~ s/^0//; return "$year$fraction"; } sub END { &db_tmpfile_cleanup_all(); } my(%iso_8859_1_to_html); my($iso_8859_1_to_html_re_lhs); sub init_dblib_text2html { $iso_8859_1_to_html_re_lhs = ('(['); foreach (qw(38:amp 60:lt 62:gt 160:nbsp 161:iexcl 162:cent 163:pound 164:curren 165:yen 166:brvbar 167:sect 168:uml 169:copy 170:ordf 171:laquo 172:not 173:shy 174:reg 175:macr 176:deg 177:plusmn 178:sup2 179:sup3 180:acute 181:micro 182:para 183:middot 184:cedil 185:sup1 186:ordm 187:raquo 188:frac14 189:frac12 190:frac34 191:iquest 192:Agrave 193:Aacute 194:Acirc 195:Atilde 196:Auml 197:Aring 198:AElig 199:Ccedil 200:Egrave 201:Eacute 202:Ecirc 203:Euml 204:Igrave 205:Iacute 206:Icirc 207:Iuml 208:ETH 209:Ntilde 210:Ograve 211:Oacute 212:Ocirc 213:Otilde 214:Ouml 215:times 216:Oslash 217:Ugrave 218:Uacute 219:Ucirc 220:Uuml 221:Yacute 222:THORN 223:szlig 224:agrave 225:aacute 226:acirc 227:atilde 228:auml 229:aring 230:aelig 231:ccedil 232:egrave 233:eacute 234:ecirc 235:euml 236:igrave 237:iacute 238:icirc 239:iuml 240:eth 241:ntilde 242:ograve 243:oacute 244:ocirc 245:otilde 246:ouml 247:divide 248:oslash 249:ugrave 250:uacute 251:ucirc 252:uuml 253:yacute 254:thorn 255:yuml)) { my($num, $code) = split(/:/); $iso_8859_1_to_html{chr($num)} = "&$code;"; if ($num > 128) { my($oct) = sprintf("%o", $num); $iso_8859_1_to_html{"\\$oct"} = "&$code;"; }; $iso_8859_1_to_html_re_lhs .= chr($num); } $iso_8859_1_to_html_re_lhs .= ']|\\\\[23][0-7][0-7])'; } sub dblib_text2html { init_dblib_text2html if (!defined($iso_8859_1_to_html{'&'})); my($s) = @_; $s =~ s/$iso_8859_1_to_html_re_lhs/$iso_8859_1_to_html{$1}/ge; return $s; } 1;