# Parse::PerlConfig - parse a configuration file written in Perl # Copyright (C) 1999 Michael Fowler, all rights reserved # This program is free software; you can redistribute it and/or modify it # under the same terms as Perl itself. package Parse::PerlConfig; require Exporter; use Fcntl qw(O_RDONLY); use strict; use vars ( qw($VERSION @ISA @EXPORT_OK), qw($saved_dollar_slash), # used in _do_file() ); @ISA = qw(Exporter); @EXPORT_OK = qw(parse); $VERSION = '0.05'; my %thing_str2key = ( '$' => 'SCALAR', '@' => 'ARRAY', '%' => 'HASH', '&' => 'CODE', '*' => 'GLOB', 'i' => 'IO', ); my %thing_key2str; @thing_key2str{values %thing_str2key} = keys(%thing_str2key); sub parse { local *FILE; my $subname = (caller(0))[3]; my %args = ( Namespace_Base => __PACKAGE__ . '::ConfigFile', Thing_Order => '$@%&i*', Taint_Clean => 0, Warn_default => 'noop', Warn_preparse => 'default', Warn_eval => 'default', Error_default => 'warn', Error_argument => 'default', Error_file_is_dir => 'default', Error_failed_open => 'default', Error_eval => 'default', Error_unknown_thing => 'default', Error_unknown_handler => 'default', Error_invalid_lexical => 'default', Error_invalid_namespace => 'default', ); if (ref($_[0]) eq 'HASH') { # first argument is a hash.. %args = (%args, %{+ shift }, @_); # ..dereference it } elsif (ref($_[0]) eq 'ARRAY') { # first argument is an array.. %args = (%args, @{+ shift }, @_); # ..dereference it } elsif (@_) { %args = (%args, @_); } my $def_errsub = _errsub($args{'Error_default'}); my $def_warnsub = _errsub($args{ 'Warn_default'}); my(%errsubs, %warnsubs); foreach my $handler (qw( argument file_is_dir failed_open eval unknown_thing unknown_handler invalid_namespace )) { $errsubs{$handler} = _errsub($args{"Error_$handler"}, $def_errsub); } foreach my $handler (qw(preparse eval)) { $warnsubs{$handler} = _errsub($args{"Warn_$handler"}, $def_warnsub); } # This allows us to pass around %args, rather than each hash necessary. $args{'_errsubs'} = \%errsubs; $args{'_warnsubs'} = \%warnsubs; my @files; push(@files, $args{File}) if defined($args{File}); if (ref($args{Files}) eq 'ARRAY') { push(@files, @{ $args{Files} }); } elsif (defined $args{Files}) { push(@files, $args{Files}); } unless (@files) { $errsubs{'argument'}->( "Files or File argument required in call to $subname." ); return; } my @handlers; push(@handlers, $args{Handler}) if defined($args{Handler}); if (ref($args{Handlers}) eq 'ARRAY') { push(@handlers, @{ $args{Handlers} }); } elsif (defined $args{Handlers}) { push(@handlers, $args{Handlers}); } my %lexicals; if (ref $args{Lexicals} eq 'HASH') { %lexicals = %{ $args{Lexicals} }; } elsif (defined $args{Lexicals}) { $errsubs{'argument'}->( "Lexicals argument must be a hashref in call to $subname." ); } my @def_thing_order = _thingstr_to_array(\%args, $args{'Thing_Order'}); my %custom_symbols; if (ref($args{Symbols}) eq 'HASH') { while (my($sym, $order) = each(%{$args{Symbols}})) { $custom_symbols{$sym} = [ _thingstr_to_array(\%args, $order) ]; } } my $lexicals_string = _construct_lexicals_string(\%args, \%lexicals); # Having checked all of our arguments, we run through our files. my %parsed_symbols; FILE: foreach my $file (@files) { $errsubs{'file_is_dir'}->("Config file \"$file\" is a directory.") if -d $file; $warnsubs{'preparse'}->("Preparing to parse config file \"$file\"."); unless (sysopen FILE, $file, O_RDONLY) { $errsubs{'failed_open'}->( "Unable to open config file \"$file\": \l$!." ); return; } if ($args{'Taint_Clean'}) { require IO::Handle; FILE->untaint; } my $namespace = _construct_namespace(\%args, $file); unless (_valid_namespace($namespace)) { $errsubs{'invalid_namespace'}->( "Namespace \"$namespace\" is invalid." ); return; } { my %parse_perl_config = ( Parse_Args => \%args, Filename => $file, Namespace => $namespace, Error => undef, ); my $eval_warn = $warnsubs{'eval'}; local $SIG{__WARN__} = sub { $eval_warn->(join "", @_) }; _do_file(\*FILE, $namespace, \%parse_perl_config, $lexicals_string); my $error; if (defined($error = $parse_perl_config{Error})) { $errsubs{'eval'}->( "Configuration file raised an error: $error." ); next FILE; } elsif ($@) { $error = $@; 1 while chomp($error); $errsubs{'eval'}->("Error in configuration eval: $error."); next FILE; } } _parse_symbols( Namespace => $namespace, Thing_Order => \@def_thing_order, Symbols => \%custom_symbols, Hash => \%parsed_symbols, ); } _dispatch_handlers(\%args, \@handlers, \%parsed_symbols); return \%parsed_symbols; } # _parse_symbols # Namespace - namespace to parse symbols from # Symbols - hashref of symbols with specific thing ordering # Thing_Order - default thing order, arrayref # Hash - a hashref into which parsed symbols are placed # # This an internal function used by parse() to do the actual parsing of # symbols from a namespace. This function has the potential to be a public # one, if sanity checking on arguments is added. sub _parse_symbols { my %args = @_; my $namespace = $args{'Namespace'}; my %custom_symbols = %{ $args{'Symbols'} }; my @def_thing_order = @{ $args{'Thing_Order'} }; my $parsed_symbols = $args{'Hash'}; no strict 'refs'; while (my($symbol, $glob) = each(%{"$namespace\::"})) { my @thing_order; if (exists $custom_symbols{$symbol}) { @thing_order = @{ $custom_symbols{$symbol} }; } else { @thing_order = @def_thing_order; } my $value; foreach my $thing (@thing_order) { if ($thing eq 'SCALAR') { # Special case for scalars; we always get a scalar # reference, even if the underlying scalar is undefined. if (defined ${ *$glob{SCALAR} }) { $value = ${ *$glob{SCALAR} }; last; } } elsif (defined *$glob{$thing}) { $value = *$glob{$thing}; last; } } $$parsed_symbols{$symbol} = $value if defined($value); # In order to prevent various warnings, and the symbols from still # being there (even though the symbol table isn't), we undef each # glob as we go. undef(*{"$namespace\::$symbol"}); } return; } sub _dispatch_handlers { my($args, $handlers, $parsed_symbols) = (shift, shift, shift); foreach my $handler (@$handlers) { if (ref $handler eq 'CODE') { $handler->($parsed_symbols); } elsif (ref $handler eq 'HASH') { @$handler{keys %$parsed_symbols} = values %$parsed_symbols } else { $$args{'_errsubs'}{'unknown_handler'}->( 'Unknown handler type "' . ref($handler) . '"' ); } } } # _do_file # # # Reads the given filename using sysopen and eval, in the specified # namespace. The hash %parse_perl_config is set with the specified # hashref. # # The reason this subroutine exists is to keep the lexical space as clean # as possible, while still allowing some lexicals through. Were this # functionality inlined with the rest of parse(), a configuration file # would have access to parse()'s lexicals. To keep things even cleaner # local() is used rather than my(). Obviously the latter is preferable, # but in this case, would cause problems sub _do_file { # Arguments are accessed through @_ indexing, rather than shifting, to # keep the lexical space as clean as possible. local *FILE = $_[0] ; my %parse_perl_config = %{+ $_[2] }; # We go to some lengths to be able to slurp the file, while # still keeping $/ intact. local $saved_dollar_slash = $/; local $/; no strict; eval '$/ = $saved_dollar_slash;' . "package $_[1];" . $_[3] . # lexical definitions ; } sub _construct_lexicals_string { my($args, $lexicals) = (shift, shift); return '' unless %$lexicals; require Data::Dumper; my $inv_lex_errsub = $$args{'_errsubs'}{'invalid_lexical'}; my $lexicals_string = ''; LEXICAL: while (my($key, $value) = each(%$lexicals)) { if ($key !~ /^([^_\W][\w\d]*|\w[\w\d]+)$/) { $inv_lex_errsub->( "Lexical name \"$key\" is invalid, must be a valid " . "identifier." ); next LEXICAL; } elsif (ref($value) eq 'CODE') { $inv_lex_errsub->( "Lexical \"$key\" value is invalid, code references " . "are not allowed." ); next LEXICAL; } elsif ($key eq 'parse_perl_config' && ref($value) eq 'HASH') { $inv_lex_errsub->( "Cannot have a hash lexical named \"parse_perl_config\"." ); next LEXICAL; } $lexicals_string .= 'my ' . Data::Dumper->Dump([$value], ["*$key"]); } $lexicals_string; } sub _construct_namespace { my($args, $file) = (shift, shift); my $namespace; if (defined $$args{'Namespace'}) { $namespace = $$args{'Namespace'}; } else { $namespace = "$$args{'Namespace_Base'}::" . _encode_namespace($file); } if ($$args{'Taint_Clean'}) { # We've already filtered the namespace, but perl doesn't know # that; fake it. ($namespace) = ($namespace =~ /(.*)/); } return $namespace; } sub _valid_namespace { my $namespace = shift; foreach my $ns_ele (split /::/, $namespace) { return 0 unless $ns_ele =~ /^[_A-Za-z][_A-Za-z0-9]*/; } return 1; } sub _encode_namespace { my $namespace = shift; my @namespace; foreach my $ns_ele (split /::/, $namespace) { # ^A-Za-z0-9 (as opposed to [\W\D]) is spelled out explicitly # because package names are not (yet?) locale-friendly. $ns_ele =~ s{ ( (?:^[^A-Za-z]) # first character must not be a number | [^A-Za-z0-9] # any further characters can be ) }{ sprintf("_%2x", ord $1) }egx ; push(@namespace, $ns_ele); } return join("::", @namespace); } # _thingstr_to_array # # Translates a thing string ($%@*i&) into the associated glob keys: # $ SCALAR # @ ARRAY # % HASH # * GLOB # i IO # & CODE # # # Returns the keys as a list, in the same order. The specified coderef is # an error function used to report when an unknown character is # encountered. # # If the string is actually an array reference, the array is dereferenced, # checked for invalid keys, and returned. sub _thingstr_to_array { my($args, $string) = (shift, shift); my $errsub = $$args{'_errsubs'}{'unknown_thing'}; if (ref($string) eq 'ARRAY') { my @filtered; foreach my $thing (@$string) { $thing = uc($thing); if (!exists $thing_key2str{$thing}) { $errsub->("Unknown thing key \"$thing\"."); next; } push(@filtered, $thing); } return @filtered; } my @keys; foreach my $c (split //, $string) { unless (defined $thing_str2key{$c}) { $errsub->("Undefined thing string \"$c\"."); next; } push(@keys, $thing_str2key{$c}); } return @keys; } sub _fwarn { CORE::warn(shift() . "\n") } sub _warn { CORE::warn(shift() . "\n") if $^W } sub _die { CORE::die (shift() . "\n") } sub _noop { } # _errsub [] # Responsible for parsing the "default", "noop", "warn", "fwarn", and # "die" strings, and returning an appropriate code reference. sub _errsub { my($spec, $default) = (shift, shift); $spec = lc($spec) unless ref($spec); (ref $spec eq 'CODE' ) && return $spec; ( $spec eq 'warn' ) && return \&_warn; ( $spec eq 'fwarn') && return \&_fwarn; ( $spec eq 'die' ) && return \&_die; ( $spec eq 'noop' ) && return \&_noop; # catch anything that falls through return (ref $default eq 'CODE') ? $default : \&_warn; } 1; __END__ =head1 NAME Parse::PerlConfig - parse a configuration file written in Perl =head1 SYNOPSIS use Parse::PerlConfig; my $parsed = Parse::PerlConfig::parse( File => "/etc/perlapp/conf", Handlers => [\%config, \&config], ); =head1 DESCRIPTION This module is useful for parsing a configuration file written in Perl and obtaining the values defined therein. This is achieved through the parse() function, which creates a namespace, reads in Perl code, evals it, and then examines the namespace's symbol table. Symbols are then processed into a hash and returned. =head2 Export The parse() function is exportable upon request. =head2 Parsing Parsing is not a simple do("filename"). Instead the filenames specified are opened, read, eval'd, and closed. The justification for this being twofold: =over 4 =item no @INC search I did not want surprises in what file was found; do("file") searches @INC. =item lexicals I wanted to be able to insert lexicals for the code in the file to see. Being able to define variables without having them parsed back out (remember, the namespace is searched) is a nice feature. =back Parsing (in this manner) requires a namespace. By default, the namespace is constructed by appending Namespace_Base to a unique identifier (currently, an encoded version of the filename, but don't rely on this). You can override this behaviour by specifying an explicit Namespace argument. Prior to eval'ing the contents of a configuration file the lexical hash %parse_perl_config is initialized with several keys (documented below); if a Lexicals argument was given each of the lexicals specified are initialized. There are a few caveats; lexicals specified in the Lexicals argument cannot override %parse_perl_config; keys specified in Lexicals cannot be code references, because code references cannot currently be reliably reconstructed; modifications to %parse_perl_config keys (other than Error, documented below) are discouraged, as the results are not defined. The %parse_perl_config hash contains the following keys: =over 4 =item Error Making this key a true value will cause the error handler Error_eval to be called with the value. =item Namespace The namespace the file is being evaluated in. =item Filename The name of the file being parsed. =item Parse_Args A hash of the arguments passed to parse(). =back Once the namespace has been setup, and the code eval'd, it is then parse()'s job to go through the namespace's symbol table and look for "things". What it looks for depends on the Thing_Order and Symbols arguments. After that, handlers are updated, and a hash reference of what was parsed out is returned. =head1 The parse subroutine =head2 Arguments parse() takes a list of key-value pairs as its arguments and adds them to an argument hash. If the first argument to parse() is a hash or array reference, it is dereferenced and used as if it were specified as a list. All elements following this argument are added to the arguments hash, and they override any settings specified by the reference. This means the call: parse( { Files => "/home/me/config.conf", Error_default => 'fwarn' }, Files => "/home/you/config.conf" ); causes parse()'s argument hash to consist of the following (ignoring default settings): Files => "/home/you/config.conf", Error_default => "fwarn", Simply replace the braces, {}, with brackets, [], and you get the same result. This makes it convenient to store commonly-used arguments to parse() in a hash or array, and efficiently pass these arguments to parse(), while still allowing a seperate Files argument for each call. The below itemization of parse()'s arguments describes key-value pairs. Each item consists of a key name and a description of the expected value for that key. The value description requires some explanation. A single pipe, "|", indicates alternative values; only one of the values must be specified. Values bracketed with "<" and ">" indicate that value is not literal, but figurative. So, in the case of , you must specify a code reference (a closure or reference to a named subroutine), not the literal string "". Values without such bracketing are literal. Braces, {}, indicate a hash reference is required; brackets, [], indicate an array reference is required. Below each key-value description is a description of the default setting, followed by a description of what the argument means. =over 4 =item Files | [ ...] default: none, this argument is required This is the file or files you wish to parse. If a file cannot be parsed for any reason the entire parse is not abandoned, the file is simply skipped (after calling an appropriate error handling function). =item File Equivalent to Files . =item Handlers | | [| ...] default: none By default, parse() simply returns a hash reference of symbol names and their values. Given a Handlers argument, parse() will add key-value pairs to each hash reference specified, and call each code reference specified with a single argument, the hash reference it returns. =item Handler | Equivalent to Handlers |. =item Lexicals default: none The key-value pairs in the specified hashref are made into lexical variables in the configuration eval. See the section on Parsing for further information. =item Thing_Order | default: '$@%&i*' Specifies the default thing order for symbols parsed from each configuration file. See the section Things for further information. =item Taint_Clean default: false If set to any true value the filehandle opened on the configuration file is untainted before evaling the code contained therein. Because this involves loading IO::Handle, which involves quite a bit of code, the option is turned off by default. You I get taint exceptions if you don't specify this option while running in C<-T> mode. Also, as the namespace is currently constructed, having a tainted filename will cause the namespace name to be tainted, so it is also untainted. In the case of an explicitly specified Namespace value, it will also be untainted. No other values are untainted. This includes any key-value pairs specified by the Lexicals argument; you must untaint those yourself, since there is no reasonable way for parse() to determine how best to untaint them. =item Symbols default: empty hashref This is an override for the Thing_Order argument above. The keys in the specified hashref are symbols you want parsed specially, the values the thing order (either a string or array reference). See the section Things for further information regarding thing order. =item Namespace default: generated from Namespace_Base and a unique identifier This option explicitly specifies the namespace the files are parsed in. See the section Parsing for further information. =item Namespace_Base default: Parse::PerlConfig::ConfigFile Unless the Namespace argument is specified, the namespace a file is parsed in is generated by appending a cleaned up version of the filename to this setting. See the section Parsing for further information. =item Error_* See the section Error Handling for further information. =back =head2 Things "Things" (as taken from the Perl documentation, regarding the *foo{THING} syntax) are the Perl datatypes. These include scalars, arrays, hashes, subroutines, IO handles, and globs. Anywhere a "things" argument is required you can specify one of two things; a string containing the special "thing" characters, or an array reference of each thing's actual name. The thing characters are as follows: C<$> for scalar, C<%> for a hash, C<@> for an array, C<&> for a subroutine, C for an IO handle, and C<*> for a glob. The full name for each coincides with the full name for each datatype in their respective glob slots: SCALAR for a scalar, HASH for a hash, ARRAY for an array, CODE for a subroutine, IO for an IO handle, and GLOB for a glob. =head2 Exception Handling parse() takes various Error_* and Warn_* arguments that determine how it handles any problems it encounters. Each argument can take one of several values. =over 4 =item default The error handling specifed by Error_default in the case of an Error_* argument, or Warn_default in the case of a Warn_* argument, is used. =item noop The error is ignored. =item warn Results in a call to CORE::warn() with a trailing newline, but only if C<$^W> is set to a true value. =item fwarn Like I, but the warning is raised regardless of C<$^W>'s value. =item die Results in a call to CORE::die() with a trailing newline. =item The code reference will be called with a single argument, that of the error message. The error message is guaranteed to contain no trailing newlines (in case the code reference decides to die() or warn()). =back There are various handler arguments. Unless otherwise specified, the default handler is used (Error_default's or Warn_default's value). =over 4 =item Warn_default default: noop The default warning handler. =item Warn_preparse Called just before a file is parsed to indicate parsing is about to begin. =item Warn_eval Called with any warnings issued by the eval'd file. =item Error_default default: warn The default error handler. =item Error_argument Called if there is a problem with one of the arguments specified. =item Error_file_is_dir Called if a configuration file specified was discovered to be a directory. =item Error_failed_open Called if the open attempt on a configuration file fails. =item Error_eval Called if the variable $parse_perl_config{Error} is set in the configuration file, or if there was an eval error. =item Error_unknown_thing Called if there is a problem with a thing character or thing name in a thing argument (thing thing thing). =item Error_unknown_handler Called if an unknown reference is encountered in the Handlers argument. =item Error_invalid_lexical Called if an invalid lexical name or a CODE reference value is encountered in the Lexicals argument. =item Error_invalid_namespace Called if either the constructed namespace (using Namespace_Base) or a specified Namespace value is invalid. This may indicate an error in the construction of a namespace name (the generation of a unique identifier), but it's most likely you specified Namespace_Base or Namespace with invalid characters. =back =head1 BUGS Due to the fact that the scalar slot in a glob is always filled it is not possible to distinguish from a scalar that was never defined (e.g. C<@foo> was, but C<$foo> was never mentioned) from one that is simply undef. Because of this, for example, if you have a thing order of C<$@> and code along the lines of C<$foo = undef; @foo = ();> the 'foo' key of the hash will be an array reference, despite there being a scalar and C<$> coming first in the thing order. =head1 TODO t/parse/symbols.t, t/parse/multi-file.t, t/parse/namespace.t =head1 AUTHOR Michael Fowler =cut