.\" Automatically generated by Pod::Man v1.37, Pod::Parser v1.32 .\" .\" Standard preamble: .\" ======================================================================== .de Sh \" Subsection heading .br .if t .Sp .ne 5 .PP \fB\\$1\fR .PP .. .de Sp \" Vertical space (when we can't use .PP) .if t .sp .5v .if n .sp .. .de Vb \" Begin verbatim text .ft CW .nf .ne \\$1 .. .de Ve \" End verbatim text .ft R .fi .. .\" Set up some character translations and predefined strings. \*(-- will .\" give an unbreakable dash, \*(PI will give pi, \*(L" will give a left .\" double quote, and \*(R" will give a right double quote. | will give a .\" real vertical bar. \*(C+ will give a nicer C++. Capital omega is used to .\" do unbreakable dashes and therefore won't be available. \*(C` and \*(C' .\" expand to `' in nroff, nothing in troff, for use with C<>. .tr \(*W-|\(bv\*(Tr .ds C+ C\v'-.1v'\h'-1p'\s-2+\h'-1p'+\s0\v'.1v'\h'-1p' .ie n \{\ . ds -- \(*W- . ds PI pi . if (\n(.H=4u)&(1m=24u) .ds -- \(*W\h'-12u'\(*W\h'-12u'-\" diablo 10 pitch . if (\n(.H=4u)&(1m=20u) .ds -- \(*W\h'-12u'\(*W\h'-8u'-\" diablo 12 pitch . ds L" "" . ds R" "" . ds C` . ds C' 'br\} .el\{\ . ds -- \|\(em\| . ds PI \(*p . ds L" `` . ds R" '' 'br\} .\" .\" If the F register is turned on, we'll generate index entries on stderr for .\" titles (.TH), headers (.SH), subsections (.Sh), items (.Ip), and index .\" entries marked with X<> in POD. Of course, you'll have to process the .\" output yourself in some meaningful fashion. .if \nF \{\ . de IX . tm Index:\\$1\t\\n%\t"\\$2" .. . nr % 0 . rr F .\} .\" .\" For nroff, turn off justification. Always turn off hyphenation; it makes .\" way too many mistakes in technical documents. .hy 0 .if n .na .\" .\" Accent mark definitions (@(#)ms.acc 1.5 88/02/08 SMI; from UCB 4.2). .\" Fear. Run. Save yourself. No user-serviceable parts. . \" fudge factors for nroff and troff .if n \{\ . ds #H 0 . ds #V .8m . ds #F .3m . ds #[ \f1 . ds #] \fP .\} .if t \{\ . 
ds #H ((1u-(\\\\n(.fu%2u))*.13m) . ds #V .6m . ds #F 0 . ds #[ \& . ds #] \& .\} . \" simple accents for nroff and troff .if n \{\ . ds ' \& . ds ` \& . ds ^ \& . ds , \& . ds ~ ~ . ds / .\} .if t \{\ . ds ' \\k:\h'-(\\n(.wu*8/10-\*(#H)'\'\h"|\\n:u" . ds ` \\k:\h'-(\\n(.wu*8/10-\*(#H)'\`\h'|\\n:u' . ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'^\h'|\\n:u' . ds , \\k:\h'-(\\n(.wu*8/10)',\h'|\\n:u' . ds ~ \\k:\h'-(\\n(.wu-\*(#H-.1m)'~\h'|\\n:u' . ds / \\k:\h'-(\\n(.wu*8/10-\*(#H)'\z\(sl\h'|\\n:u' .\} . \" troff and (daisy-wheel) nroff accents .ds : \\k:\h'-(\\n(.wu*8/10-\*(#H+.1m+\*(#F)'\v'-\*(#V'\z.\h'.2m+\*(#F'.\h'|\\n:u'\v'\*(#V' .ds 8 \h'\*(#H'\(*b\h'-\*(#H' .ds o \\k:\h'-(\\n(.wu+\w'\(de'u-\*(#H)/2u'\v'-.3n'\*(#[\z\(de\v'.3n'\h'|\\n:u'\*(#] .ds d- \h'\*(#H'\(pd\h'-\w'~'u'\v'-.25m'\f2\(hy\fP\v'.25m'\h'-\*(#H' .ds D- D\\k:\h'-\w'D'u'\v'-.11m'\z\(hy\v'.11m'\h'|\\n:u' .ds th \*(#[\v'.3m'\s+1I\s-1\v'-.3m'\h'-(\w'I'u*2/3)'\s-1o\s+1\*(#] .ds Th \*(#[\s+2I\s-2\h'-\w'I'u*3/5'\v'-.3m'o\v'.3m'\*(#] .ds ae a\h'-(\w'a'u*4/10)'e .ds Ae A\h'-(\w'A'u*4/10)'E . \" corrections for vroff .if v .ds ~ \\k:\h'-(\\n(.wu*9/10-\*(#H)'\s-2\u~\d\s+2\h'|\\n:u' .if v .ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'\v'-.4m'^\v'.4m'\h'|\\n:u' . \" for low resolution devices (crt and lpr) .if \n(.H>23 .if \n(.V>19 \ \{\ . ds : e . ds 8 ss . ds o a . ds d- d\h'-1'\(ga . ds D- D\h'-1'\(hy . ds th \o'bp' . ds Th \o'LP' . ds ae ae . 
ds Ae AE .\} .rm #[ #] #H #V #F C .\" ======================================================================== .\" .IX Title "cfg 3" .TH cfg 3 "OSSP cfg 0.9.11" "10-Aug-2006" "Configuration Parsing" .SH "NAME" \&\fBOSSP cfg\fR \- Configuration Parsing .SH "VERSION" .IX Header "VERSION" \&\s-1OSSP\s0 cfg \s-10.9.11 (10-Aug-2006)\s0 .SH "SYNOPSIS" .IX Header "SYNOPSIS" .IP "\fB\s-1API\s0 Header:\fR" 4 .IX Item "API Header:" cfg.h .IP "\fB\s-1API\s0 Types:\fR" 4 .IX Item "API Types:" cfg_t, cfg_rc_t, cfg_node_type_t, cfg_node_t, cfg_node_attr_t, cfg_fmt_t, cfg_data_t, cfg_data_ctrl_t, cfg_data_cb_t, cfg_data_attr_t .IP "\fB\s-1API\s0 Functions:\fR" 4 .IX Item "API Functions:" cfg_create, cfg_destroy, cfg_error, cfg_version, cfg_import, cfg_export, cfg_node_create, cfg_node_destroy, cfg_node_clone, cfg_node_set, cfg_node_get, cfg_node_root, cfg_node_select, cfg_node_find, cfg_node_apply, cfg_node_cmp, cfg_node_link, cfg_node_unlink, cfg_data_set, cfg_data_get, cfg_data_ctrl .SH "DESCRIPTION" .IX Header "DESCRIPTION" \&\fB\s-1OSSP\s0 cfg\fR is an ISO-C library for parsing arbitrary C/\*(C+\-style configuration files. A configuration is a sequence of directives. Each directive consists of zero or more tokens. Each token can be either a string or again a complete sequence. This means the configuration syntax has a recursive structure and in this way allows creating configurations with arbitrarily nested sections. .PP Additionally the configuration syntax provides complex single/double/balanced quoting of tokens, hexadecimal/octal/decimal character encodings, character escaping, C/\*(C+ and Shell-style comments, etc. The library \s-1API\s0 allows importing a configuration text into an Abstract Syntax Tree (\s-1AST\s0), traversing the \s-1AST\s0 and optionally exporting the \s-1AST\s0 again as a configuration text.
.Sh "\s-1CONFIGURATION\s0 \s-1SYNTAX\s0" .IX Subsection "CONFIGURATION SYNTAX" The configuration syntax is described by the following context-free (Chomsky\-2) grammar: .PP \&\fBsequence\fR ::= \fIempty\fR | \fBdirective\fR | \fBdirective\fR \fB\s-1SEP\s0\fR \fBsequence\fR .PP \&\fBdirective\fR ::= \fBtoken\fR | \fBtoken\fR \fBdirective\fR .PP \&\fBtoken\fR ::= \fB\s-1OPEN\s0\fR \fBsequence\fR \fB\s-1CLOSE\s0\fR | \fBstring\fR .PP \&\fBstring\fR ::= \fB\s-1DQ_STRING\s0\fR # double quoted string | \fB\s-1SQ_STRING\s0\fR # single quoted string | \fB\s-1FQ_STRING\s0\fR # flexible quoted string | \fB\s-1PT_STRING\s0\fR # plain text string .PP The other contained terminal symbols are themselves defined by the following set of grammar productions (regular sub-grammars for character sequences given as Perl-style regular expressions "/\fIregex\fR/"): .PP \&\fB\s-1SEP\s0\fR ::= /;/ .PP \&\fB\s-1OPEN\s0\fR ::= /{/ .PP \&\fB\s-1CLOSE\s0\fR ::= /}/ .PP \&\fB\s-1DQ_STRING\s0\fR ::= /"/ \fB\s-1DQ_CHARS\s0\fR /"/ .PP \&\fB\s-1DQ_CHARS\s0\fR ::= \fIempty\fR | \fB\s-1DQ_CHAR\s0\fR \fB\s-1DQ_CHARS\s0\fR .PP \&\fB\s-1DQ_CHAR\s0\fR ::= /\e\e"/ # escaped quote | /\e\ex\e{[0\-9a\-fA\-F]+\e}/ # hex-char group | /\e\ex[0\-9a\-fA\-F]{2}/ # hex-char | /\e\e[0\-7]{1,3}/ # octal character | /\e\e[nrtbfae]/ # special character | /\e\e\en[ \et]*/ # line continuation | /\e\e\e\e/ # escaped escape | /./ # any other char .PP \&\fB\s-1SQ_STRING\s0\fR ::= /'/ \fB\s-1SQ_CHARS\s0\fR /'/ .PP \&\fB\s-1SQ_CHARS\s0\fR ::= \fIempty\fR | \fB\s-1SQ_CHAR\s0\fR \fB\s-1SQ_CHARS\s0\fR .PP \&\fB\s-1SQ_CHAR\s0\fR ::= /\e\e'/ # escaped quote | /\e\e\en[ \et]*/ # line continuation | /\e\e\e\e/ # escaped escape | /./ # any other char .PP \&\fB\s-1FQ_STRING\s0\fR ::= /q/ \fB\s-1FQ_OPEN\s0\fR \fB\s-1FQ_CHARS\s0\fR \fB\s-1FQ_CLOSE\s0\fR .PP \&\fB\s-1FQ_CHARS\s0\fR ::= \fIempty\fR | \fB\s-1FQ_CHAR\s0\fR \fB\s-1FQ_CHARS\s0\fR .PP \&\fB\s-1FQ_CHAR\s0\fR ::= /\e\e/ \fB\s-1FQ_OPEN\s0\fR # escaped open | /\e\e/
\fB\s-1FQ_CLOSE\s0\fR # escaped close | /\e\e\en[ \et]*/ # line continuation | /./ # any other char .PP \&\fB\s-1FQ_OPEN\s0\fR ::= /[!"#$%&'()*+,\-./:;<=>?@\e[\e\e\e]^_`{|}~]/ .PP \&\fB\s-1FQ_CLOSE\s0\fR ::= << \fB\s-1FQ_OPEN\s0\fR or corresponding closing char ('}])>') if \fB\s-1FQ_OPEN\s0\fR is a char of '{[(<' >> .PP \&\fB\s-1PT_STRING\s0\fR ::= \fB\s-1PT_CHAR\s0\fR \fB\s-1PT_CHARS\s0\fR .PP \&\fB\s-1PT_CHARS\s0\fR ::= \fIempty\fR | \fB\s-1PT_CHAR\s0\fR \fB\s-1PT_CHARS\s0\fR .PP \&\fB\s-1PT_CHAR\s0\fR ::= /[^ \et\en;{}"']/ # none of specials .PP Additionally, white-space \fB\s-1WS\s0\fR and comment \fB\s-1CO\s0\fR tokens are allowed at any position in the above productions of the previous grammar part. .PP \&\fB\s-1WS\s0\fR ::= /[ \et\en]+/ .PP \&\fB\s-1CO\s0\fR ::= \fB\s-1CO_C\s0\fR # style of C | \fB\s-1CO_CXX\s0\fR # style of \*(C+ | \fB\s-1CO_SH\s0\fR # style of /bin/sh .PP \&\fB\s-1CO_C\s0\fR ::= /\e/\e*([^*]|\e*(?!\e/))*\e*\e// .PP \&\fB\s-1CO_CXX\s0\fR ::= /\e/\e/[^\en]*/ .PP \&\fB\s-1CO_SH\s0\fR ::= /#[^\en]*/ .PP Finally, any configuration line can have a trailing backslash character (\f(CW\*(C`\e\*(C'\fR) just before the newline character for simple line continuation. The backslash, the newline and (optionally) the leading whitespace on the following line are silently absorbed and as a side-effect continue the first line with the contents of the second line. .Sh "\s-1CONFIGURATION\s0 \s-1EXAMPLE\s0" .IX Subsection "CONFIGURATION EXAMPLE" A more intuitive description of the configuration syntax is perhaps given by the following example which shows all features at once: .PP .Vb 2 \& /* single word */ \& foo; .Ve .PP .Vb 2 \& /* multi word */ \& foo bar quux; .Ve .PP .Vb 2 \& /* nested structure */ \& foo { bar; baz } quux; .Ve .PP .Vb 4 \& /* quoted strings */ \& 'foo bar' \& "foo\ex0a\et\en\e \& bar" .Ve .SH "APPLICATION PROGRAMMING INTERFACE (API)" .IX Header "APPLICATION PROGRAMMING INTERFACE (API)" \&...
.SH "NODE SELECTION SPECIFICATION" .IX Header "NODE SELECTION SPECIFICATION" The \fBcfg_node_select\fR function takes a \fInode selection specification\fR string \fBselect\fR for locating the intended nodes. This specification is defined as: .PP \&\fBselect\fR ::= \fIempty\fR | \fBselect-step\fR \fBselect\fR .PP \&\fBselect-step\fR ::= \fBselect-direction\fR \fBselect-pattern\fR \fBselect-filter\fR .PP \&\fBselect-direction\fR ::= \*(L"./\*(R" # current node | \*(L"../\*(R" # parent node | \*(L"..../\*(R" # ancestor nodes | \*(L"\-/\*(R" # previous sibling node | \*(L"\-\-/\*(R" # preceding sibling nodes | \*(L"+/\*(R" # next sibling node | \*(L"++/\*(R" # following sibling nodes | \*(L"/\*(R" # child nodes | \*(L"//\*(R" # descendant nodes .PP \&\fBselect-pattern\fR ::= // | \fBtoken\fR .PP \&\fBselect-filter\fR ::= \fIempty\fR | /\e[/ \fBfilter-range\fR /\e]/ .PP \&\fBfilter-range\fR ::= \fBnum\fR # short for: num,num | \fBnum\fR /,/ # short for: num,\-1 | /,/ \fBnum\fR # short for: 1,num | \fBnum\fR /,/ \fBnum\fR .PP \&\fBnum\fR ::= /^[+\-]?[0\-9]+/ .PP \&\fBregex\fR ::= << Regular Expression (PCRE\-based) >> .PP \&\fBtoken\fR ::= << Plain-Text Token String >> .SH "IMPLEMENTATION ISSUES" .IX Header "IMPLEMENTATION ISSUES" Goal: non-hardcoded syntax tokens, only hard-coded syntax structure Goal: time-efficient parsing Goal: space-efficient storage Goal: representation of configuration as \s-1AST\s0 Goal: manipulation (annotation, etc) of \s-1AST\s0 via \s-1API\s0 Goal: dynamic syntax verification .SH "HISTORY" .IX Header "HISTORY" \&\fB\s-1OSSP\s0 cfg\fR was implemented in lots of small steps over a very long time. The first ideas date back to the year 1995 when Ralf S. Engelschall attended his first compiler construction lessons at university. But it was first completed in summer 2002 by him for use in the \fB\s-1OSSP\s0\fR project. .SH "AUTHOR" .IX Header "AUTHOR" .Vb 3 \& Ralf S. Engelschall \& rse@engelschall.com \& www.engelschall.com .Ve