/*
 *    ffe - Flat File Extractor
 *
 *    Copyright (C) 2006 Timo Savinen
 *    This file is part of ffe.
 *
 *    ffe is free software; you can redistribute it and/or modify
 *    it under the terms of the GNU General Public License as published by
 *    the Free Software Foundation; either version 2 of the License, or
 *    (at your option) any later version.
 *
 *    ffe is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    GNU General Public License for more details.
 *
 *    You should have received a copy of the GNU General Public License
 *    along with ffe; if not, write to the Free Software
 *    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 *
 */

/* $Id: ffe.c,v 1.54 2007-10-30 17:44:47 timo Exp $ */

#include "ffe.h"

/* NOTE(review): the header names of the angle-bracket includes below were
   not present in the text under review; they are reconstructed from the
   library functions this file calls (getopt_long, exit/getenv, errno,
   str*).  Confirm against the project's original source. */
#ifdef HAVE_GETOPT_H
#include <getopt.h>
#endif

#include <stdlib.h>
#include <errno.h>

#ifdef HAVE_STRING_H
#include <string.h>
#endif

/* Program identification; build-time macros win over the built-in defaults */
#ifdef PACKAGE
static char *program = PACKAGE;
#else
static char *program = "ffe";
#endif

#ifdef PACKAGE_VERSION
static char *version = PACKAGE_VERSION;
#else
static char *version = "0.2.2";
#endif

#ifdef HOST
static char *host = HOST;
#else
static char *host = "";
#endif

#ifdef PACKAGE_BUGREPORT
static char *email_address = PACKAGE_BUGREPORT;
#else
static char *email_address = "tjsa@iki.fi";
#endif

/* Option letters accepted by getopt; a trailing ':' means "takes an argument" */
static char short_opts[] = "c:s:o:p:f:e:r:l?Vav";

#ifdef HAVE_GETOPT_LONG
/* Long option names, each mapped to the matching short option letter */
static struct option long_opts[] = {
    {"configuration",1,NULL,'c'},
    {"structure",1,NULL,'s'},
    {"output",1,NULL,'o'},
    {"print",1,NULL,'p'},
    {"field-list",1,NULL,'f'},
    {"loose",0,NULL,'l'},
    {"expression",1,NULL,'e'},
    {"help",0,NULL,'?'},
    {"version",0,NULL,'V'},
    {"and",0,NULL,'a'},
    {"invert-match",0,NULL,'v'},
    {"replace",1,NULL,'r'},
    {NULL,0,NULL,0}
};
#endif

/* global rc-data */
struct structure *structure = NULL;
struct output *output = NULL;
struct expression *expression = NULL;
struct lookup *lookup = NULL;
struct replace *replace = NULL;
struct field *const_field = NULL;

/* output no marker: search_output() returns no_output for the name "no" */
struct output dummy;
struct output *no_output = &dummy;

/* Print an error message to stderr and terminate with EXIT_FAILURE.
 *
 * msg      - main message, always printed after the program name
 * info     - optional detail appended after ": " (may be NULL)
 * syserror - optional system error text appended after "; " (may be NULL)
 *
 * Never returns.
 */
void
panic(char *msg,char *info,char *syserror)
{
    if(info == NULL && syserror == NULL)
    {
        fprintf(stderr,"%s: %s\n",program,msg);
    } else if(info != NULL && syserror == NULL)
    {
        fprintf(stderr,"%s: %s: %s\n",program,msg,info);
    } else if(info != NULL && syserror != NULL)
    {
        fprintf(stderr,"%s: %s: %s; %s\n",program,msg,info,syserror);
    } else if(info == NULL && syserror != NULL)
    {
        fprintf(stderr,"%s: %s; %s\n",program,msg,syserror);
    }
    exit(EXIT_FAILURE);
}

/* Build the name of the default configuration file.
 *
 * When the environment variable HOME is set the result is a freshly
 * allocated "HOME<separator><file>" string; otherwise the bare file name
 * (a string literal) is returned.  Because the result may be a literal,
 * callers must not free or modify it.
 */
char *
get_default_rc_name()
{
    char *home;
    char *result;
#ifdef WIN32
    char *file = "ffe.rc";
#else
    char *file = ".fferc";
#endif

    result = NULL;
    home = getenv("HOME");
    if(home != NULL)
    {
        result = xmalloc(strlen(home) + strlen(file) + strlen(PATH_SEPARATOR_STRING) + 2);
        strcpy(result,home);
        strcat(result,PATH_SEPARATOR_STRING);
        strcat(result,file);
    } else
    {
        result = file;
    }
    return result;
}

/* Write the option summary to stream.  The long option names are shown
 * only when the program is built with HAVE_GETOPT_LONG.
 */
void
help(FILE *stream)
{
    fprintf(stream,"Usage: %s [OPTION]...\n\n",program);
#ifdef HAVE_GETOPT_LONG
    fprintf(stream,"-c, --configuration=FILE\n");
    fprintf(stream,"\t\tRead configuration from FILE, default is \'%s\'.\n",get_default_rc_name());
    fprintf(stream,"-s, --structure=STRUCTURE\n");
    fprintf(stream,"\t\tUse structure STRUCTURE for input file, suppresses guessing.\n");
    fprintf(stream,"-p, --print=FORMAT\n");
    fprintf(stream,"\t\tUse output format FORMAT for printing.\n");
    fprintf(stream,"-o, --output=NAME\n");
    fprintf(stream,"\t\tWrite output to NAME instead of standard output.\n");
    fprintf(stream,"-f, --field-list=LIST\n");
    fprintf(stream,"\t\tPrint only fields and constants listed in comma separated list LIST.\n");
    fprintf(stream,"-e, --expression=EXPRESSION\n");
    fprintf(stream,"\t\tPrint only those records for which the EXPRESSION evaluates to true.\n");
    fprintf(stream,"-a, --and\n");
    fprintf(stream,"\t\tExpressions are combined with logical and, default is logical or.\n");
    fprintf(stream,"-v, --invert-match\n");
    fprintf(stream,"\t\tPrint only those records which don't match the expression.\n");
    fprintf(stream,"-l, --loose\n");
    fprintf(stream,"\t\tAn invalid input line does not cause %s to abort.\n",program);
    fprintf(stream,"-r, --replace=FIELD=VALUE\n");
    fprintf(stream,"\t\tReplace FIELDs contents with VALUE in output.\n");
    fprintf(stream,"-?, --help\n");
    fprintf(stream,"\t\tDisplay this help and exit.\n");
    fprintf(stream,"-V, --version\n");
#else
    fprintf(stream,"-c FILE\n");
    fprintf(stream,"\t\tRead configuration from FILE, default is \'%s\'.\n",get_default_rc_name());
    fprintf(stream,"-s STRUCTURE\n");
    fprintf(stream,"\t\tUse structure STRUCTURE for input file, suppresses guessing.\n");
    fprintf(stream,"-p FORMAT\n");
    fprintf(stream,"\t\tUse output format FORMAT for printing.\n");
    fprintf(stream,"-o NAME\n");
    fprintf(stream,"\t\tWrite output to NAME instead of standard output.\n");
    fprintf(stream,"-f LIST\n");
    fprintf(stream,"\t\tPrint only fields and constants listed in comma separated list LIST.\n");
    fprintf(stream,"-e EXPRESSION\n");
    fprintf(stream,"\t\tPrint only those records for which the EXPRESSION evaluates to true.\n");
    fprintf(stream,"-a\n");
    fprintf(stream,"\t\tExpressions are combined with logical and, default is logical or.\n");
    fprintf(stream,"-v\n");
    fprintf(stream,"\t\tPrint only those records which don't match the expression.\n");
    fprintf(stream,"-l\n");
    fprintf(stream,"\t\tAn invalid input line does not cause %s to abort.\n",program);
    fprintf(stream,"-r FIELD=VALUE\n");
    fprintf(stream,"\t\tReplace FIELDs contents with VALUE in output.\n");
    fprintf(stream,"-?\n");
    fprintf(stream,"\t\tDisplay this help and exit.\n");
    fprintf(stream,"-V\n");
#endif
    fprintf(stream,"\t\tShow version and exit.\n");
    fprintf(stream,"\nAll remaining arguments are names of input files;\n");
    fprintf(stream,"if no input files are specified, then the standard input is read.\n");
    fprintf(stream,"\nSend bug reports to %s.\n",email_address);
}

/* Report an unknown option character on stderr, followed by the help text */
void
usage(int opt)
{
    fprintf(stderr,"Unknown option '-%c'\n",(char) opt);
    help(stderr);
}

/* Print program name, version, host and the copyright notice to stdout */
void
print_version()
{
    printf("%s version %s (%s)\n",program,version,host);
    printf("Copyright (c) 2007 Timo Savinen\n\n");
    printf("This is free software; see the source for copying conditions.\n");
    printf("There is NO warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n");
}

/* Find an output definition by name.
 *
 * The name "no" maps to the no_output marker.  Any other name is looked up
 * in the global output list.  When nothing matches, an error is printed to
 * stderr and NULL is returned.
 */
struct output *
search_output(char *name)
{
    struct output *o = output;

    if(strcmp(name,"no") == 0) return no_output;

    while(o != NULL)
    {
        if(strcmp(name,o->name) == 0) return o;
        o = o->next;
    }
    fprintf(stderr,"%s: Unknown output \'%s\'\n",program,name);
    return NULL;
}

/* returns a record after name, or NULL if structure s has no such record */
struct record *
find_record(struct structure *s,char *name)
{
    struct record *ret = s->r;

    while(ret != NULL)
    {
        if(strcmp(ret->name,name) == 0) return ret;
        ret = ret->next;
    }
    return NULL;
}

/* find a structure after a name, or NULL if no structure has that name */
struct structure *
find_structure(char *name)
{
    struct structure *s = structure;

    while(s != NULL)
    {
        if(strcmp(s->name,name) == 0) return s;
        s = s->next;
    }
    return NULL;
}

/* check structure and output integrity */
/* and initialize some things */
/*
 * use_output - when not NULL, this output name replaces the output name of
 *              every structure.
 *
 * For every structure/record/field the function resolves output and lookup
 * names to pointers, copies fields for "fields-from" records, computes
 * field positions and record lengths and names unnamed fields after their
 * ordinal.  Every problem found is printed to stderr; if there was at least
 * one, the program is terminated through panic().
 */
void
check_rc(char *use_output)
{
    struct structure *s;
    struct output *o;
    struct record *r,*fr;
    struct field *f;
    struct lookup *l;
    int errors = 0;
    int ordinal;
    int field_count_first;
    char num[64];

    s = structure;
    o = output;

    if(s == NULL)
    {
        errors++;
        fprintf(stderr,"%s: No structure definitions in rc-file\n",program);
    }

    while(s != NULL)
    {
        if(use_output != NULL)
        {
            s->output_name = xstrdup(use_output);
        }
        s->o = search_output(s->output_name);
        if(s->o == NULL) errors++;

        r = s->r;
        if(r == NULL)
        {
            errors++;
            fprintf(stderr,"%s: No records in structure \'%s\'\n",program,s->name);
        }

        if(s->quote && s->type[0] == SEPARATED)
        {
            if(s->quote == s->type[1])
            {
                errors++;
                fprintf(stderr,"%s: Quotation and separator cannot be the same character, structure \'%s\'\n",program,s->name);
            }
        }

        if(s->header && s->type[0] != SEPARATED)
        {
            errors++;
            fprintf(stderr,"%s: Headers are valid only in separated input, structure \'%s\'\n",program,s->name);
        }

        /* number of fields in the first record, counted only with headers */
        field_count_first = 0;

        while(r != NULL)
        {
            /* a record without its own output inherits the structure's */
            if(r->output_name == NULL)
            {
                r->output_name = s->output_name;
                r->o = s->o;
            } else
            {
                r->o = search_output(r->output_name);
                if(r->o == NULL) errors++;
            }

            if(r->fields_from != NULL)
            {
                if(r->f != NULL)
                {
                    errors++;
                    fprintf(stderr,"%s: field and fields-from are mutually exclusive, structure \'%s\', record \'%s\'\n",program,s->name,r->name);
                }

                fr = find_record(s,r->fields_from);
                if(fr != NULL)
                {
                    /* the field list is shared with the source record */
                    r->f = fr->f;
                } else
                {
                    errors++;
                    fprintf(stderr,"%s: No record named as \'%s\' in structure \'%s\'\n",program,r->fields_from,s->name);
                }
            }

            f = r->f;
            if(f == NULL)
            {
                errors++;
                fprintf(stderr,"%s: No fields in record \'%s\'\n",program,r->name);
            }

            r->length = 0;
            ordinal = 1;

            while(f != NULL)
            {
                if(s->type[0] == FIXED_LENGTH)
                {
                    /* position is the offset; length is summed over fields */
                    f->position = r->length;
                    r->length += f->length;
                } else
                {
                    /* position is the field ordinal; length is field count */
                    f->position = ordinal;
                    r->length++;
                    if(s->header)
                    {
                        if(r == s->r)
                        {
                            field_count_first++;
                        }
                    }
                }

                if(!s->header && f->name == NULL)
                {
                    sprintf(num,"%d",ordinal);
                    f->name = xstrdup(num);
                }

                if(s->type[0] == FIXED_LENGTH && !f->length)
                {
                    errors++;
                    fprintf(stderr,"%s: A field \'%s\' must have length in fixed length structure \'%s\' record \'%s\'\n",program,f->name,s->name,r->name);
                }

                if(f->lookup_table_name != NULL)
                {
                    l = lookup;
                    while(l != NULL && f->lookup == NULL)
                    {
                        if(strcmp(l->name,f->lookup_table_name) == 0)
                        {
                            f->lookup = l;
                        }
                        l = l->next;
                    }

                    if(f->lookup == NULL)
                    {
                        errors++;
                        fprintf(stderr,"%s: No lookup table named as '%s'\n",program,f->lookup_table_name);
                    }
                }

                f = f->next;
                ordinal++;
            }

            if(s->header && r->length != field_count_first)
            {
                errors++;
                fprintf(stderr,"%s: All records in separated structure with header must have equal count of fields, structure \'%s\'\n",program,s->name);
            }
            r = r->next;
        }
        s = s->next;
    }

    if(o == NULL)
    {
        errors++;
        fprintf(stderr,"%s: No output definitions in rc-file\n",program);
    }

    while(o != NULL)
    {
        if(o->lookup == NULL) o->lookup = o->data;
        o = o->next;
    }

    if(errors)
    {
        panic("Errors in rc-file",NULL,NULL);
    }
}

/* Append a FIELD=VALUE replacement (option -r) to the global replace list.
 *
 * optarg is split in place at the first '='; both halves are duplicated.
 * Terminates through panic() when optarg contains no '='.
 */
void
add_replace(char *optarg)
{
    char *op_pos;
    struct replace *r;

    if((op_pos = strchr(optarg,'=')) == NULL)
    {
        panic("Replace expression must contain =-character",optarg,NULL);
    }

    *op_pos = 0;

    r = replace;
    if(r == NULL)
    {
        replace = xmalloc(sizeof(struct replace));
        replace->next = NULL;
        r = replace;
    } else
    {
        while(r->next != NULL) r = r->next;
        r->next = xmalloc(sizeof(struct replace));
        r = r->next;
        r->next = NULL;
    }

    r->field = xstrdup(optarg);
    op_pos++;
    r->value = xstrdup(op_pos);
    r->found = 0;
}

/* Read expression values from a file, one value per line.
 *
 * Returns a linked list with one node per line (in file order), or NULL
 * when the file contains no lines.  The line terminator is not part of the
 * stored value.  Terminates through panic() when the file cannot be opened.
 */
struct expr_list *
read_expression_file(char *file)
{
    struct expr_list *retval = NULL;
    struct expr_list *e = NULL;
    FILE *fp;
    int ccount;
    size_t line_len = 1024;   /* size_t: getline() updates it through a size_t pointer */
    char *line = xmalloc(line_len);

    fp = fopen(file,"r");
    if(fp == NULL) panic("Error in opening file",file,strerror(errno));

    do
    {
#ifdef HAVE_GETLINE
        ccount = (int) getline(&line,&line_len,fp);
#else
        if(fgets(line,(int) line_len,fp) != NULL)
        {
            ccount = (int) strlen(line);
        } else
        {
            ccount = -1;
        }
#endif
        if(ccount != -1)
        {
            /* drop the newline only if there is one, so that a last line
               without a terminator keeps its final character */
            if(ccount > 0 && line[ccount - 1] == '\n') ccount--;
            line[ccount] = 0;

            if(e == NULL)
            {
                e = xmalloc(sizeof(struct expr_list));
                e->value = xstrdup(line);
                retval = e;
            } else
            {
                e->next = xmalloc(sizeof(struct expr_list));
                e = e->next;
                e->value = xstrdup(line);
            }
            e->value_len = ccount;
            e->next = NULL;
        }
    } while(ccount != -1);

    fclose(fp);
    free(line);
    return retval;
}

/* Append an expression (option -e) to the global expression list.
 *
 * optarg has the form FIELD<op>VALUE.  The operator characters are searched
 * in a fixed order (OP_REQEXP, OP_EQUAL, OP_START, OP_CONTAINS,
 * OP_NOT_EQUAL) and optarg is split in place at the first occurrence of the
 * first operator found.  A VALUE starting with "file:" names a file from
 * which the values are read.  Regular expressions are compiled here.
 * Terminates through panic() on a missing operator, an empty value file,
 * an invalid regular expression, or when regular expressions are requested
 * but the program is built without HAVE_REGEX.
 */
void
add_expression(char *optarg)
{
    char *op_pos;
    char op = 0;
    struct expression *e;
    char *value_file;
#ifdef HAVE_REGEX
    struct expr_list *el;
    int rc;
    size_t buflen;
    char *errbuf;
#endif

    if((op_pos = strchr(optarg,OP_REQEXP)) != NULL)
    {
#ifdef HAVE_REGEX
        op = OP_REQEXP;
#else
        panic("Regular expressions are not supported in this system",optarg,NULL);
#endif
    } else if((op_pos = strchr(optarg,OP_EQUAL)) != NULL)
    {
        op = OP_EQUAL;
    } else if((op_pos = strchr(optarg,OP_START)) != NULL)
    {
        op = OP_START;
    } else if((op_pos = strchr(optarg,OP_CONTAINS)) != NULL)
    {
        op = OP_CONTAINS;
    } else if((op_pos = strchr(optarg,OP_NOT_EQUAL)) != NULL)
    {
        op = OP_NOT_EQUAL;
    } else
    {
        panic("Expression must contain an operator: =,^,~,? or !",optarg,NULL);
    }

    *op_pos = 0;

    e = expression;
    if(e == NULL)
    {
        expression = xmalloc(sizeof(struct expression));
        expression->next = NULL;
        e = expression;
    } else
    {
        while(e->next != NULL) e = e->next;
        e->next = xmalloc(sizeof(struct expression));
        e = e->next;
        e->next = NULL;
    }

    e->field = xstrdup(optarg);
    op_pos++;

    if(strstr(op_pos,"file:") == op_pos)
    {
        value_file = expand_home(&op_pos[5]);
        e->el = read_expression_file(value_file);
        if(e->el == NULL) panic("No values in file",value_file,NULL);
        free(value_file);
        /* values live in e->el; do not leave the single-value members unset */
        e->value = NULL;
        e->value_len = 0;
    } else
    {
        e->value = xstrdup(op_pos);
        e->value_len = strlen(e->value);
        e->el = NULL;
    }
    e->found = 0;
    e->op = op;

#ifdef HAVE_REGEX
    if(e->op == OP_REQEXP)
    {
        if(e->el != NULL)
        {
            el = e->el;
            while(el != NULL)
            {
                rc = regcomp(&el->reg,el->value,REG_EXTENDED | REG_NOSUB);
                if(rc)
                {
                    /* ask about the regex_t that failed (el->reg, not e->reg) */
                    buflen = regerror(rc,&el->reg,NULL,0);
                    errbuf = xmalloc(buflen + 1);
                    regerror(rc,&el->reg,errbuf,buflen);
                    panic("Error in regular expression",el->value,errbuf);
                }
                el = el->next;
            }
        } else
        {
            rc = regcomp(&e->reg,e->value,REG_EXTENDED | REG_NOSUB);
            if(rc)
            {
                buflen = regerror(rc,&e->reg,NULL,0);
                errbuf = xmalloc(buflen + 1);
                regerror(rc,&e->reg,errbuf,buflen);
                panic("Error in regular expression",e->value,errbuf);
            }
        }
    }
#endif
}

/* Program entry point: parse the command line, read and check the
 * configuration, select the structure (given with -s or guessed from the
 * input) and run the extraction.
 */
int
main(int argc, char **argv)
{
    int opt;
    int strict = 1;
    int expression_and = 0;
    int expression_invert = 0;
    struct structure *s;
    char *structure_to_use = NULL;
    char *output_to_use = NULL;
    char *config_to_use = NULL;
    char *ofile_to_use = NULL;
    char *field_list = NULL;

#ifdef HAVE_GETOPT_LONG
    while((opt = getopt_long(argc,argv,short_opts,long_opts,NULL)) != -1)
#else
    while((opt = getopt(argc,argv,short_opts)) != -1)
#endif
    {
        switch(opt)
        {
            case 'c':
                if(config_to_use == NULL)
                {
                    config_to_use = xstrdup(optarg);
                } else
                {
                    panic("Only one -c option allowed",NULL,NULL);
                }
                break;
            case 's':
                if(structure_to_use == NULL)
                {
                    structure_to_use = xstrdup(optarg);
                } else
                {
                    panic("Only one -s option allowed",NULL,NULL);
                }
                break;
            case 'p':
                if(output_to_use == NULL)
                {
                    output_to_use = xstrdup(optarg);
                } else
                {
                    panic("Only one -p option allowed",NULL,NULL);
                }
                break;
            case 'f':
                if(field_list == NULL)
                {
                    field_list = xstrdup(optarg);
                } else
                {
                    panic("Only one -f option allowed",NULL,NULL);
                }
                break;
            case 'o':
                if(ofile_to_use == NULL)
                {
                    ofile_to_use = xstrdup(optarg);
                } else
                {
                    panic("Only one -o option allowed",NULL,NULL);
                }
                break;
            case 'e':
                add_expression(optarg);
                break;
            case 'r':
                add_replace(optarg);
                break;
            case 'a':
                expression_and = 1;
                break;
            case 'v':
                /* every -v toggles the inversion */
                expression_invert = !expression_invert;
                break;
            case 'l':
                strict = 0;
                break;
            case '?':
                /* NOTE(review): getopt also returns '?' for an unrecognized
                   option, so such options end up here (help on stdout,
                   successful exit) rather than in the default branch --
                   confirm whether that is intended. */
                help(stdout);
                exit(EXIT_SUCCESS);
                break;
            case 'V':
                print_version();
                exit(EXIT_SUCCESS);
                break;
            default:
                usage(opt);
                exit(EXIT_FAILURE);
                break;
        }
    }

    /* remaining arguments are input files; "-" is used when there are none */
    if(optind < argc)
    {
        while(optind < argc) set_input_file(argv[optind++]);
    } else
    {
        set_input_file("-");
    }

    if(config_to_use == NULL) config_to_use = get_default_rc_name();

    parserc(config_to_use,field_list);

    check_rc(output_to_use);

    open_input_file();

    if(structure_to_use == NULL)
    {
        structure_to_use = guess_structure();
        if(structure_to_use == NULL) panic("A structure cannot be guessed, use -s option",NULL,NULL);
    }

    s = find_structure(structure_to_use);

    if(s == NULL)
    {
        panic("No structure named as",structure_to_use,NULL);
    }

    set_output_file(ofile_to_use);

    execute(s,strict,expression_and,expression_invert);

    close_output_file();

    exit(EXIT_SUCCESS);
}