#include #include #include #include #include #include "debug.h" #include "clean_spaces.h" #include "suppress_spaces.h" #include "document.h" #define NO_PARENT -25 #define SS(X,Y) strcasecmp( X, Y ) == 0 /* Initialize defaults */ int document_init( document *d ) { d -> chars_per_line = 135; d -> lines_per_page = 66; d -> width = letter_height; d -> height = letter_width; d -> left_margin = 40; d -> top_margin = 40; d -> font_size = 9; d -> output_filename = NULL; d -> font_face = NULL; d -> image_dpi = 72; d -> image_filename = NULL; d -> index_count = 0; d -> index_def = NULL; d -> default_reparent = DEFAULT_REPARENT; d -> subject = NULL; d -> title = NULL; d -> author = NULL; d -> extra_key = NULL; d -> extra_data = NULL; d -> strip_non_printable = 1; d -> input_filename = NULL; d -> pdf = PDF_new(); return( 1 ); } int document_add_index( document *d, const indexd *i ) { indexd *t; int dump_index; int error; char errmsg[ 255 ]; if( d -> index_count == 0 ) { dmsg( 3, "Initializing new indexes" ); d -> index_def = (indexd *)malloc( 1 ); } /* Validate it looks OK */ dump_index = 0; if( i -> start_char >= i -> stop_char ) { fprintf( stderr, "Start char must be smaller than stop char\n" ); dump_index = 1; } if( dump_index ) { fprintf( stderr, "Index definition:\n" "Page: %i\n" "Line: %i\n" "Start: %i\n" "Stop: %i\n" "Suppress Spaces: %i\n" "Reparent: %i\n" "Format: '%s'\n", i -> page_no, i -> line_no, i -> start_char, i -> stop_char, i -> suppress_spaces, i -> reparent, i -> format ); return( 1 ); } /* Allocate space and copy in index */ d -> index_count++; d -> index_def = (indexd *)realloc( d -> index_def, sizeof( indexd ) * d -> index_count ); t = &(d -> index_def[ d -> index_count - 1 ]); memcpy( t, i, sizeof( indexd ) ); /* Null this out, we'll set and use it later */ t -> last_index = NULL; t -> indexid = NO_PARENT; /* Make it 1 based */ t -> start_char--; t -> stop_char--; dmsg( 2, "Added index #%d", d -> index_count ); dmsg( 3, "page_no: %i, line_no: %i, start_char: %i, end_char: %i, suppress_chars: %i", t -> page_no, t -> line_no, t -> start_char, t -> stop_char, t -> suppress_spaces ); /* Compile regex */ if( t -> index_type == IDX_REGEX ) { dmsg( 3, "Compiling regex: '%s'", t -> regstring ); error = regcomp( &t -> regcomp, t -> regstring, REG_EXTENDED ); if( error ) { regerror( error, &t -> regcomp, errmsg, 255 ); fprintf( stderr, "Regex error: (%d) %s\n", error, errmsg ); } } return( 1 ); } int document_start( document *d ) { if( d -> output_filename == NULL ) { dmsg( 2, "Using STDOUT" ); PDF_open_file( d -> pdf, "-" ); }else { dmsg( 2, "Using file: '%s'", d -> output_filename ); PDF_open_file( d -> pdf, d -> output_filename ); } /* Calculate image scale */ if( d -> image_filename != NULL ) { dmsg( 3, "Using background image: '%s'", d -> image_filename ); d -> image_id = PDF_open_image_file( d -> pdf, "tiff", d -> image_filename, NULL, 0 ); d -> image_scale_x = (float)72.0 / d -> image_dpi; d -> image_scale_y = (float)72.0 / d -> image_dpi; dmsg( 3, "Scale is: %fx%f", d -> image_scale_x, d -> image_scale_y ); } if( d -> font_face == NULL ) { d -> font_index = PDF_findfont( d -> pdf, "Courier", "host", 0 ); }else { d -> font_index = PDF_findfont( d -> pdf, d -> font_face, "host", 0 ); } d -> line_count = 0; d -> total_line_count = 0; d -> page_count = 0; /* Set up info */ PDF_set_info( d -> pdf, "Subject", d -> subject ? d -> subject : "PDF Document" ); PDF_set_info( d -> pdf, "Title", d -> title ? d -> title : d -> input_filename ); PDF_set_info( d -> pdf, "Creator", "ipdf V." VERSION " by Steve Slaven - http://hoopajoo.net" ); PDF_set_info( d -> pdf, "Author", d -> author ? d -> author : "Unknown" ); if( d -> extra_data != NULL ) { PDF_set_info( d -> pdf, d -> extra_key ? d -> extra_key : "Keywords", d -> extra_data ); } /* Set the bookmarkdest to be zoom */ if( PDF_get_majorversion() > 3 ) { PDF_set_parameter( d -> pdf, "bookmarkdest", "fitwidth" ); } return( 0 ); } int document_addline( document *d, const char *line ) { char *t; int found_index, c, parent_id, end_page; indexd *i; regmatch_t junk; d -> line_count++; d -> total_line_count++; dmsg( 4, "Adding line, line_count: %i total_line_count: %i", d -> line_count, d -> total_line_count ); if( d -> line_count == 1 ) { d -> page_count++; dmsg( 2, "Starting new page, #%i", d -> page_count ); PDF_begin_page( d -> pdf, d -> width, d -> height ); /* Add image */ if( d -> image_filename != NULL ) { dmsg( 4, "Setting background" ); PDF_save( d -> pdf ); PDF_scale( d -> pdf, d -> image_scale_x, d -> image_scale_y ); PDF_place_image( d -> pdf, d -> image_id, 0, 0, 1 ); PDF_restore( d -> pdf ); } PDF_set_text_pos( d -> pdf, d -> left_margin, d -> height - d -> top_margin ); PDF_setfont( d -> pdf, d -> font_index, d -> font_size ); } /* Text out */ dmsg( 5, "Truncating line: '%s'", line ); t = (char *)malloc( d -> chars_per_line + 1 ); bzero( t, d -> chars_per_line + 1 ); strncpy( t, line, d -> chars_per_line ); if( d -> strip_non_printable ) { /* Strip non-printable */ for( c = 0; c < strlen( t ); c++ ) { if( ( t[ c ] != '\t' ) && ( ! isprint( (char)t[ c ] ) ) ) { t[ c ] = ' '; } } } dmsg( 5, " Adding line: '%s'", t ); PDF_continue_text( d -> pdf, t ); free( t ); /* Now try and index this line */ found_index = 0; parent_id = 0; for( c = 0; c < d -> index_count; c++ ) { i = &d -> index_def[ c ]; if( found_index ) { /* Zero this, index found higher up so we reset */ dmsg( 4, "Index already happened, resetting me at #%i", c ); if( i -> last_index != NULL ) { free( i -> last_index ); i -> last_index = NULL; } /* Clear it's index */ i -> indexid = NO_PARENT; }else { switch( i -> index_type ) { case IDX_RELATIVE: dmsg( 4, "Trying to index, line is %i, looking for %i", d -> line_count, i -> line_no ); if( i -> line_no == d -> line_count ) { found_index = document_do_index( d, i, line, parent_id, c ); } break; case IDX_ABSOLUTE: /* Check page_no and line_count */ if( ( ( i -> page_no == 0 ) && /* page 0 means index on total linecount */ ( i -> line_no == d -> total_line_count ) ) || ( ( i -> line_no == d -> line_count ) && /* normal index */ ( i -> page_no == d -> page_count ) ) || ( ( i -> page_no == -1 ) && /* page -1 means index this line on every page */ ( i -> line_no == d -> line_count ) ) ) { document_do_index( d, i, line, parent_id, c ); } break; case IDX_REGEX: /* do a compare */ if( regexec( &i -> regcomp, line, 0, &junk, 0 ) == 0 ) { found_index = document_do_index( d, i, line, parent_id, c ); } break; } } /* Save pid */ parent_id = i -> indexid; } /* Form feed if needed */ end_page = 0; if( strchr( line, '\f' ) ) { /* Form feed */ dmsg( 2, "Form feed on line %i", d -> line_count ); end_page = 1; }else if( d -> line_count > d -> lines_per_page ) { /* Advance */ dmsg( 2, "Reached max line count, new page" ); end_page = 1; } if( end_page ) { dmsg( 3, "Ending current page" ); PDF_end_page( d -> pdf ); d -> line_count = 0; } return( 1 ); } int document_end( document *d ) { /* Flush any extra data */ if( d -> line_count > 0 ) { dmsg( 2, "Flushing final data page" ); PDF_end_page( d -> pdf ); } dmsg( 3, "Closing PDF" ); PDF_close( d -> pdf ); PDF_delete( d-> pdf ); } int document_do_index( document *d, indexd *i, const char *line, int parent_id, int depth ) { char *t, *n = NULL; int index_len, reparent; /* And index line, pull our substring and match it */ index_len = i -> stop_char - i -> start_char; t = (char *)malloc( index_len + 1 ); bzero( t, index_len + 1 ); t = strncpy( t, &line[ i -> start_char ], index_len ); if( i -> suppress_spaces ) { dmsg( 4, "Normalizing space" ); suppress_spaces( t ); } /* Clean spaces */ clean_spaces( t, CL_BOTH ); dmsg( 4, "index substring: '%s'", t ); /* Compare */ if( ( ! i -> suppress_duplicates ) || ( ( i -> last_index == NULL ) || ( strcmp( i -> last_index, t ) != 0 ) ) ) { dmsg( 3, "New index: %s", t ); if( i -> reparent ) { dmsg( 2, "Reparenting requested, depth is: %i, reparent is: %i", depth, i -> reparent ); /* Reparent back N levels */ reparent = depth - i -> reparent - 1; if( reparent < 0 ) { dmsg( 2, "Re-parented index to top" ); parent_id = 0; }else { dmsg( 2, "Re-parented to sub index" ); parent_id = d -> index_def[ reparent ].indexid; } } /* Make sure parent is up */ if( parent_id != NO_PARENT ) { dmsg( 2, "OK, indexing: '%s'", t ); /* Save this for future ref */ if( i -> last_index != NULL ) { free( i -> last_index ); } i -> last_index = strdup( t ); /* Slap it in the format string, if it's defined */ /* Coming out of this, *t should have the index string */ if( i -> format != NULL ) { n = (char *)malloc( strlen( i -> format ) + strlen( t ) + 1 ); sprintf( n, i -> format, t ); free( t ); t = n; } /* Get around NULL bookmark */ if( t[ 0 ] == 0 ) { free( t ); t = strdup( "NULL" ); } dmsg( 3, "Parentid for index: %i", parent_id ); i -> indexid = PDF_add_bookmark( d -> pdf, t, parent_id, 0 ); /* If we alloc'd n then dealloc */ free( t ); /* found index */ return( 1 ); }else { dmsg( 2, "Not ready for this index, discarding" ); free( t ); } }else { dmsg( 4, "Repeat" ); free( t ); } return( 0 ); } int document_set_page( document *d, const char *ptype ) { /* page mode */ if( SS( ptype, "letter-p" ) ) { d -> width = letter_width; d -> height = letter_height; }else if( SS( ptype, "letter-l" ) ) { d -> width = letter_height; d -> height = letter_width; }else if( SS( ptype, "a0-p" ) ) { d -> width = a0_width; d -> height = a0_height; }else if( SS( ptype, "a0-l" ) ) { d -> width = a0_height; d -> height = a0_width; }else if( SS( ptype, "a1-p" ) ) { d -> width = a1_width; d -> height = a1_height; }else if( SS( ptype, "a1-l" ) ) { d -> width = a1_height; d -> height = a1_width; }else if( SS( ptype, "a2-p" ) ) { d -> width = a2_width; d -> height = a2_height; }else if( SS( ptype, "a2-l" ) ) { d -> width = a2_height; d -> height = a2_width; }else if( SS( ptype, "a3-p" ) ) { d -> width = a3_width; d -> height = a3_height; }else if( SS( ptype, "a3-l" ) ) { d -> width = a3_height; d -> height = a3_width; }else if( SS( ptype, "a4-p" ) ) { d -> width = a4_width; d -> height = a4_height; }else if( SS( ptype, "a4-l" ) ) { d -> width = a4_height; d -> height = a4_width; }else if( SS( ptype, "a5-p" ) ) { d -> width = a5_width; d -> height = a5_height; }else if( SS( ptype, "a5-l" ) ) { d -> width = a5_height; d -> height = a5_width; }else if( SS( ptype, "a6-p" ) ) { d -> width = a6_width; d -> height = a6_height; }else if( SS( ptype, "a6-l" ) ) { d -> width = a6_height; d -> height = a6_width; }else if( SS( ptype, "b5-p" ) ) { d -> width = b5_width; d -> height = b5_height; }else if( SS( ptype, "b5-l" ) ) { d -> width = b5_height; d -> height = b5_width; }else if( SS( ptype, "legal-p" ) ) { d -> width = legal_width; d -> height = legal_height; }else if( SS( ptype, "legal-l" ) ) { d -> width = legal_height; d -> height = legal_width; }else if( SS( ptype, "ledger-p" ) ) { d -> width = ledger_width; d -> height = ledger_height; }else if( SS( ptype, "ledger-l" ) ) { d -> width = ledger_height; d -> height = ledger_width; }else if( SS( ptype, "11x17-p" ) ) { d -> width = p11x17_width; d -> height = p11x17_height; }else if( SS( ptype, "11x17-l" ) ) { d -> width = p11x17_height; d -> height = p11x17_width; }else { fprintf( stderr, "Unknown page mode: '%s'\n", ptype ); exit(0); } }