/*- * Copyright ¿ 2005, 2006 Vyacheslav Anikin. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* $Id: rux.c,v 1.39 2006/08/06 16:39:40 ghos Exp $ */ #ifdef HAVE_CONFIG_H #include "config.h" #endif #include "version.h" static char *version = VERSION; #include #include #include #include #include #include #include #include /*#include */ /* Uncomment if you want to use basename */ #ifdef MEMDEBUG #include "dmalloc.h" #else #define dmalloc malloc #define dfree free #endif /* MEMDEBUG */ #include "cptaba.h" #include "cpdetect.h" struct cpinfo * detect_codepage(struct cp_detect *detinf, FILE *in, FILE *out, int fdoconv, float *cpexp); void doconv_all(u_char *conv_tab, FILE *in, FILE *out); struct cpinfo *get_cp_info (const char *cpid); u_char *init_tab (u_char *tab, int len, struct cpinfo *in, struct cpinfo *out); void print_usage(void); void print_cplist(struct cpinfo *cplist); void add_buf(u_char *buf, int size); void free_bufs(); void recode_buf(u_char *conv_tab, u_char *buf, int len, FILE *out); int pwarnf(const char *fmt, ...); /* Default flags' values */ static int f_graph = 0; /* Recode pseudo-graphics too */ static int f_quiet = 0; /* Quiet mode, shut stderr */ static int f_doconv = 1; /* Do convert by default */ static int f_detect = 1; /* Auto detect by default */ static int f_lax = 0; /* Don't touch lax-files */ static struct cpinfo *i_cp = NULL; static struct cpinfo *o_cp = NULL; /* Emanate on minimal file block */ static int fbuf_size = 512; struct databuf { u_char *data; int size; struct databuf *next; }; static struct databuf *head = NULL; static struct databuf *last = NULL; /* * OK! Are you want to recode or detect code page of some file or files? * No problem. I'll do it for you! */ int main(int argc, char **argv) { u_char conv_tab[_CPINFO_TSIZE]; int readstdin = 0; /* Don't read from stdin by default */ FILE *in = stdin; FILE *out = stdout; extern char *optarg; extern int optind; u_char *iobuf = NULL; /* File input buffer */ int flag; /* An option */ #ifndef DEFAULT_OUTPUT_CP #define DEFAULT_OUTPUT_CP 2 #endif struct cpinfo *def_ocp = &codepage[DEFAULT_OUTPUT_CP]; /* Info about detection of a code page */ struct cp_detect detinf; o_cp = def_ocp; /* Initialize o_cp using default cp */ while ((flag = getopt(argc, argv, "i:o:thegTs:ql")) != -1) { switch (flag) { case 'e': /* Universal recoding pseudo-graphics */ f_graph = 0; break; case 'g': /* Normal recoding pseudo-graphics */ f_graph = 1; break; case 'i': /* An input code page */ i_cp = get_cp_info(optarg); f_detect = 0; break; case 'o': /* An output code page */ o_cp = get_cp_info(optarg); break; case 't': /* Detect and recode */ f_doconv = 1; f_detect = 1; break; case 'T': /* Detect only */ f_doconv = 0; f_detect = 1; break; case 's': /* Block size in bytes */ pwarnf("Using -s option is deprecated!\n"); break; case 'q': /* Suspress all warnings */ f_quiet = 1; break; case 'l': f_lax = 1; break; case 'h': default: print_usage(); return 0; } } argc -= optind; argv += optind; /* Brings to default output code page if it doesn't specified. */ if (o_cp == NULL) { pwarnf("Brings to default output code page (%s).\n", def_ocp->cp_name); o_cp = def_ocp; } if (f_detect == 0) { if (i_cp == NULL) { pwarnf("Could not continue without input code page.\n"); return -1; } } else { if ((iobuf = (u_char *)dmalloc(fbuf_size)) == NULL) { perror(NULL); return errno; } detinf.cp_list = (struct cpinfo *)&codepage; detinf.cp_default = o_cp; detinf.cp_databuf = (u_char *)iobuf; } if (argc == 0) { readstdin = 1; /* read from stdin if not enough any arguments */ } /* This cycle is controlled by return and break ops. Cycle breaks after first loop if we reading from stdin (flag readstdin is on) */ while (1) { const char *fname = *argv++; if (readstdin) { in = stdin; } else if ((in = fopen(fname, "r")) == NULL) { perror(fname); if (*argv == NULL) { break; } continue; } if (f_detect != 0) { float cpexp = .0; i_cp = detect_codepage(&detinf, in, out, f_doconv, &cpexp); if (f_doconv == 0) { /*#define SHOWCPEXP*/ #ifdef SHOWCPEXP printf("%s: %s (%.8f)\n", readstdin ? "" : /*basename*/(fname), cpexp == .0 ? "us-ascii (no hits)" : i_cp->cp_name, i_cp->cp_exp); #else printf("%s: %s\n", readstdin ? "" : /*basename*/(fname), cpexp == .0 ? "us-ascii (no hits)" : i_cp->cp_name); #endif } } if (f_doconv != 0) { init_tab(conv_tab, sizeof(conv_tab), i_cp, o_cp); doconv_all(conv_tab, in, out); } fclose(in); if (readstdin) { break; } if (*argv == NULL) { break; } } if (f_detect != 0) { dfree(iobuf); } return 0; } /* This routine is used for determine a code page info from an identifier */ struct cpinfo * get_cp_info(const char *cpid) { int i = 0; while (1) { if (codepage[i].cp_name == NULL) { break; } if (strcmp(codepage[i].cp_name, cpid) == 0) { return &codepage[i]; } ++i; } pwarnf("Wrong code page identifier: %s\n", cpid); return NULL; } /* Table completion routine. Used for filling up the alternative table within additional data */ void compl_tab(u_char *tab, struct cpinfo *incp, struct cpinfo *outcp) { int i; int t_gr = f_graph; /* The pseudo-graphics will be lost (or segmentation fault occure) if we'll recode using -g option from the code page with pseudo-graphics chars to without. */ if ((f_graph) && (incp->cp_graphsize != 0) && (outcp->cp_graphsize == 0)) { pwarnf("Output code page has no pseudo-graphics. Assuming a " "-g option off.\n"); f_graph = 0; } if ((i = incp->cp_graphsize) != 0) { for (--i; i >= 0; i--) { tab[incp->cp_gdata[i]] = f_graph ? outcp->cp_gdata[i] : boxdr_map[i]; } } /* Always restore an old f_graph value. We'll don't loss any time to additional check. */ f_graph = t_gr; } /* The alternative table initialization routine. Used for speed up recoding */ u_char * init_tab(u_char *tab, int len, struct cpinfo *in, struct cpinfo *out) { int i = len - 1; for (; i >= 0; i--) { tab[i] = i; } for (i = out->cp_size; i >= 0; i--) { tab[in->cp_data[i]] = out->cp_data[i]; } compl_tab(tab, in, out); return tab; } void print_usage(void) { /* DATETIME was defined in version.h */ printf(" rux %s " DATETIME "\n" " Usage: rux [-egtT] [-i incp] [-o outcp] [file ...]\n\n" " -e -- Replace box-drawing characters by non-graphic. " "Default.\n" " -g -- Inverse of -e option. (Overrides any previous -e " "option).\n\n" " -t -- Attempt to detect a code page of the input files." " Default.\n" " -T -- Just show the code pages of the input files.\n\n", version); printf(" -i -- Specify an input code page. Overrides by -[tT]\n" " -o -- Specify an output code page. Default is " "`%s'.\n\n" " -q -- Quiet mode (suspress warnings)\n" " -h -- Show this help message.\n\n", codepage[DEFAULT_OUTPUT_CP].cp_name); print_cplist(codepage); printf("\nVyachelav Anikin \n"); } void print_cplist(struct cpinfo *cplist) { struct cpinfo *curcp = cplist; printf("The valid identifiers of the code pages are:\n"); for (; curcp->cp_name; curcp++) { printf(" %s", curcp->cp_name); } printf("\n"); } struct cpinfo * detect_codepage(struct cp_detect *detinf, FILE *in, FILE *out, int fdoconv, float *cpexp) { int plank = 20; float cp_exp = .0; struct cpinfo *cur = codepage; for (; cur->cp_name; cur++) { cur->cp_exp = .0; } while (cp_exp < plank) { detinf->cp_datalen = fread(detinf->cp_databuf, 1, fbuf_size, in); i_cp = detect_cp(detinf, &cp_exp); if (cp_exp < 0.78 && f_lax != 0) { cp_exp = .0; } if (fdoconv != 0) { if (cp_exp == .0) { fwrite(detinf->cp_databuf, detinf->cp_datalen, 1, out); } else { add_buf(detinf->cp_databuf, detinf->cp_datalen); } } if (feof(in)) break; if (cp_exp != .0) plank -= 3; } *cpexp = i_cp->cp_exp; return i_cp; } /* Routine for converting of all buffers (if they exists) * and continuous converting of remaining part of file. */ void doconv_all(u_char *conv_tab, FILE *in, FILE *out) { int c; /* Input/output buffer */ if (head != NULL) { struct databuf *cur = head; while (cur) { recode_buf(conv_tab, cur->data, cur->size, out); cur = cur->next; } free_bufs(); } while ((c = getc(in)) != EOF) { putc(conv_tab[(u_char) c], out); } } /* Program standard error/warning format output */ int pwarnf(const char *fmt, ...) { int vf_ret; va_list args; if (f_quiet) { return 0; } va_start(args, fmt); fprintf(stderr, "rux: "); vf_ret = vfprintf(stderr, fmt, args); va_end(args); /* We need to flush stderr output before fatal exit :-) */ fflush(stderr); return vf_ret; } /* * Functions for supporting recoding buffers. Also there's * implemented the simplest one-way list routines (adding one node * and clearing of all). */ void recode_buf(u_char *conv_tab, u_char *buf, int len, FILE *out) { while (len--) { putc(conv_tab[*buf++], out); } } void add_buf(u_char *buf, int size) { struct databuf *t = (struct databuf *)dmalloc(sizeof(struct databuf)); t->data = (u_char *) dmalloc(size); t->size = size; t->next = NULL; memcpy(t->data, buf, size); if (head == NULL) { head = t; } else { last->next = t; } last = t; } void free_bufs() { struct databuf *cur = head; while (cur) { struct databuf *t = cur; cur = cur->next; dfree(t->data); dfree(t); t = NULL; } head = NULL; last = NULL; }