/*
 * RUJIS ( Recover UJIS code from broken file )
 * $Header: rujis.c,v 0.2 92/09/24 takahasi Exp $
 * Copyright (C) 1992
 *      Hironobu Takahashi (takahasi@tiny.or.jp)
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2, or (at your option)
 * any later version.
 *
 * This program is distributed in the hope that it will be useful
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with KAKASI, see the file COPYING.  If not, write to the Free
 * Software Foundation Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */
/*
 * $Log: rujis.c,v $
 *
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*
 * Size of the decode table in bytes.  cbyte() produces a bit index made of
 * a character-pair number times 0x200 plus 9 context bits, i.e. 512 bits
 * (64 bytes) per pair; the remaining factors match the 94 x 84 pairs that
 * maybeujis() can accept.
 */
#define TABLE_SIZE (64*94*84)

#ifndef LIBDIR
#define LIBDIR "."
#endif
#define TABLE_NAME "ujis_tbl"

/* Bit table, TABLE_SIZE bytes, allocated in main(). */
unsigned char *table;

#define RECOVER 0
#define LEARNING 1
/* Either RECOVER (default) or LEARNING (selected with -l). */
int process_mode;

void usage(char **argv);
void char_shift(int *in);
void process(FILE *input, FILE *output);
int convert_char(int *in);
void learn_char(int *in);
int maybeujis(int c1, int c2);
void load_table(const char *file_name);
void put_table(const char *file_name);
void table_set(int *c, int value);
int table_req(int *c);
int cbyte(int *c);

/*
 * Print the command line synopsis to stderr.
 * argv[0] is used as the program name.
 */
void usage(char **argv)
{
    fprintf(stderr, "Usage:\n");
    fprintf(stderr, "\n");
    fprintf(stderr, " Recover: %s [ -t decode_table ] [input_file [output_file]]\n", argv[0]);
    fprintf(stderr, " Learning: %s -l [ -t decode_table ] [input_file]\n", argv[0]);
    fprintf(stderr, "\n");
    fprintf(stderr, " %s process only UJIS encoded files\n", argv[0]);
    fprintf(stderr, " default table is %s/%s\n", LIBDIR, TABLE_NAME);
    fprintf(stderr, "\n");
}

/*
 * Parse the options, open the input/output files, load the decode table,
 * run process() and, in learning mode, write the updated table back.
 *
 * Returns 0 on success; exits with status 1 on a file, memory or argument
 * error, and with status 0 after printing usage for an unknown option.
 */
int main(int argc, char **argv)
{
    FILE *input = stdin;
    FILE *output = stdout;
    const char *output_name = "(stdout)";
    char table_name[256];
    int i;
    int n;

    process_mode = RECOVER;

    /* LIBDIR comes from the build; refuse a path that does not fit. */
    n = snprintf(table_name, sizeof table_name, "%s/%s", LIBDIR, TABLE_NAME);
    if (n < 0 || (size_t)n >= sizeof table_name) {
        fprintf(stderr, "%s: default table name too long\n", argv[0]);
        exit(1);
    }

    for (i = 1; i < argc; ++i) {
        if (argv[i][0] == '-') {
            switch (argv[i][1]) {
            case 'l':
                process_mode = LEARNING;
                break;
            case 't': {
                const char *name;

                /* Accept both "-tNAME" and "-t NAME". */
                if (argv[i][2] != '\0') {
                    name = argv[i] + 2;
                } else if (i + 1 < argc) {
                    name = argv[++i];
                } else {
                    /* "-t" was the last argument: nothing to copy. */
                    usage(argv);
                    exit(1);
                }
                if (strlen(name) >= sizeof table_name) {
                    fprintf(stderr, "%s: table name too long\n", argv[0]);
                    exit(1);
                }
                strcpy(table_name, name);
                break;
            }
            default:
                usage(argv);
                exit(0);
            }
        } else if (input == stdin) {
            /* First non-option argument: input file. */
            if ((input = fopen(argv[i], "r")) == NULL) {
                perror(argv[i]);
                exit(1);
            }
        } else {
            /* Second non-option argument: output file. */
            if ((output = fopen(argv[i], "w")) == NULL) {
                perror(argv[i]);
                exit(1);
            }
            output_name = argv[i];
            break;              /* leaves the for loop: later arguments are ignored */
        }
    }

    table = malloc(TABLE_SIZE);
    if (table == NULL) {
        fprintf(stderr, "%s: can't alloc memory\n", argv[0]);
        exit(1);
    }

    load_table(table_name);
    process(input, output);

    if (process_mode != RECOVER) {
        put_table(table_name);
    } else if (ferror(output) || fflush(output) == EOF) {
        /* Do not report success when the recovered text was not written. */
        perror(output_name);
        exit(1);
    }
    return 0;
}

/*
 * Slide the 5-element window one position to the left.
 * in[4] is left unchanged (it is duplicated into in[3]); the caller
 * refills it.
 */
void char_shift(int *in)
{
    int i;

    for (i = 0; i < 4; ++i) {
        in[i] = in[i + 1];
    }
}

/* Write c to output unless it is the EOF marker. */
static void put_byte(int c, FILE *output)
{
    if (c != EOF) {
        putc(c, output);
    }
}

/*
 * Run the input through a 5-character sliding window.
 *
 * Window layout: in[0] is the previous character, in[1] and in[2] are the
 * pair currently examined, in[3] and in[4] are lookahead.  The window is
 * primed with '\n' as the "previous" character.
 *
 * RECOVER:  each pair is passed to convert_char() and in[1] is written to
 *           output; when the written character has its high bit set, the
 *           following character is written too and skipped as a unit.
 * LEARNING: each pair is passed to learn_char(); nothing is written.  The
 *           characters left in the window at end of input are not learned.
 */
void process(FILE *input, FILE *output)
{
    int in[5];

    in[0] = '\n';
    in[1] = getc(input);
    in[2] = getc(input);
    in[3] = getc(input);

    while ((in[4] = getc(input)) != EOF) {
        switch (process_mode) {
        case RECOVER:
            convert_char(in);
            putc(in[1], output);
            char_shift(in);
            if (in[0] & 0x80) {
                /* Second byte of an 8-bit pair: emit it and step over it. */
                putc(in[1], output);
                in[4] = getc(input);
                char_shift(in);
            }
            break;
        case LEARNING:
            learn_char(in);
            char_shift(in);
            if (in[0] & 0x80) {
                in[4] = getc(input);
                char_shift(in);
            }
            break;
        default:
            break;
        }
    }

    if (process_mode == LEARNING) {
        return;
    }

    /*
     * Flush the characters still held in the window, padding the lookahead
     * with '\n'.  Slots that never received input (inputs shorter than two
     * bytes) hold EOF and must not be written: putc(EOF) would emit 0xFF.
     */
    if (in[3] != EOF) {
        in[4] = '\n';
        convert_char(in);
        putc(in[1], output);
        char_shift(in);
        if (in[0] & 0x80) {
            putc(in[1], output);
            putc(in[2], output);
            return;
        }
    }
    in[3] = in[4] = '\n';
    convert_char(in);
    put_byte(in[1], output);
    put_byte(in[2], output);
}

/*
 * Recover mode: if in[1], in[2] pass maybeujis() and the table bit for the
 * pair in its current context is set, set the high bit of both bytes.
 *
 * Returns 1 when the pair was converted, 0 otherwise.
 */
int convert_char(int *in)
{
    if (maybeujis(in[1], in[2]) && table_req(in + 1)) {
        in[1] |= 0x80;
        in[2] |= 0x80;
        return 1;
    }
    return 0;
}

/*
 * Learning mode: for a pair whose 7-bit values pass maybeujis(), record in
 * the table whether in[1] had its high bit set in this context.
 */
void learn_char(int *in)
{
    if (maybeujis(in[1] & 0x7f, in[2] & 0x7f)) {
        table_set(in + 1, (in[1] & 0x80) ? 1 : 0);
    }
}

/*
 * Decide whether the 7-bit pair (c1, c2) may be a UJIS character with the
 * high bits stripped.  Returns 1 if it may be, 0 if not.
 *
 * c2 must be 33..126; c1 must be 33..40 or 48..116.  For some values of c1
 * further ranges of c2 are rejected (presumably unassigned code points of
 * the character set -- verify against the code table before editing).
 */
int maybeujis(int c1, int c2)
{
    if ((c2 < 33) || (c2 > 126))
        return 0;
    if ((c1 < 33) || ((40 < c1) && (c1 < 48)) || (116 < c1))
        return 0;

    c2 -= 32;
    switch (c1 - 32) {
    case 2:
        if ((14 < c2) && (c2 < 26)) return 0;
        if ((33 < c2) && (c2 < 42)) return 0;
        if ((48 < c2) && (c2 < 60)) return 0;
        if ((74 < c2) && (c2 < 82)) return 0;
        if ((89 < c2) && (c2 < 94)) return 0;
        break;
    case 3:
        if (c2 < 16) return 0;
        if ((25 < c2) && (c2 < 33)) return 0;
        if ((58 < c2) && (c2 < 65)) return 0;
        if (90 < c2) return 0;
        break;
    case 4:
        if (83 < c2) return 0;
        break;
    case 5:
        if (86 < c2) return 0;
        break;
    case 6:
        if ((24 < c2) && (c2 < 33)) return 0;
        if (56 < c2) return 0;
        break;
    case 7:
        if ((33 < c2) && (c2 < 49)) return 0;
        if (81 < c2) return 0;
        break;
    case 8:
        if (32 < c2) return 0;
        break;
    case 47:
        if (51 < c2) return 0;
        break;
    case 84:
        if (6 < c2) return 0;
        break;
    default:
        break;
    }
    return 1;
}

/*
 * Fill the global table from file_name.
 *
 * The table is zeroed first, so a missing file yields an empty table and a
 * short file leaves the unread remainder zero instead of uninitialised.
 * A short or failed read is reported on stderr.
 */
void load_table(const char *file_name)
{
    FILE *fp;

    memset(table, 0, TABLE_SIZE);

    fp = fopen(file_name, "rb");        /* the table is binary data */
    if (fp == NULL) {
        return;
    }
    if (fread(table, TABLE_SIZE, 1, fp) != 1) {
        fprintf(stderr, "%s: table is truncated or unreadable\n", file_name);
    }
    fclose(fp);
}

/*
 * Write the global table to file_name.
 * Open, write and close failures are reported with perror().
 */
void put_table(const char *file_name)
{
    FILE *fp;

    fp = fopen(file_name, "wb");        /* the table is binary data */
    if (fp == NULL) {
        perror(file_name);
        return;
    }
    if (fwrite(table, TABLE_SIZE, 1, fp) != 1) {
        perror(file_name);
        fclose(fp);
        return;
    }
    if (fclose(fp) == EOF) {
        perror(file_name);
    }
}

/* Single-bit masks indexed by bit position within a byte. */
unsigned char setmask[8] = {  1,   2,   4,   8,  16,  32,  64, 128};
unsigned char clrmask[8] = {254, 253, 251, 247, 239, 223, 191, 127};

/*
 * Set (value != 0) or clear (value == 0) the table bit selected by cbyte(c).
 */
void table_set(int *c, int value)
{
    int bit = cbyte(c);

    if (value)
        table[bit / 8] |= setmask[bit & 7];
    else
        table[bit / 8] &= clrmask[bit & 7];
}

/*
 * Return 1 if the table bit selected by cbyte(c) is set, 0 otherwise.
 */
int table_req(int *c)
{
    int bit = cbyte(c);

    return (table[bit / 8] & setmask[bit & 7]) ? 1 : 0;
}

/*
 * Compute the table bit index for the pair c[0], c[1].
 *
 * The index is the pair number times 0x200 plus 9 context bits taken from
 * the previous character (c[-1]) and the two following ones (c[2], c[3]),
 * so c must point at least one element into an array with three more
 * elements after it.  The caller must have validated the pair with
 * maybeujis() so that both 7-bit values are >= 33.
 *
 * NOTE(review): the pair number uses a row stride of 84 although the
 * second byte has 94 possible values (33..126), so pairs whose second
 * byte is in the top ten values share slots with the next row.  The stride
 * is left unchanged because altering it would invalidate existing table
 * files -- confirm before changing.
 */
int cbyte(int *c)
{
    return (((c[0] & 0x7f) - 33) * 84 + (c[1] & 0x7f) - 33) * 0x200
        + ((c[-1] & 0x80) ? 0x100 : 0)
        + ((c[-1] & 0x08) ? 0x080 : 0)
        + ((c[ 2] & 0x40) ? 0x040 : 0)
        + ((c[ 2] & 0x10) ? 0x020 : 0)
        + ((c[ 2] & 0x04) ? 0x010 : 0)
        + ((c[ 2] & 0x02) ? 0x008 : 0)
        + ((c[ 2] & 0x01) ? 0x004 : 0)
        + ((c[ 3] & 0x40) ? 0x002 : 0)
        + ((c[ 3] & 0x04) ? 0x001 : 0);
}