/*
 * RSJIS ( Recover SJIS code from broken file )
 * $Header: rsjis.c,v 0.2 92/09/04 takahasi Exp $
 * Copyright (C) 1992
 *      Hironobu Takahashi (takahasi@tiny.or.jp)
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either versions 2, or (at your option)
 * any later version.
 *
 * This program is distributed in the hope that it will be useful
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with KAKASI, see the file COPYING.  If not, write to the Free
 * Software Foundation Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */
/*
  $Log: rsjis.c,v $
 *
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*
 * Size in bytes of each decode table.  cbyte() yields a bit index in the
 * range 0 .. 39*128*512-1 (39 lead-byte slots, 128 trail-byte values,
 * 9 context bits), which is exactly TABLE_SIZE*8 bits.
 */
#define TABLE_SIZE (64*39*128)

#ifndef LIBDIR
#define LIBDIR "."
#endif
#define TABLE_NAME "sjis_tbl"

/*
 * table1: bit set => the lead byte of the pair had its high bit set.
 * table2: bit set => the trail byte of the pair had its high bit set.
 * Both are indexed by the bit number returned by cbyte().
 */
unsigned char *table1, *table2;

#define RECOVER  0
#define LEARNING 1
int process_mode;

void usage(char **argv);
void char_shift(int *in);
void process(FILE *input, FILE *output);
int convert_char(int *in);
void learn_char(int *in);
int maybesjis(int c1, int c2);
int load_table(const char *file_name);
int put_table(const char *file_name);
void table_set(int *c, int value1, int value2);
void table_req(int *c, int *value1, int *value2);
int cbyte(int *c);

/*
 * Print the command line synopsis on stderr.
 * argv[0] is used as the program name.
 */
void usage(char **argv)
{
    fprintf(stderr, "Usage:\n");
    fprintf(stderr, "\n");
    fprintf(stderr, "  Recover: %s [ -t decode_table ] [input_file [output_file]]\n", argv[0]);
    fprintf(stderr, "  Learning: %s -l [ -t decode_table ] [input_file]\n", argv[0]);
    fprintf(stderr, "\n");
    fprintf(stderr, "  %s process only Shift JIS encoded files\n", argv[0]);
    fprintf(stderr, "  default table is %s/%s\n", LIBDIR, TABLE_NAME);
    fprintf(stderr, "\n");
}

/*
 * Command-line driver.  Define RSJIS_NO_MAIN to compile the routines in
 * this file without it (for example to link them into a unit test).
 *
 * Exit status: 0 on success (and, as before, after printing the usage text
 * for an unknown option); 1 on a missing/over-long -t argument, a file that
 * cannot be opened, an allocation failure, or a table that cannot be saved.
 */
#ifndef RSJIS_NO_MAIN
int main(int argc, char **argv)
{
    FILE *input = stdin;
    FILE *output = stdout;
    char table_name[256];
    int i;
    int n;

    process_mode = RECOVER;

    n = snprintf(table_name, sizeof table_name, "%s/%s", LIBDIR, TABLE_NAME);
    if (n < 0 || (size_t)n >= sizeof table_name) {
        fprintf(stderr, "%s: default table name is too long\n", argv[0]);
        exit(1);
    }

    for (i = 1; i < argc; ++i) {
        if (argv[i][0] == '-') {
            switch (argv[i][1]) {
            case 'l':
                process_mode = LEARNING;
                break;
            case 't': {
                const char *name;

                /* Accept both "-tNAME" and "-t NAME". */
                if (strlen(argv[i]) > 2) {
                    name = argv[i] + 2;
                } else if (i + 1 < argc) {
                    name = argv[++i];
                } else {
                    /* "-t" was the last argument: nothing to copy. */
                    usage(argv);
                    exit(1);
                }
                if (strlen(name) >= sizeof table_name) {
                    fprintf(stderr, "%s: table name is too long\n", argv[0]);
                    exit(1);
                }
                strcpy(table_name, name);
                break;
            }
            default:
                usage(argv);
                exit(0);
            }
        } else if (input == stdin) {
            /* First non-option argument: input file. */
            if ((input = fopen(argv[i], "r")) == NULL) {
                perror(argv[i]);
                exit(1);
            }
        } else {
            /* Second non-option argument: output file; stop parsing. */
            if ((output = fopen(argv[i], "w")) == NULL) {
                perror(argv[i]);
                exit(1);
            }
            break;
        }
    }

    table1 = malloc(TABLE_SIZE);
    table2 = malloc(TABLE_SIZE);
    if ((table1 == NULL) || (table2 == NULL)) {
        fprintf(stderr, "%s: can't alloc memory\n", argv[0]);
        exit(1);
    }

    /* A missing or damaged table falls back to the built-in defaults. */
    (void)load_table(table_name);
    process(input, output);
    if (process_mode != RECOVER) {
        if (put_table(table_name) != 0)
            return 1;
    }
    return 0;
}
#endif /* RSJIS_NO_MAIN */

/*
 * Slide the 5-element context window one position to the left:
 * in[0..3] receive in[1..4]; in[4] is left unchanged.
 */
void char_shift(int *in)
{
    int i;

    for (i = 0; i < 4; ++i) {
        in[i] = in[i + 1];
    }
}

/*
 * Run the current process_mode over the whole of `input`.
 *
 * A 5-element window is kept: in[0] is the previous byte, in[1]/in[2] the
 * candidate pair, in[3]/in[4] the look-ahead.  The window starts with '\n'
 * as the "previous" byte and is padded with '\n' at the end of input.
 *
 * RECOVER:  every input byte is written to `output`, with the high bit
 *           restored where the decode tables say so.
 * LEARNING: nothing is written; the tables are updated from the input.
 *           Pairs that do not have two bytes of look-ahead are not learned.
 */
void process(FILE *input, FILE *output)
{
    int in[5];

    in[0] = '\n';
    /* Do not read again once EOF has been seen. */
    in[1] = getc(input);
    in[2] = (in[1] == EOF) ? EOF : getc(input);
    in[3] = (in[2] == EOF) ? EOF : getc(input);
    in[4] = EOF;

    while (in[3] != EOF && (in[4] = getc(input)) != EOF) {
        switch (process_mode) {
        case RECOVER:
            convert_char(in);
            putc(in[1], output);
            char_shift(in);
            if (in[0] & 0x80) {
                /* Lead byte of a pair: emit the trail byte as well. */
                putc(in[1], output);
                in[4] = getc(input);
                char_shift(in);
            }
            break;
        case LEARNING:
            learn_char(in);
            char_shift(in);
            if (in[0] & 0x80) {
                /* Skip the trail byte of the pair just learned. */
                in[4] = getc(input);
                char_shift(in);
            }
            break;
        default:
            break;
        }
    }

    if (process_mode == LEARNING)
        return;

    /*
     * Flush the bytes still held in the window.  Inputs shorter than two
     * bytes never fill in[1]/in[2]; writing those EOF slots with putc()
     * would emit stray 0xFF bytes.
     */
    if (in[1] == EOF)
        return;
    if (in[2] == EOF) {
        /* A lone byte cannot be half of a pair. */
        putc(in[1], output);
        return;
    }

    if (in[3] != EOF) {
        in[4] = '\n';
        convert_char(in);
        putc(in[1], output);
        char_shift(in);
        if (in[0] & 0x80) {
            putc(in[1], output);
            putc(in[2], output);
            return;
        }
    }
    in[3] = in[4] = '\n';
    convert_char(in);
    putc(in[1], output);
    putc(in[2], output);
}

/*
 * Restore the high bits of the pair in[1], in[2] if the decode tables say
 * that, in this context, the pair was a Shift JIS character.
 *
 * in points at a 5-element window (in[0] and in[4] are used as context).
 * Returns 1 when the high bit of in[1] was set, 0 when the window was left
 * untouched.
 */
int convert_char(int *in)
{
    int value1, value2;
    int ret;

    ret = maybesjis(in[1], in[2]);
    if (ret == 0)
        return 0;

    table_req(in + 1, &value1, &value2);
    if (!value1)
        return 0;

    in[1] |= 0x80;
    switch (ret) {
    case 1:             /* trail byte can only be 0x40-0x7e: leave it */
        break;
    case 2:             /* trail byte can only have had its high bit set */
        in[2] |= 0x80;
        break;
    case 3:             /* ambiguous: let table2 decide */
        if (value2)
            in[2] |= 0x80;
        break;
    default:
        break;
    }
    return 1;
}

/*
 * Record in the decode tables whether the pair in[1], in[2] of an intact
 * Shift JIS text carries high bits.  Only pairs whose 7-bit images could
 * be a Shift JIS character are recorded.
 */
void learn_char(int *in)
{
    if (!maybesjis(in[1] & 0x7f, in[2] & 0x7f))
        return;

    if (in[1] & 0x80) {
        table_set(in + 1, 1, (in[2] & 0x80) ? 1 : 0);
    } else {
        table_set(in + 1, 0, 0);
    }
}

/*
 * Shift JIS (lead : trail) ranges recognised by maybesjis():
 *
 *   81      : 40 - 7e  80 - ac  b8 - bf  c8 - ce  da - e8  f0 - f7  fc - fc
 *   82      : 4f - 58  60 - 79  81 - 9a  9f - f1
 *   83      : 40 - 7e  80 - 96  9f - b6  bf - d6
 *   84      : 40 - 60  70 - 7e  80 - 91  9f - be
 *   88      : 9f - fc
 *   89 - 97 : 40 - 7e  80 - fc
 *   98      : 40 - 72  9f - fc
 *   99 - 9f : 40 - 7e  80 - fc
 *   e0 - e9 : 40 - 7e  80 - fc
 *   ea      : 40 - 7e  80 - a4
 */

/*
 * Decide whether the 7-bit pair (c1, c2) could be a Shift JIS character
 * whose high bits were stripped.  c1 is compared against the lead bytes
 * above with bit 7 cleared.
 *
 * Returns a bit mask:
 *   0  not a possible pair
 *   1  possible if the trail byte was c2 itself (0x40-0x7e region)
 *   2  possible if the trail byte was c2 | 0x80
 *   3  possible either way
 */
int maybesjis(int c1, int c2)
{
    int result = 0;

    switch (c1) {
    case 0x01:
        if ((0x40 <= c2) && (c2 <= 0x7e))
            result = 1;
        if ((c2 <= 0x2c) ||
            ((0x38 <= c2) && (c2 <= 0x3f)) ||
            ((0x48 <= c2) && (c2 <= 0x4e)) ||
            ((0x5a <= c2) && (c2 <= 0x68)) ||
            ((0x70 <= c2) && (c2 <= 0x77)) ||
            (c2 == 0x7c))
            result |= 2;
        break;
    case 0x02:
        if (((0x4f <= c2) && (c2 <= 0x58)) ||
            ((0x60 <= c2) && (c2 <= 0x79)))
            result = 1;
        if (((0x01 <= c2) && (c2 <= 0x1a)) ||
            ((0x1f <= c2) && (c2 <= 0x71)))
            result |= 2;
        break;
    case 0x03:
        if ((0x40 <= c2) && (c2 <= 0x7e))
            result = 1;
        if ((c2 <= 0x16) ||
            ((0x1f <= c2) && (c2 <= 0x36)) ||
            ((0x3f <= c2) && (c2 <= 0x56)))
            result |= 2;
        break;
    case 0x04:
        if (((0x40 <= c2) && (c2 <= 0x60)) ||
            ((0x70 <= c2) && (c2 <= 0x7e)))
            result = 1;
        if (((c2 <= 0x11)) ||
            ((0x1f <= c2) && (c2 <= 0x3e)))
            result |= 2;
        break;
    case 0x08:
        if ((0x1f <= c2) && (c2 <= 0x7c))
            result |= 2;
        break;
    case 0x18:
        if ((0x40 <= c2) && (c2 <= 0x72))
            result = 1;
        if ((0x1f <= c2) && (c2 <= 0x7c))
            result |= 2;
        break;
    case 0x6a:
        if ((0x40 <= c2) && (c2 <= 0x7e))
            result = 1;
        if (c2 <= 0x24)
            result |= 2;
        break;
    default:
        if (((0x09 <= c1) && (c1 <= 0x17)) ||
            ((0x19 <= c1) && (c1 <= 0x1f)) ||
            ((0x60 <= c1) && (c1 <= 0x69))) {
            if ((0x40 <= c2) && (c2 <= 0x7e))
                result = 1;
            if (c2 <= 0x7c)
                result |= 2;
        }
        break;
    }
    return result;
}

/*
 * Fill table1/table2 (which must already be allocated, TABLE_SIZE bytes
 * each) from file_name.
 *
 * If the file cannot be opened the tables get their default contents
 * (table1 all 0x00, table2 all 0xff) and 0 is returned.  If the file exists
 * but does not hold two complete tables, a message is printed, the defaults
 * are used instead of partially read data, and -1 is returned.
 */
int load_table(const char *file_name)
{
    FILE *fp;
    int status = 0;

    fp = fopen(file_name, "rb");
    if (fp != NULL) {
        int complete = fread(table1, TABLE_SIZE, 1, fp) == 1
                    && fread(table2, TABLE_SIZE, 1, fp) == 1;

        fclose(fp);
        if (complete)
            return 0;
        fprintf(stderr, "%s: decode table is incomplete, using defaults\n",
                file_name);
        status = -1;
    }

    memset(table1, 0x00, TABLE_SIZE);
    memset(table2, 0xff, TABLE_SIZE);
    return status;
}

/*
 * Write table1 followed by table2 to file_name.
 * Returns 0 on success; on any failure reports it with perror() and
 * returns -1.
 */
int put_table(const char *file_name)
{
    FILE *fp;
    int status = 0;

    if ((fp = fopen(file_name, "wb")) == NULL) {
        perror(file_name);
        return -1;
    }
    if (fwrite(table1, TABLE_SIZE, 1, fp) != 1 ||
        fwrite(table2, TABLE_SIZE, 1, fp) != 1)
        status = -1;
    /* fclose() flushes, so a full disk may only show up here. */
    if (fclose(fp) == EOF)
        status = -1;
    if (status != 0)
        perror(file_name);
    return status;
}

/* Single-bit masks for setting / clearing bit n of a table byte. */
unsigned char setmask[8] = {  1,   2,   4,   8,  16,  32,  64, 128};
unsigned char clrmask[8] = {254, 253, 251, 247, 239, 223, 191, 127};

/*
 * Store value1 in table1 and value2 in table2 (each treated as a boolean)
 * at the bit selected by cbyte(c).  c must satisfy the requirements of
 * cbyte().
 */
void table_set(int *c, int value1, int value2)
{
    int bit = cbyte(c);

    if (value1)
        table1[bit / 8] |= setmask[bit & 7];
    else
        table1[bit / 8] &= clrmask[bit & 7];

    if (value2)
        table2[bit / 8] |= setmask[bit & 7];
    else
        table2[bit / 8] &= clrmask[bit & 7];
}

/*
 * Fetch into *value1 / *value2 (as 0 or 1) the table1 / table2 bits
 * selected by cbyte(c).  c must satisfy the requirements of cbyte().
 */
void table_req(int *c, int *value1, int *value2)
{
    int bit = cbyte(c);

    *value1 = (table1[bit / 8] & setmask[bit & 7]) ? 1 : 0;
    *value2 = (table2[bit / 8] & setmask[bit & 7]) ? 1 : 0;
}

/*
 * Compute the table bit index for the pair c[0], c[1].
 *
 * c must point at element 1 of a 5-element window: c[-1] (previous byte)
 * and c[3] (second look-ahead byte) are read as context.
 *
 * The 7-bit lead byte is packed into a slot number 0..38
 * (0x01-0x04 -> 0-3, 0x05-0x1f -> 1-27, 0x60-0x6a -> 28-38); other lead
 * values are outside the table, so callers only pass pairs accepted by
 * maybesjis().  The slot and the 7-bit trail byte form the high part of
 * the index; four bits of c[-1] and five bits of c[3] form the low 9 bits.
 */
int cbyte(int *c)
{
    int p0 = c[0] & 0x7f;

    if (p0 <= 4)
        p0 = p0 - 1;
    else if (p0 <= 0x1f)
        p0 = p0 - 4;
    else
        p0 = p0 - 0x44;

    return (p0 * 128 + (c[1] & 0x7f)) * 0x200
        + ((c[-1] & 0x80) ? 0x100 : 0)
        + ((c[-1] & 0x40) ? 0x080 : 0)
        + ((c[-1] & 0x20) ? 0x040 : 0)
        + ((c[-1] & 0x08) ? 0x020 : 0)
        + ((c[ 3] & 0x40) ? 0x010 : 0)
        + ((c[ 3] & 0x20) ? 0x008 : 0)
        + ((c[ 3] & 0x08) ? 0x004 : 0)
        + ((c[ 3] & 0x04) ? 0x002 : 0)
        + ((c[ 3] & 0x01) ? 0x001 : 0);
}