/*
* RSJIS ( Recover SJIS code from broken file )
* $Header: rsjis.c,v 0.2 92/09/04 takahasi Exp $
* Copyright (C) 1992
* Hironobu Takahashi (takahasi@tiny.or.jp)
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2, or (at your option)
* any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with KAKASI, see the file COPYING. If not, write to the Free
* Software Foundation Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/* $Log: rsjis.c,v $
*
*/
#include <stdio.h>
/*
 * Size in bytes of each decode table.  cbyte() yields a bit index of the
 * form (row * 128 + trail) * 0x200 + 9 context bits, and the tables are
 * addressed as index/8, so 64*39*128 bytes hold 39 * 128 * 512 bits.
 * The expansion is parenthesized so the macro stays a single value when it
 * is used inside a larger expression (e.g. x / TABLE_SIZE).
 */
#define TABLE_SIZE (64*39*128)

/* Directory of the default decode table; may be overridden at build time. */
#ifndef LIBDIR
#define LIBDIR "."
#endif
/* File name of the default decode table inside LIBDIR. */
#define TABLE_NAME "sjis_tbl"

/*
 * Bit tables of TABLE_SIZE bytes each, allocated in main().
 * table_set() stores its value1 argument in table1 and its value2 argument
 * in table2; learn_char() passes "lead byte had bit 7 set" and
 * "trail byte had bit 7 set" respectively.
 */
unsigned char *table1, *table2;

/* Values of process_mode. */
#define RECOVER 0	/* restore bit 7 using the tables */
#define LEARNING 1	/* update the tables from intact input */
int process_mode;

extern void exit();
/*
 * Print the command-line synopsis to stderr.
 *
 * argv: the program's argument vector; argv[0] is shown as the program
 *       name.  A fallback name is used when argv or argv[0] is NULL so
 *       that NULL is never handed to "%s".
 *
 * Returns 0 (the return type is spelled out because implicit int is not
 * valid in C99 and later).
 */
int
usage(argv)
char **argv;
{
    char *prog;

    prog = (argv != NULL && argv[0] != NULL) ? argv[0] : "rsjis";

    fprintf(stderr, "Usage:\n");
    fprintf(stderr, "\n");
    fprintf(stderr, " Recover: %s [ -t decode_table ] [input_file [output_file]]\n", prog);
    fprintf(stderr, " Learning: %s -l [ -t decode_table ] [input_file]\n", prog);
    fprintf(stderr, "\n");
    fprintf(stderr, " %s process only Shift JIS encoded files\n", prog);
    fprintf(stderr, " default table is %s/%s\n", LIBDIR, TABLE_NAME);
    fprintf(stderr, "\n");
    return 0;
}
/*
 * Program entry point.
 *
 * Options:
 *   -l          switch to LEARNING mode (default is RECOVER)
 *   -t NAME     use NAME as the decode table ("-tNAME" is also accepted)
 * The first non-option argument is the input file (default stdin), the
 * second is the output file (default stdout); anything after the output
 * file is ignored.
 *
 * Loads the table, runs process(), and in any mode other than RECOVER
 * writes the table back.  Returns 0 on success; exits with status 1 on
 * file, memory, argument or write errors.
 */
int
main(argc, argv)
int argc;
char **argv;
{
    FILE *input, *output;
    char table_name[256];
    char *name_arg;
    int i, len;
    /* The file does not include <stdlib.h>; declare malloc with its real
     * prototype so the size argument and the returned pointer are not
     * truncated on platforms where they are wider than int. */
    extern void *malloc(size_t);

    input = stdin;
    output = stdout;
    process_mode = RECOVER;
    name_arg = NULL;

    len = snprintf(table_name, sizeof table_name, "%s/%s", LIBDIR, TABLE_NAME);
    if (len < 0 || len >= (int)sizeof table_name) {
        fprintf(stderr, "%s: default table name too long\n", argv[0]);
        exit(1);
    }

    for (i = 1; i < argc; ++i) {
        if (argv[i][0] == '-') {
            switch (argv[i][1]) {
            case 'l':
                process_mode = LEARNING;
                break;
            case 't':
                /* Name is glued to the option or is the next argument. */
                if (argv[i][2] != '\0') {
                    name_arg = argv[i] + 2;
                } else if (i + 1 < argc) {
                    name_arg = argv[++i];
                } else {
                    /* "-t" was the last argument: nothing to copy. */
                    usage(argv);
                    exit(1);
                }
                len = snprintf(table_name, sizeof table_name, "%s", name_arg);
                if (len < 0 || len >= (int)sizeof table_name) {
                    fprintf(stderr, "%s: table name too long\n", argv[0]);
                    exit(1);
                }
                break;
            default:
                usage(argv);
                exit(0);
            }
        } else if (input == stdin) {
            if ((input = fopen(argv[i], "r")) == NULL) {
                perror(argv[i]);
                exit(1);
            }
        } else {
            if ((output = fopen(argv[i], "w")) == NULL) {
                perror(argv[i]);
                exit(1);
            }
            break;	/* leaves the argument loop: later arguments are ignored */
        }
    }

    table1 = (unsigned char *)malloc((size_t)(TABLE_SIZE));
    table2 = (unsigned char *)malloc((size_t)(TABLE_SIZE));
    if ((table1 == NULL) || (table2 == NULL)) {
        fprintf(stderr, "%s: can't alloc memory\n", argv[0]);
        exit(1);
    }

    load_table(table_name);
    process(input, output);

    /* Surface write failures (full disk, closed pipe) instead of exiting 0. */
    if (fflush(output) != 0 || ferror(output)) {
        fprintf(stderr, "%s: write error\n", argv[0]);
        exit(1);
    }

    if (process_mode != RECOVER)
        put_table(table_name);
    return 0;
}
/*
 * Slide the five-element window one position to the left:
 * in[0..3] receive in[1..4]; in[4] is left unchanged.
 *
 * in: array of at least five ints.
 *
 * Returns 0 (the return type is spelled out because implicit int is not
 * valid in C99 and later; callers ignore the value).
 */
int
char_shift(in)
int *in;
{
    int i;

    for (i = 0; i < 4; ++i) {
        in[i] = in[i + 1];
    }
    return 0;
}
/*
 * Run the current process_mode over INPUT.
 *
 * A five-element window is kept over the stream: in[1] is the byte being
 * decided, in[2] its successor, and in[0] / in[4] are the surrounding
 * context that cbyte() reads as c[-1] and c[3].  The window starts with
 * '\n' as the byte "before" the file.
 *
 * RECOVER:  convert_char() may set bit 7 on in[1] (and in[2]); in[1] is
 *           written, and when it ended up with bit 7 set the following
 *           byte is written as its trail byte without being examined.
 * LEARNING: learn_char() records the window; nothing is written.
 *
 * After end of input the bytes still held in the window are flushed in
 * every mode except LEARNING, padding the context with '\n'.
 *
 * NOTE(review): any byte with bit 7 set is treated as the first byte of a
 * two-byte code, which would also match single-byte 8-bit codes in the
 * input -- confirm this is intended.
 *
 * Returns 0; callers ignore the value.
 */
int
process(input, output)
FILE *input;
FILE *output;
{
    int in[5];

    in[0] = '\n';
    in[1] = getc(input);
    in[2] = getc(input);
    in[3] = getc(input);

    while ((in[4] = getc(input)) != EOF) {
        switch (process_mode) {
        case RECOVER:
            convert_char(in);
            putc(in[1], output);
            char_shift(in);
            if (in[0] & 0x80) {
                /* in[1] is now the trail byte: emit it and skip past it. */
                putc(in[1], output);
                in[4] = getc(input);
                char_shift(in);
            }
            break;
        case LEARNING:
            learn_char(in);
            char_shift(in);
            if (in[0] & 0x80) {
                in[4] = getc(input);
                char_shift(in);
            }
            break;
        default:
            break;
        }
    }

    if (process_mode == LEARNING)
        return 0;

    /*
     * Inputs of zero or one byte leave EOF sentinels in in[1] / in[2].
     * Writing those with putc() would emit stray 0xFF bytes, so emit only
     * the real byte (a lone byte cannot start a two-byte code).
     */
    if (in[2] == EOF) {
        if (in[1] != EOF)
            putc(in[1], output);
        return 0;
    }

    if (in[3] != EOF) {
        /* Three bytes remain: decide the first with '\n' as look-ahead. */
        in[4] = '\n';
        convert_char(in);
        putc(in[1], output);
        char_shift(in);
        if (in[0] & 0x80) {
            putc(in[1], output);
            putc(in[2], output);
            return 0;
        }
    }

    /* Two bytes remain. */
    in[3] = in[4] = '\n';
    convert_char(in);
    putc(in[1], output);
    putc(in[2], output);
    return 0;
}
/*
 * Decide whether in[1]/in[2] is a two-byte code that lost bit 7 and, if
 * the tables say so, restore it in place.
 *
 * in: five-element window; in[1] and in[2] are the candidate pair, in[0]
 *     and in[4] are context read through table_req()/cbyte().
 *
 * maybesjis() classifies the pair: 1 = only the first byte can take
 * bit 7, 2 = both bytes must take it, 3 = either is possible, in which
 * case the second table's bit decides the trail byte.
 *
 * Returns 1 when in[1] was modified, 0 otherwise.  (The original ended
 * with a bare "return;" in an int function; callers ignore the value.)
 */
int
convert_char(in)
int *in;
{
    int lead_flag, trail_flag;
    int kind;

    /* An EOF sentinel is not a byte and must never be used as one. */
    if (in[1] == EOF || in[2] == EOF)
        return 0;

    kind = maybesjis(in[1], in[2]);
    if (kind == 0)
        return 0;

    table_req(in + 1, &lead_flag, &trail_flag);
    if (!lead_flag)
        return 0;

    in[1] |= 0x80;
    if (kind == 2 || (kind == 3 && trail_flag))
        in[2] |= 0x80;
    return 1;
}
/*
 * Record in the tables how the pair in[1]/in[2] appears in intact input.
 *
 * in: five-element window; in[1] and in[2] are the pair, in[0] and in[4]
 *     are context read through table_set()/cbyte().
 *
 * Only pairs that maybesjis() accepts once bit 7 is masked off are
 * recorded.  The first table bit is set when in[1] carries bit 7; the
 * second is set only when in[1] and in[2] both carry it.  Otherwise the
 * bits are cleared.
 *
 * Returns 1 when an entry was written, 0 otherwise (the return type is
 * spelled out because implicit int is not valid in C99 and later;
 * callers ignore the value).
 */
int
learn_char(in)
int *in;
{
    int lead_high, trail_high;

    if (!maybesjis(in[1] & 0x7f, in[2] & 0x7f))
        return 0;

    lead_high = (in[1] & 0x80) ? 1 : 0;
    trail_high = (lead_high && (in[2] & 0x80)) ? 1 : 0;
    table_set(in + 1, lead_high, trail_high);
    return 1;
}
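/*
 * Shift JIS two-byte code ranges accepted by maybesjis(), in hex:
 * lead byte (or lead-byte range) : permitted trail-byte ranges.
 * maybesjis() compares against these values with bit 7 cleared.
 *
81 : 40 - 7e 80 - ac b8 - bf c8 - ce da - e8 f0 - f7 fc - fc
82 : 4f - 58 60 - 79 81 - 9a 9f - f1
83 : 40 - 7e 80 - 96 9f - b6 bf - d6
84 : 40 - 60 70 - 7e 80 - 91 9f - be
88 : 9f - fc
89 - 97 : 40 - 7e 80 - fc
98 : 40 - 72 9f - fc
99 - 9f : 40 - 7e 80 - fc
e0 - e9 : 40 - 7e 80 - fc
ea : 40 - 7e 80 - a4
*/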
/* Nonzero when lo <= v <= hi. */
static int
sjis_between(v, lo, hi)
int v, lo, hi;
{
    return lo <= v && v <= hi;
}

/*
 * Classify a byte pair as a possible two-byte code whose bit 7 is missing.
 *
 * c1: candidate lead byte, compared against lead values with bit 7 clear.
 * c2: candidate trail byte.
 *
 * Returns a bit set:
 *   1  c2 is acceptable as it stands (ranges that lie below 0x80)
 *   2  c2 is acceptable once bit 7 is added (ranges at 0x80 and above,
 *      listed here minus 0x80)
 * so 0 means "not a candidate" and 3 means both readings are possible.
 * Open-ended tests of the form c2 <= X deliberately have no lower bound.
 */
int
maybesjis(c1, c2)
int c1, c2;
{
    int as_is = 0;
    int with_bit7 = 0;

    if (c1 == 0x01) {
        as_is = sjis_between(c2, 0x40, 0x7e);
        with_bit7 = c2 <= 0x2c
            || sjis_between(c2, 0x38, 0x3f)
            || sjis_between(c2, 0x48, 0x4e)
            || sjis_between(c2, 0x5a, 0x68)
            || sjis_between(c2, 0x70, 0x77)
            || c2 == 0x7c;
    } else if (c1 == 0x02) {
        as_is = sjis_between(c2, 0x4f, 0x58)
            || sjis_between(c2, 0x60, 0x79);
        with_bit7 = sjis_between(c2, 0x01, 0x1a)
            || sjis_between(c2, 0x1f, 0x71);
    } else if (c1 == 0x03) {
        as_is = sjis_between(c2, 0x40, 0x7e);
        with_bit7 = c2 <= 0x16
            || sjis_between(c2, 0x1f, 0x36)
            || sjis_between(c2, 0x3f, 0x56);
    } else if (c1 == 0x04) {
        as_is = sjis_between(c2, 0x40, 0x60)
            || sjis_between(c2, 0x70, 0x7e);
        with_bit7 = c2 <= 0x11
            || sjis_between(c2, 0x1f, 0x3e);
    } else if (c1 == 0x08) {
        with_bit7 = sjis_between(c2, 0x1f, 0x7c);
    } else if (c1 == 0x18) {
        as_is = sjis_between(c2, 0x40, 0x72);
        with_bit7 = sjis_between(c2, 0x1f, 0x7c);
    } else if (c1 == 0x6a) {
        as_is = sjis_between(c2, 0x40, 0x7e);
        with_bit7 = c2 <= 0x24;
    } else if (sjis_between(c1, 0x09, 0x17)
            || sjis_between(c1, 0x19, 0x1f)
            || sjis_between(c1, 0x60, 0x69)) {
        as_is = sjis_between(c2, 0x40, 0x7e);
        with_bit7 = c2 <= 0x7c;
    }

    return (as_is ? 1 : 0) | (with_bit7 ? 2 : 0);
}
/* Put both tables into their initial state: table1 all 0x00, table2 all 0xff. */
static void
reset_tables()
{
    register int i;

    for (i = 0; i < TABLE_SIZE; ++i) {
        table1[i] = 0x00;
        table2[i] = 0xff;
    }
}

/*
 * Fill table1 and table2 from FILE_NAME (table1 first, then table2,
 * TABLE_SIZE bytes each).
 *
 * When the file cannot be opened the tables silently get their initial
 * values.  When the file is too short or a read fails, a message is
 * printed and the initial values are used as well, so the tables never
 * contain uninitialised memory.  The file is opened in binary mode
 * because the tables are raw bytes.
 *
 * Returns 0 when the tables were loaded or the file was absent, -1 when
 * the file existed but could not be read completely.
 */
int
load_table(file_name)
char *file_name;
{
    FILE *fp;
    int complete;

    fp = fopen(file_name, "rb");
    if (fp == NULL) {
        reset_tables();
        return 0;
    }

    complete = fread((char *)table1, TABLE_SIZE, 1, fp) == 1
        && fread((char *)table2, TABLE_SIZE, 1, fp) == 1;
    fclose(fp);

    if (!complete) {
        fprintf(stderr, "%s: short or unreadable table, using defaults\n",
                file_name);
        reset_tables();
        return -1;
    }
    return 0;
}
/*
 * Write table1 followed by table2 (TABLE_SIZE bytes each) to FILE_NAME,
 * replacing any previous contents.  The file is opened in binary mode
 * because the tables are raw bytes.
 *
 * Every failure (open, write, close) is reported with perror().
 *
 * Returns 0 on success, -1 on failure; callers may ignore the value.
 */
int
put_table(file_name)
char *file_name;
{
    FILE *fp;

    fp = fopen(file_name, "wb");
    if (fp == NULL) {
        perror(file_name);
        return -1;
    }

    if (fwrite((char *)table1, TABLE_SIZE, 1, fp) != 1
        || fwrite((char *)table2, TABLE_SIZE, 1, fp) != 1) {
        perror(file_name);
        fclose(fp);
        return -1;
    }

    /* fclose flushes buffered data, so it can fail on a full disk too. */
    if (fclose(fp) != 0) {
        perror(file_name);
        return -1;
    }
    return 0;
}
/*
 * Single-bit masks for the packed bit tables:
 * setmask[n] has only bit n set, clrmask[n] has every bit except bit n.
 */
unsigned char setmask[8] = {
    0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80
};
unsigned char clrmask[8] = {
    0xfe, 0xfd, 0xfb, 0xf7, 0xef, 0xdf, 0xbf, 0x7f
};
table_set(c, value1, value2)
int *c;
int value1;
int value2;
{
register int byte;
byte = cbyte(c);
if (value1)
table1[byte/8] |= setmask[byte&7];
else
table1[byte/8] &= clrmask[byte&7];
if (value2)
table2[byte/8] |= setmask[byte&7];
else
table2[byte/8] &= clrmask[byte&7];
}
table_req(c, value1, value2)
int *c;
int *value1;
int *value2;
{
register int byte;
byte = cbyte(c);
*value1 = (table1[byte/8] & setmask[byte&7]) ? 1 : 0;
*value2 = (table2[byte/8] & setmask[byte&7]) ? 1 : 0;
}
/*
 * Compute the bit index that table_set() and table_req() use for the
 * pair at c[0]/c[1] in its surrounding context.
 *
 * c: pointer to the candidate lead byte.  Reads c[-1], c[0], c[1], c[3].
 *
 * The low 7 bits of c[0] are folded into a row number (1..4 -> 0..3,
 * 5..0x1f -> 1..0x1b, anything larger -> value - 0x44).  The result is
 *   (row * 128 + (c[1] & 0x7f)) * 0x200 + context
 * where the 9 context bits are, from high to low, bits 7, 6, 5 and 3 of
 * c[-1] followed by bits 6, 5, 3, 2 and 0 of c[3].
 */
int
cbyte(c)
int *c;
{
    register int lead;
    register int row;
    register int before;
    register int after;
    register int context;

    lead = c[0] & 0x7f;
    if (lead > 0x1f)
        row = lead - 0x44;
    else if (lead > 4)
        row = lead - 4;
    else
        row = lead - 1;

    before = c[-1];
    after = c[3];

    /* Pack the selected bits of both neighbours into 9 adjacent bits. */
    context = ((before & 0xe0) << 1)	/* bits 7,6,5 -> 0x100,0x080,0x040 */
            | ((before & 0x08) << 2)	/* bit 3      -> 0x020 */
            | ((after & 0x60) >> 2)	/* bits 6,5   -> 0x010,0x008 */
            | ((after & 0x0c) >> 1)	/* bits 3,2   -> 0x004,0x002 */
            | (after & 0x01);		/* bit 0      -> 0x001 */

    return (row * 128 + (c[1] & 0x7f)) * 0x200 + context;
}
/* syntax highlighted by Code2HTML, v. 0.9.1 */