/*
 * RSJIS ( Recover SJIS code from broken file )
 * $Header: rsjis.c,v 0.2 92/09/04 takahasi Exp $
 * Copyright (C) 1992
 * Hironobu Takahashi (takahasi@tiny.or.jp)
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2, or (at your option)
 * any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with KAKASI, see the file COPYING.  If not, write to the Free
 * Software Foundation Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */
/* $Log:	rsjis.c,v $
 *
 */

#include <stdio.h>

/*
 * Size in bytes of each decode table.  cbyte() produces a bit index and
 * the tables are addressed as index/8, so 64*39*128 bytes hold
 * 39*128*512 bits.  The expansion is parenthesized so the macro stays one
 * operand inside a larger expression (e.g. "x % TABLE_SIZE").
 */
#define TABLE_SIZE (64*39*128)

/* Directory of the default decode table; may be overridden at build time. */
#ifndef LIBDIR
#define LIBDIR "."
#endif
/* File name of the default decode table inside LIBDIR. */
#define TABLE_NAME "sjis_tbl"

/*
 * Bit tables of TABLE_SIZE bytes each, allocated in main().
 * table1 is consulted before bit 7 of a lead byte is restored; table2 is
 * consulted for the trail byte when maybesjis() reports it as ambiguous.
 */
unsigned char *table1, *table2;

/* Values of process_mode. */
#define RECOVER     0	/* restore high bits using the tables */
#define LEARNING    1	/* update the tables from the input */

int process_mode;

extern void exit();

/*
 * Prototypes for the functions defined in this file, so that no call
 * depends on an implicit declaration.
 */
int usage(char **argv);
int char_shift(int *in);
int process(FILE *input, FILE *output);
int convert_char(int *in);
int learn_char(int *in);
int maybesjis(int c1, int c2);
int load_table(char *file_name);
int put_table(char *file_name);
int table_set(int *c, int value1, int value2);
int table_req(int *c, int *value1, int *value2);
int cbyte(int *c);

/*
 * Print the command-line synopsis to stderr.
 *
 * argv: the program's argument vector; only argv[0] is read, and it is
 *       shown as the program name.
 *
 * Returns 0.  The function only prints; exiting is left to the caller.
 */
int
usage(argv)
     char **argv;
{
    char *prog = argv[0];

    fprintf(stderr, "Usage:\n");
    fprintf(stderr, "\n");
    fprintf(stderr, " Recover:      %s    [ -t decode_table ] [input_file [output_file]]\n", prog);
    fprintf(stderr, " Learning:     %s -l [ -t decode_table ] [input_file]\n", prog);
    fprintf(stderr, "\n");
    fprintf(stderr, " %s process only Shift JIS encoded files\n", prog);
    fprintf(stderr, " default table is %s/%s\n", LIBDIR, TABLE_NAME);
    fprintf(stderr, "\n");
    return 0;
}

/*
 * Program entry point.
 *
 * Options:
 *   -l            switch from the default RECOVER mode to LEARNING mode
 *   -t name       use "name" as the decode table ("-tname" also accepted)
 *   anything else starting with '-' prints the usage text and exits
 *
 * The first non-option argument is opened as the input file (default
 * stdin); the second is opened as the output file (default stdout) and
 * ends argument processing.
 *
 * After parsing, both tables are allocated, the table file is loaded, the
 * input is processed, and in LEARNING mode the table is written back.
 *
 * Returns 0 on completion; exits with status 1 when a file cannot be
 * opened, when -t lacks its argument, or when memory cannot be allocated.
 */
int
main(argc, argv)
     int argc;
     char **argv;
{
    FILE *input, *output;
    char default_table[256];
    char *table_name;
    int i;
    /* size_t comes from <stdio.h>; this matches the library's malloc. */
    extern void *malloc(size_t);

    input = stdin;
    output = stdout;
    process_mode = RECOVER;
    /* Only compile-time strings are formatted here. */
    sprintf(default_table, "%s/%s", LIBDIR, TABLE_NAME);
    table_name = default_table;

    for (i = 1; i < argc; ++ i) {
	if (argv[i][0] == '-') {
	    switch(argv[i][1]) {
	      case 'l':
		process_mode = LEARNING;
		break;
	      case 't':
		/*
		 * Point at the argument text instead of copying it, so a
		 * long path cannot overflow a fixed buffer.
		 */
		if (argv[i][2] != '\0') {
		    table_name = argv[i] + 2;
		} else if (i + 1 < argc) {
		    table_name = argv[++i];
		} else {
		    /* "-t" was the last argument: nothing to read. */
		    fprintf(stderr, "%s: option -t requires a table name\n",
			    argv[0]);
		    usage(argv);
		    exit(1);
		}
		break;
	      default:
		usage(argv);
		exit(0);
	    }
	} else {
	    if (input == stdin) {
		if ((input = fopen(argv[i], "r")) == NULL) {
		    perror(argv[i]);
		    exit(1);
		}
	    } else {
		if ((output = fopen(argv[i], "w")) == NULL) {
		    perror(argv[i]);
		    exit(1);
		}
		/* Leaves the for loop: arguments after the output file
		   are not examined. */
		break;
	    }
	}
    }

    table1 = (unsigned char *)malloc((size_t)TABLE_SIZE);
    table2 = (unsigned char *)malloc((size_t)TABLE_SIZE);
    if ((table1 == NULL) || (table2 == NULL)) {
	fprintf(stderr, "%s: can't alloc memory\n", argv[0]);
	exit(1);
    }

    load_table(table_name);
    process(input, output);
    if (process_mode != RECOVER)
	put_table(table_name);
    return 0;
}

/*
 * Slide the five-element window one position to the left.
 *
 * in: array of at least 5 ints.  in[0..3] receive the old in[1..4];
 *     in[4] is left unchanged.
 *
 * Returns 0.
 */
int
char_shift(in)
     int *in;
{
    int i;

    for (i = 0; i < 4; ++ i) {
	in[i] = in[i+1];
    }
    return 0;
}

/*
 * Write c to out unless c is EOF.  Used at end of input, where window
 * slots may still hold EOF because the input was shorter than the window.
 */
static void
put_byte(c, out)
     int c;
     FILE *out;
{
    if (c != EOF)
	putc(c, out);
}

/*
 * Run the current process_mode over the whole input stream.
 *
 * A five-slot window is kept over the input: in[0] is the byte before the
 * current one (a newline at start of input), in[1] is the current byte,
 * in[2] the byte after it, and in[3], in[4] are look-ahead.  convert_char()
 * and learn_char() work on in[1]/in[2]; the table index they use is built
 * from in[0] and in[4] as well.
 *
 * RECOVER:  each byte is passed through convert_char() and written to
 *           output.  When the byte just handled has bit 7 set, the next
 *           byte is written as its trail byte and skipped.
 * LEARNING: each byte is passed to learn_char(); nothing is written, and
 *           the bytes left in the window at end of input are not learned.
 *
 * At end of input (RECOVER only) the bytes still in the window are
 * flushed with newlines standing in for the missing look-ahead.
 *
 * Returns 0.
 */
int
process(input, output)
     FILE *input;
     FILE *output;
{
    int in[5];

    in[0] = '\n';
    in[1] = getc(input);
    in[2] = getc(input);
    in[3] = getc(input);
    while ((in[4] = getc(input)) != EOF) {
	switch (process_mode) {
	  case RECOVER:
	    convert_char(in);
	    putc(in[1], output);
	    char_shift(in);
	    if (in[0] & 0x80) {
		/* in[0] was a lead byte: emit and consume its trail byte. */
		putc(in[1], output);
		in[4] = getc(input);
		char_shift(in);
	    }
	    break;
	  case LEARNING:
	    learn_char(in);
	    char_shift(in);
	    if (in[0] & 0x80) {
		/* Skip the trail byte of the pair just learned. */
		in[4] = getc(input);
		char_shift(in);
	    }
	    break;
	}
    }
    if (process_mode != LEARNING) {
	if (in[3] != EOF) {
	    /* Three bytes remain: handle the first with a newline as
	       look-ahead. */
	    in[4] = '\n';
	    convert_char(in);
	    putc(in[1], output);
	    char_shift(in);
	    if (in[0] & 0x80) {
		putc(in[1], output);
		putc(in[2], output);
		return 0;
	    }
	}
	in[3] = in[4] = '\n';
	/*
	 * With fewer than two bytes of input, in[2] (and possibly in[1])
	 * is still EOF.  There is then no pair to convert, and EOF must
	 * not be written out as a 0xFF byte.
	 */
	if (in[2] != EOF)
	    convert_char(in);
	put_byte(in[1], output);
	put_byte(in[2], output);
    }
    return 0;
}

/*
 * Restore bit 7 on the byte pair in[1], in[2] when the decode tables say
 * the pair is a double-byte character.
 *
 * in: window as maintained by process(); in+1 is handed to table_req(),
 *     so in[0] and in[4] must be readable as well.
 *
 * maybesjis() classifies the pair: 1 means the trail byte is valid as it
 * stands, 2 means it is valid only with bit 7 set, 3 means both.  Nothing
 * is changed unless table_req() reports value1 set; then in[1] gets bit 7,
 * and in[2] gets bit 7 for class 2, or for class 3 when value2 is set.
 *
 * Returns 0 (the original ended with a bare "return;", which is not
 * allowed in a function returning int).
 */
int convert_char(in)
     int *in;
{
    int value1, value2;
    int ret;

    ret = maybesjis(in[1], in[2]);
    if (ret == 0)
	return 0;

    table_req(in+1, &value1, &value2);
    if (!value1)
	return 0;

    in[1] |= 0x80;
    if (ret == 2 || (ret == 3 && value2))
	in[2] |= 0x80;
    return 0;
}

/*
 * Record in the decode tables how the byte pair in[1], in[2] appears in
 * the input.
 *
 * in: window as maintained by process(); in+1 is handed to table_set(),
 *     so in[0] and in[4] must be readable as well.
 *
 * Pairs that maybesjis() rejects (tested with bit 7 cleared) are ignored.
 * Otherwise table_set() stores 1 for the lead byte when in[1] has bit 7
 * set, and 1 for the trail byte when both in[1] and in[2] have bit 7 set;
 * both are stored as 0 when in[1] has bit 7 clear.
 *
 * Returns 0.
 */
int
learn_char(in)
     int *in;
{
    int lead_high, trail_high;

    if (!maybesjis(in[1]&0x7f, in[2]&0x7f))
	return 0;

    lead_high = (in[1] & 0x80) ? 1 : 0;
    trail_high = (lead_high && (in[2] & 0x80)) ? 1 : 0;
    table_set(in+1, lead_high, trail_high);
    return 0;
}

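/*
 * Valid Shift JIS double-byte codes, written as
 *     lead byte(s) : trail byte ranges
 * with all values in hexadecimal.  maybesjis() tests these same ranges
 * with bit 7 of both bytes cleared.
 *
81      : 40 - 7e 80 - ac b8 - bf c8 - ce da - e8 f0 - f7 fc - fc
82      : 4f - 58 60 - 79 81 - 9a 9f - f1
83      : 40 - 7e 80 - 96 9f - b6 bf - d6
84      : 40 - 60 70 - 7e 80 - 91 9f - be
88      : 9f - fc
89 - 97 : 40 - 7e 80 - fc
98      : 40 - 72 9f - fc
99 - 9f : 40 - 7e 80 - fc
e0 - e9 : 40 - 7e 80 - fc
ea      : 40 - 7e 80 - a4
*/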

/* Nonzero when lo <= v <= hi. */
static int
in_span(v, lo, hi)
     int v, lo, hi;
{
    return (lo <= v) && (v <= hi);
}

/*
 * Classify a byte pair whose high bits have been cleared.
 *
 * c1: candidate lead byte with bit 7 cleared
 * c2: candidate trail byte with bit 7 cleared
 *
 * Returns a two-bit mask:
 *   bit 0 (value 1)  c2 lies in a trail range listed below 0x80, so the
 *                    trail byte is acceptable as it stands;
 *   bit 1 (value 2)  c2 lies in a trail range listed at 0x80 or above
 *                    once bit 7 is cleared, so the trail byte is
 *                    acceptable with bit 7 set.
 * 0 means c1 is not a lead byte handled here or c2 fits neither range.
 * The ranges follow the table in the comment preceding this function.
 */
int
maybesjis(c1, c2)
int c1, c2;
{
    int plain = 0;	/* contributes 1 to the result */
    int high = 0;	/* contributes 2 to the result */

    if (c1 == 0x01) {
	plain = in_span(c2, 0x40, 0x7e);
	high = (c2 <= 0x2c) ||
	       in_span(c2, 0x38, 0x3f) ||
	       in_span(c2, 0x48, 0x4e) ||
	       in_span(c2, 0x5a, 0x68) ||
	       in_span(c2, 0x70, 0x77) ||
	       (c2 == 0x7c);
    } else if (c1 == 0x02) {
	plain = in_span(c2, 0x4f, 0x58) || in_span(c2, 0x60, 0x79);
	high = in_span(c2, 0x01, 0x1a) || in_span(c2, 0x1f, 0x71);
    } else if (c1 == 0x03) {
	plain = in_span(c2, 0x40, 0x7e);
	high = (c2 <= 0x16) ||
	       in_span(c2, 0x1f, 0x36) ||
	       in_span(c2, 0x3f, 0x56);
    } else if (c1 == 0x04) {
	plain = in_span(c2, 0x40, 0x60) || in_span(c2, 0x70, 0x7e);
	high = (c2 <= 0x11) || in_span(c2, 0x1f, 0x3e);
    } else if (c1 == 0x08) {
	high = in_span(c2, 0x1f, 0x7c);
    } else if (c1 == 0x18) {
	plain = in_span(c2, 0x40, 0x72);
	high = in_span(c2, 0x1f, 0x7c);
    } else if (c1 == 0x6a) {
	plain = in_span(c2, 0x40, 0x7e);
	high = (c2 <= 0x24);
    } else if (in_span(c1, 0x09, 0x17) ||
	       in_span(c1, 0x19, 0x1f) ||
	       in_span(c1, 0x60, 0x69)) {
	plain = in_span(c2, 0x40, 0x7e);
	high = (c2 <= 0x7c);
    }

    return (plain ? 1 : 0) | (high ? 2 : 0);
}

/*
 * Fill table1 and table2 from the table file, or with defaults.
 *
 * file_name: path of the table file; table1 is stored first, followed by
 *            table2, TABLE_SIZE bytes each.
 *
 * When the file cannot be opened the tables get their default contents
 * (table1 all 0x00, table2 all 0xff) without any message.  When the file
 * opens but does not supply both tables in full, a warning is printed and
 * the defaults are used as well, so no partly-read table is ever used.
 * The file is opened in binary mode because it holds raw bytes.
 *
 * Returns 0.
 */
int
load_table(file_name)
     char *file_name;
{
    FILE *fp;
    int loaded = 0;
    register int i;

    if ((fp = fopen(file_name, "rb")) != NULL) {
	if (fread((char *)table1, TABLE_SIZE, 1, fp) == 1 &&
	    fread((char *)table2, TABLE_SIZE, 1, fp) == 1)
	    loaded = 1;
	else
	    fprintf(stderr, "%s: incomplete table, using defaults\n",
		    file_name);
	fclose (fp);
    }

    if (!loaded) {
	for (i = 0; i < TABLE_SIZE; ++ i) {
	    table1[i] = 0x00;
	    table2[i] = 0xff;
	}
    }
    return 0;
}

/*
 * Write table1 followed by table2 (TABLE_SIZE bytes each) to the table
 * file, replacing its previous contents.
 *
 * file_name: path of the table file.
 *
 * The file is opened in binary mode because it holds raw bytes.  A failed
 * open, write, or close is reported with perror().
 *
 * Returns 0 when the table was written completely, -1 otherwise.
 */
int
put_table(file_name)
     char *file_name;
{
    FILE *fp;

    if ((fp = fopen(file_name, "wb")) == NULL) {
	perror(file_name);
	return -1;
    }
    if (fwrite((char *)table1, TABLE_SIZE, 1, fp) != 1 ||
	fwrite((char *)table2, TABLE_SIZE, 1, fp) != 1) {
	perror(file_name);
	fclose (fp);
	return -1;
    }
    /* Buffered data is written out by fclose, so its result matters. */
    if (fclose (fp) != 0) {
	perror(file_name);
	return -1;
    }
    return 0;
}

/* setmask[k] has only bit k set; clrmask[k] has every bit of a byte set
   except bit k. */
unsigned char setmask[8] = {0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80};
unsigned char clrmask[8] = {0xfe, 0xfd, 0xfb, 0xf7, 0xef, 0xdf, 0xbf, 0x7f};

/*
 * Store one bit in each decode table at the position cbyte(c) selects.
 *
 * c:      passed to cbyte(), which reads c[-1], c[0], c[1] and c[3]
 * value1: nonzero sets the bit in table1, zero clears it
 * value2: nonzero sets the bit in table2, zero clears it
 *
 * Returns 0.
 */
int
table_set(c, value1, value2)
     int *c;
     int value1;
     int value2;
{
    register int bit;

    bit = cbyte(c);

    if (value1)
	table1[bit/8] |= setmask[bit&7];
    else
	table1[bit/8] &= clrmask[bit&7];

    if (value2)
	table2[bit/8] |= setmask[bit&7];
    else
	table2[bit/8] &= clrmask[bit&7];

    return 0;
}

/*
 * Read one bit from each decode table at the position cbyte(c) selects.
 *
 * c:      passed to cbyte(), which reads c[-1], c[0], c[1] and c[3]
 * value1: receives 1 when the bit in table1 is set, otherwise 0
 * value2: receives 1 when the bit in table2 is set, otherwise 0
 *
 * Returns 0.
 */
int
table_req(c, value1, value2)
     int *c;
     int *value1;
     int *value2;
{
    register int bit;

    bit = cbyte(c);

    *value1 = (table1[bit/8] & setmask[bit&7]) ? 1 : 0;
    *value2 = (table2[bit/8] & setmask[bit&7]) ? 1 : 0;
    return 0;
}

/*
 * Compute the bit index used to address the decode tables.
 *
 * c: pointer to the lead byte inside a window; c[-1], c[0], c[1] and c[3]
 *    are read, so all four must be valid elements.
 *
 * The low 7 bits of c[0] are mapped to a row number (values up to 4 map
 * to value-1, values up to 0x1f to value-4, larger values to value-0x44).
 * The row and the low 7 bits of c[1] form the upper part of the index;
 * the low 9 bits are taken from selected bits of c[-1] (0x80, 0x40, 0x20,
 * 0x08) and of c[3] (0x40, 0x20, 0x08, 0x04, 0x01).
 */
int cbyte(c)
int *c;
{
    register int row;
    int before = c[-1];
    int after = c[3];
    int context = 0;

    row = c[0]&0x7f;
    if (row <= 4)
	row -= 1;
    else if (row <= 0x1f)
	row -= 4;
    else
	row -= 0x44;

    if (before & 0x80) context |= 0x100;
    if (before & 0x40) context |= 0x080;
    if (before & 0x20) context |= 0x040;
    if (before & 0x08) context |= 0x020;
    if (after & 0x40)  context |= 0x010;
    if (after & 0x20)  context |= 0x008;
    if (after & 0x08)  context |= 0x004;
    if (after & 0x04)  context |= 0x002;
    if (after & 0x01)  context |= 0x001;

    return (row*128 + (c[1]&0x7f)) * 0x200 + context;
}


/* syntax highlighted by Code2HTML, v. 0.9.1 */