/*
 * RUJIS ( Recover UJIS code from broken file )
 * $Header: rujis.c,v 0.2 92/09/24 takahasi Exp $
 * Copyright (C) 1992
 * Hironobu Takahashi (takahasi@tiny.or.jp)
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2, or (at your option)
 * any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with KAKASI, see the file COPYING.  If not, write to the Free
 * Software Foundation Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */
/* $Log:	rujis.c,v $
 *
 */

#include <stdio.h>

/*
 * Size in bytes of the decode table buffer.  The expansion is wrapped in
 * parentheses so that it stays a single value when the macro is used
 * inside a cast or a larger expression.
 */
#define TABLE_SIZE (64*94*84)

/* Directory of the default table file; can be overridden at build time. */
#ifndef LIBDIR
#define LIBDIR "."
#endif
/* File name of the default table inside LIBDIR. */
#define TABLE_NAME "ujis_tbl"

/* Decode table buffer (TABLE_SIZE bytes), allocated in main(). */
unsigned char *table;

/* Values taken by process_mode. */
#define RECOVER     0
#define LEARNING    1

/* Selected mode: RECOVER unless the -l option switches it to LEARNING. */
int process_mode;

extern void exit();

/*
 * Print the command-line synopsis to stderr.
 *
 * argv: the program's argument vector; only argv[0] is read and it is
 *       shown as the program name in the message.
 *
 * The function yields no value.  Its return type is spelled out because
 * implicit int is not valid in C99 and later.
 */
void
usage(char **argv)
{
    const char *prog = argv[0];

    fprintf(stderr, "Usage:\n");
    fprintf(stderr, "\n");
    fprintf(stderr, " Recover:      %s    [ -t decode_table ] [input_file [output_file]]\n", prog);
    fprintf(stderr, " Learning:     %s -l [ -t decode_table ] [input_file]\n", prog);
    fprintf(stderr, "\n");
    fprintf(stderr, " %s process only UJIS encoded files\n", prog);
    fprintf(stderr, " default table is %s/%s\n", LIBDIR, TABLE_NAME);
    fprintf(stderr, "\n");
}

/*
 * Copy the NUL-terminated string src into dst, which holds cap bytes.
 * Returns 0 on success, or -1 when src plus its terminator does not fit
 * (dst then holds an unterminated partial copy).
 */
static int
copy_table_name(char *dst, size_t cap, const char *src)
{
    size_t n;

    if (cap == 0)
        return -1;
    for (n = 0; src[n] != '\0'; ++n) {
        if (n + 1 >= cap)
            return -1;
        dst[n] = src[n];
    }
    dst[n] = '\0';
    return 0;
}

/*
 * Program entry point.
 *
 * Options:
 *   -l            switch process_mode to LEARNING (default is RECOVER)
 *   -t NAME       use NAME as the table file ("-tNAME" is also accepted)
 *   anything else starting with '-' prints the usage text and exits 0
 *
 * The first non-option argument is opened as the input file and the
 * second as the output file; option parsing stops after the output file.
 * Without them stdin and stdout are used.
 *
 * Exits with status 1 when a file cannot be opened, when -t has no
 * argument, when the table name does not fit the buffer, or when the
 * table buffer cannot be allocated.  Returns 0 otherwise.
 */
int
main(int argc, char **argv)
{
    /*
     * Declared here because this file includes only <stdio.h> (which
     * supplies size_t and FILE) and several callees are defined further
     * down.  NOTE(review): including <stdlib.h> would be the cleaner way
     * to obtain malloc().
     */
    extern void *malloc(size_t);
    extern int load_table(char *);
    extern int process(FILE *, FILE *);
    extern int put_table(char *);
    FILE *input = stdin;
    FILE *output = stdout;
    char table_name[256];
    int i;

    process_mode = RECOVER;

    /* Bound the LIBDIR part so "dir/name" plus its NUL always fits. */
    sprintf(table_name, "%.*s/%s",
            (int)(sizeof table_name - sizeof(TABLE_NAME) - 1),
            LIBDIR, TABLE_NAME);

    for (i = 1; i < argc; ++i) {
        if (argv[i][0] == '-') {
            switch (argv[i][1]) {
              case 'l':
                process_mode = LEARNING;
                break;
              case 't': {
                const char *name = argv[i] + 2;     /* "-tNAME" form */

                if (*name == '\0') {
                    /* "-t NAME" form: the name is the next argument. */
                    if (i + 1 >= argc) {
                        usage(argv);
                        exit(1);
                    }
                    name = argv[++i];
                }
                if (copy_table_name(table_name, sizeof table_name, name) != 0) {
                    fprintf(stderr, "%s: table name too long\n", argv[0]);
                    exit(1);
                }
                break;
              }
              default:
                usage(argv);
                exit(0);
            }
        } else if (input == stdin) {
            /* First plain argument: the input file. */
            if ((input = fopen(argv[i], "r")) == NULL) {
                perror(argv[i]);
                exit(1);
            }
        } else {
            /* Second plain argument: the output file; stop parsing. */
            if ((output = fopen(argv[i], "w")) == NULL) {
                perror(argv[i]);
                exit(1);
            }
            break;
        }
    }

    table = malloc(TABLE_SIZE);
    if (table == NULL) {
        fprintf(stderr, "%s: can't alloc memory\n", argv[0]);
        exit(1);
    }

    load_table(table_name);
    process(input, output);
    if (process_mode != RECOVER)
        put_table(table_name);
    return 0;
}

/*
 * Slide the five-element window one position to the left:
 * in[0..3] receive the old in[1..4]; in[4] is left unchanged.
 *
 * in: array of at least five ints.
 *
 * The function yields no value.  Its return type is spelled out because
 * implicit int is not valid in C99 and later.
 */
void
char_shift(int *in)
{
    int k;

    for (k = 1; k <= 4; ++k)
        in[k - 1] = in[k];
}

/*
 * Run the selected mode over the whole input stream.
 *
 * A five-element window is kept over the input: in[0] is the byte most
 * recently consumed (a newline at start of input), in[1] and in[2] are
 * the pair handed to convert_char()/learn_char(), and in[3], in[4] are
 * the bytes that follow.
 *
 * RECOVER:  each step calls convert_char(), writes in[1] to output and
 *           shifts the window; if the byte just written has bit 7 set,
 *           the next byte is written too and the window shifts again.
 * LEARNING: the same walk using learn_char(), with nothing written.
 *
 * After end of input, RECOVER mode flushes the bytes still held in the
 * window, padding the look-ahead positions with newlines.
 *
 * input:  stream to read from.
 * output: stream written to in RECOVER mode.
 *
 * Returns 0.  Stream errors are not reported.
 */
int
process(FILE *input, FILE *output)
{
    int in[5];

    in[0] = '\n';
    in[1] = getc(input);
    in[2] = getc(input);
    in[3] = getc(input);
    while ((in[4] = getc(input)) != EOF) {
        switch (process_mode) {
          case RECOVER:
            convert_char(in);
            putc(in[1], output);
            char_shift(in);
            if (in[0] & 0x80) {
                /* Second byte of a two-byte character. */
                putc(in[1], output);
                in[4] = getc(input);
                char_shift(in);
            }
            break;
          case LEARNING:
            learn_char(in);
            char_shift(in);
            if (in[0] & 0x80) {
                in[4] = getc(input);
                char_shift(in);
            }
            break;
          default:
            break;
        }
    }

    if (process_mode == LEARNING)
        return 0;

    if (in[3] != EOF) {
        /* Three bytes are still pending. */
        in[4] = '\n';
        convert_char(in);
        putc(in[1], output);
        char_shift(in);
        if (in[0] & 0x80) {
            putc(in[1], output);
            putc(in[2], output);
            return 0;
        }
    }

    /*
     * Input shorter than two bytes leaves EOF in in[1] and/or in[2].
     * EOF must never reach putc(), which would write it out as a stray
     * byte (0xFF where EOF is -1).
     */
    if (in[1] == EOF)
        return 0;
    if (in[2] == EOF) {
        /* A lone byte cannot be a pair; write it unchanged. */
        putc(in[1], output);
        return 0;
    }

    /* Two bytes are pending. */
    in[3] = in[4] = '\n';
    convert_char(in);
    putc(in[1], output);
    putc(in[2], output);
    return 0;
}

/*
 * Restore bit 7 on the pair in[1], in[2] when maybeujis() accepts the
 * pair and table_req() reports a set table bit for the window at in+1.
 *
 * in: five-element window; in[1] and in[2] may be modified.
 *
 * Returns 1 when the pair was modified, 0 otherwise.  (The function is
 * declared int, so a bare "return;" is not valid here.)
 */
int
convert_char(int *in)
{
    if (maybeujis(in[1], in[2]) && table_req(in + 1)) {
        in[1] |= 0x80;
        in[2] |= 0x80;
        return 1;
    }
    return 0;
}

/*
 * Record one training sample for the pair in[1], in[2].
 *
 * When the pair, with bit 7 masked off, is accepted by maybeujis(), the
 * table bit for the window at in+1 is set to 1 if in[1] has bit 7 set
 * and to 0 otherwise.
 *
 * in: five-element window; it is not modified.
 *
 * Returns 0.  The return type is spelled out because implicit int is
 * not valid in C99 and later.
 */
int
learn_char(int *in)
{
    if (maybeujis(in[1] & 0x7f, in[2] & 0x7f))
        table_set(in + 1, (in[1] & 0x80) ? 1 : 0);
    return 0;
}

/*
 * Decide whether (c1, c2) is an acceptable two-byte code with bit 7
 * cleared on both bytes.
 *
 * c1 must lie in 33..40 or 48..116 and c2 in 33..126.  With
 * row = c1 - 32 and cell = c2 - 32, rows 2-8, 47 and 84 accept only
 * the cell ranges listed below; every other row accepts any cell.
 *
 * Returns 1 when the pair is accepted, 0 otherwise.
 */
int
maybeujis(c1, c2)
int c1, c2;
{
    int row, cell;

    if (c2 < 33 || c2 > 126)
        return 0;
    if (c1 < 33 || c1 > 116)
        return 0;
    if (c1 > 40 && c1 < 48)
        return 0;

    row = c1 - 32;
    cell = c2 - 32;

    switch (row) {
      case 2:
        /* accepted: 1-14, 26-33, 42-48, 60-74, 82-89, 94 */
        return !((cell >= 15 && cell <= 25) ||
                 (cell >= 34 && cell <= 41) ||
                 (cell >= 49 && cell <= 59) ||
                 (cell >= 75 && cell <= 81) ||
                 (cell >= 90 && cell <= 93));
      case 3:
        /* accepted: 16-25, 33-58, 65-90 */
        return (cell >= 16 && cell <= 25) ||
               (cell >= 33 && cell <= 58) ||
               (cell >= 65 && cell <= 90);
      case 4:
        return cell <= 83;
      case 5:
        return cell <= 86;
      case 6:
        /* accepted: 1-24, 33-56 */
        return cell <= 24 || (cell >= 33 && cell <= 56);
      case 7:
        /* accepted: 1-33, 49-81 */
        return cell <= 33 || (cell >= 49 && cell <= 81);
      case 8:
        return cell <= 32;
      case 47:
        return cell <= 51;
      case 84:
        return cell <= 6;
      default:
        return 1;
    }
}

/*
 * Fill the global table buffer from the file file_name.
 *
 * The buffer is cleared first, so a missing file or a file shorter than
 * TABLE_SIZE bytes leaves the remaining bytes zero instead of whatever
 * the freshly allocated buffer happened to contain.  A file that cannot
 * be opened is not treated as an error: the table simply starts empty.
 *
 * file_name: path of the table file; opened in binary mode.
 *
 * Returns 0 on success (including "file not opened"), or -1 after
 * reporting a read error with perror().
 */
int
load_table(char *file_name)
{
    FILE *fp;
    size_t total = TABLE_SIZE;
    size_t i;
    int status = 0;

    for (i = 0; i < total; ++i)
        table[i] = 0x00;

    if ((fp = fopen(file_name, "rb")) == NULL)
        return 0;

    /* Byte-sized items, so a short file still loads what it has. */
    if (fread((char *)table, 1, total, fp) != total && ferror(fp)) {
        perror(file_name);
        status = -1;
    }
    fclose(fp);
    return status;
}

/*
 * Write the global table buffer (TABLE_SIZE bytes) to file_name,
 * replacing any previous contents.
 *
 * file_name: path of the table file; opened in binary mode.
 *
 * Returns 0 on success, or -1 after reporting the failure with perror()
 * when the file cannot be opened, written completely, or closed.
 */
int
put_table(char *file_name)
{
    FILE *fp;
    size_t total = TABLE_SIZE;

    if ((fp = fopen(file_name, "wb")) == NULL) {
        perror(file_name);
        return -1;
    }
    if (fwrite((char *)table, 1, total, fp) != total) {
        perror(file_name);
        fclose(fp);
        return -1;
    }
    /* Buffered data is flushed by fclose(), so its result matters too. */
    if (fclose(fp) == EOF) {
        perror(file_name);
        return -1;
    }
    return 0;
}

/*
 * Single-bit masks indexed by bit number 0..7:
 * setmask[n] has only bit n set, clrmask[n] has every bit except n set.
 */
unsigned char setmask[8] = {
    0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80
};
unsigned char clrmask[8] = {
    0xfe, 0xfd, 0xfb, 0xf7, 0xef, 0xdf, 0xbf, 0x7f
};

/*
 * Set or clear the table bit selected by cbyte(c).
 *
 * c:     pointer handed on to cbyte().
 * value: non-zero sets the bit, zero clears it.
 *
 * The value from cbyte() is used as a bit index: index / 8 selects the
 * byte in table and index & 7 the bit inside that byte.
 *
 * Returns 0.  The return type is spelled out because implicit int is
 * not valid in C99 and later.
 */
int
table_set(int *c, int value)
{
    int bit = cbyte(c);

    if (value)
        table[bit / 8] |= setmask[bit & 7];
    else
        table[bit / 8] &= clrmask[bit & 7];
    return 0;
}

/*
 * Look up the table bit selected by cbyte(c).
 *
 * c: pointer handed on to cbyte().
 *
 * The value from cbyte() is used as a bit index: index / 8 selects the
 * byte in table and index & 7 the bit inside that byte.
 *
 * Returns 1 when the bit is set, 0 when it is clear.
 */
int
table_req(int *c)
{
    int bit = cbyte(c);

    return (table[bit / 8] & setmask[bit & 7]) ? 1 : 0;
}

/*
 * Compute the table bit index for the pair c[0], c[1] and its context.
 *
 * c: points at the first byte of the pair; c[-1], c[2] and c[3] must be
 *    readable as well.
 *
 * The index is made of:
 *   - the pair number (row * 94 + cell, where row and cell are the two
 *     bytes with bit 7 masked off, minus 33) multiplied by 0x200;
 *   - nine context bits taken from c[-1] (bits 7 and 3), c[2] (bits 6,
 *     4, 2, 1, 0) and c[3] (bits 6 and 2).
 *
 * The row stride is 94 because the second byte has 94 possible values
 * (33..126).  A stride of 84 makes cells 85..94 of one row share slots
 * with cells 1..10 of the next row.  With 84 rows and stride 94 the
 * largest index is 64*94*84*8 - 1, the last bit of a buffer of
 * 64*94*84 bytes.
 *
 * NOTE(review): a table file written with a stride of 84 has a
 * different layout and has to be relearned.
 */
int
cbyte(int *c)
{
    int row = (c[0] & 0x7f) - 33;
    int cell = (c[1] & 0x7f) - 33;
    int context = 0;

    if (c[-1] & 0x80) context |= 0x100;
    if (c[-1] & 0x08) context |= 0x080;
    if (c[2] & 0x40)  context |= 0x040;
    if (c[2] & 0x10)  context |= 0x020;
    if (c[2] & 0x04)  context |= 0x010;
    if (c[2] & 0x02)  context |= 0x008;
    if (c[2] & 0x01)  context |= 0x004;
    if (c[3] & 0x40)  context |= 0x002;
    if (c[3] & 0x04)  context |= 0x001;

    return (row * 94 + cell) * 0x200 + context;
}


/* syntax highlighted by Code2HTML, v. 0.9.1 */