/*
* RUJIS ( Recover UJIS code from broken file )
* $Header: rujis.c,v 0.2 92/09/24 takahasi Exp $
* Copyright (C) 1992
* Hironobu Takahashi (takahasi@tiny.or.jp)
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2, or (at your option)
* any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with KAKASI, see the file COPYING. If not, write to the Free
* Software Foundation Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/* $Log: rujis.c,v $
*
*/
#include <stdio.h>

/*
 * Size of the decode table in bytes.
 *
 * cbyte() yields a bit index made of a (first byte, second byte) cell
 * number multiplied by 0x200 plus 9 context bits, and table_set() /
 * table_req() address table[index / 8]; i.e. 64 bytes are reserved per
 * cell.  The expression is parenthesised so that the macro expands
 * correctly inside larger expressions (casts, division, modulo, ...).
 */
#define TABLE_SIZE (64*94*84)

/* Directory of the default decode table; may be overridden at build time. */
#ifndef LIBDIR
#define LIBDIR "."
#endif

/* File name of the default decode table inside LIBDIR. */
#define TABLE_NAME "ujis_tbl"

/* The decode table, TABLE_SIZE bytes, allocated in main(). */
unsigned char *table;

/* Values of process_mode. */
#define RECOVER 0   /* restore stripped high bits using the table */
#define LEARNING 1  /* update the table from the input */
int process_mode;

extern void exit(int);

/*
 * Prototypes for the functions defined in this file.  Several of them are
 * called before their definition; without these declarations such calls
 * depend on implicit function declarations, which C99 removed.
 */
int usage(char **argv);
int char_shift(int *in);
int process(FILE *input, FILE *output);
int convert_char(int *in);
int learn_char(int *in);
int maybeujis(int c1, int c2);
int load_table(char *file_name);
int put_table(char *file_name);
int table_set(int *c, int value);
int table_req(int *c);
int cbyte(int *c);
/*
 * Print the command line synopsis to stderr.
 *
 * argv: the program's argument vector; only argv[0] (the program name)
 *       is read.
 * Returns 0 always.  The return type stays int because the original
 * definition had an implicit int return type.
 */
int
usage(char **argv)
{
    const char *prog = argv[0];

    fprintf(stderr, "Usage:\n");
    fprintf(stderr, "\n");
    fprintf(stderr, " Recover: %s [ -t decode_table ] [input_file [output_file]]\n", prog);
    fprintf(stderr, " Learning: %s -l [ -t decode_table ] [input_file]\n", prog);
    fprintf(stderr, "\n");
    fprintf(stderr, " %s process only UJIS encoded files\n", prog);
    fprintf(stderr, " default table is %s/%s\n", LIBDIR, TABLE_NAME);
    fprintf(stderr, "\n");
    return 0;
}
/*
 * Program entry point.
 *
 * Options:
 *   -l          switch to LEARNING mode (default is RECOVER)
 *   -t NAME     (or -tNAME) use NAME as the decode table file
 *   any other   print the usage text and exit with status 0
 * The first non-option argument is opened as the input file, the second
 * as the output file; argument parsing stops after the output file.
 *
 * The table is loaded, the input is processed, and in any mode other than
 * RECOVER the table is written back.  Exits with status 1 when a file
 * cannot be opened, memory cannot be allocated, a table name does not fit,
 * or the output cannot be flushed.
 */
int
main(int argc, char **argv)
{
    /* This file does not include <stdlib.h>, so malloc is declared here. */
    extern void *malloc(size_t);
    FILE *input, *output;
    char table_name[256];
    const char *name;
    int i, len, status;

    input = stdin;
    output = stdout;
    process_mode = RECOVER;
    name = NULL;
    status = 0;

    len = snprintf(table_name, sizeof table_name, "%s/%s", LIBDIR, TABLE_NAME);
    if (len < 0 || (size_t)len >= sizeof table_name) {
        fprintf(stderr, "%s: default table name is too long\n", argv[0]);
        exit(1);
    }

    for (i = 1; i < argc; ++i) {
        if (argv[i][0] == '-') {
            switch (argv[i][1]) {
            case 'l':
                process_mode = LEARNING;
                break;
            case 't':
                if (argv[i][2] != '\0') {
                    /* "-tNAME": the name is attached to the option. */
                    name = argv[i] + 2;
                } else if (i + 1 < argc) {
                    /* "-t NAME": the name is the next argument. */
                    name = argv[++i];
                } else {
                    /* "-t" was the last argument: there is no name to copy. */
                    fprintf(stderr, "%s: option -t requires a table name\n",
                            argv[0]);
                    usage(argv);
                    exit(1);
                }
                len = snprintf(table_name, sizeof table_name, "%s", name);
                if (len < 0 || (size_t)len >= sizeof table_name) {
                    fprintf(stderr, "%s: table name is too long\n", argv[0]);
                    exit(1);
                }
                break;
            default:
                usage(argv);
                exit(0);
            }
        } else if (input == stdin) {
            if ((input = fopen(argv[i], "r")) == NULL) {
                perror(argv[i]);
                exit(1);
            }
        } else {
            if ((output = fopen(argv[i], "w")) == NULL) {
                perror(argv[i]);
                exit(1);
            }
            break;  /* leaves the for loop: nothing is parsed after the output file */
        }
    }

    table = (unsigned char *)malloc((size_t)(TABLE_SIZE));
    if (table == NULL) {
        fprintf(stderr, "%s: can't alloc memory\n", argv[0]);
        exit(1);
    }

    load_table(table_name);
    process(input, output);
    if (process_mode != RECOVER)
        put_table(table_name);

    /* Buffered output may fail only now (e.g. disk full); report it. */
    if (fflush(output) == EOF) {
        fprintf(stderr, "%s: write error on output\n", argv[0]);
        status = 1;
    }
    return status;
}
/*
 * Slide the 5-element window one position to the left:
 * in[0..3] receive in[1..4]; in[4] is left unchanged.
 *
 * in: array of at least 5 ints.
 * Returns 0 always (the return type stays int because the original
 * definition had an implicit int return type).
 */
int
char_shift(int *in)
{
    int pos;

    for (pos = 0; pos < 4; ++pos)
        in[pos] = in[pos + 1];
    return 0;
}
/*
 * Run the current process_mode over the whole input stream.
 *
 * A window of five values is kept: in[0] is the previously handled byte
 * (initially '\n'), in[1] and in[2] are the pair being examined, and
 * in[3], in[4] are look-ahead.
 *
 * RECOVER:  convert_char() may set the high bit on in[1]/in[2]; in[1] is
 *           written, and when it has the high bit set the following byte
 *           is written with it and skipped as a unit.
 * LEARNING: learn_char() updates the table; nothing is written, and the
 *           bytes still in the window at end of input are not learned.
 *
 * At end of input (any mode except LEARNING) the bytes remaining in the
 * window are flushed with '\n' used as look-ahead padding.
 *
 * Returns 0 always (the return type stays int because callers see this
 * function through an implicit int declaration).
 */
int
process(FILE *input, FILE *output)
{
    int in[5];

    in[0] = '\n';
    in[1] = getc(input);
    in[2] = getc(input);
    in[3] = getc(input);

    while ((in[4] = getc(input)) != EOF) {
        switch (process_mode) {
        case RECOVER:
            convert_char(in);
            putc(in[1], output);
            char_shift(in);
            if (in[0] & 0x80) {
                /* Two-byte character: emit and consume the second byte too. */
                putc(in[1], output);
                in[4] = getc(input);
                char_shift(in);
            }
            break;
        case LEARNING:
            learn_char(in);
            char_shift(in);
            if (in[0] & 0x80) {
                /* Skip the second byte of a two-byte character. */
                in[4] = getc(input);
                char_shift(in);
            }
            break;
        default:
            break;
        }
    }

    if (process_mode == LEARNING)
        return 0;

    if (in[3] != EOF) {
        /* Three real bytes are left; in[4] is padding. */
        in[4] = '\n';
        convert_char(in);
        putc(in[1], output);
        char_shift(in);
        if (in[0] & 0x80) {
            putc(in[1], output);
            putc(in[2], output);
            return 0;
        }
    }

    /*
     * At most two bytes are left.  For inputs shorter than two bytes
     * in[1] and/or in[2] still hold EOF; those must not be written
     * (putc(EOF) would emit a stray 0xFF byte).
     */
    in[3] = in[4] = '\n';
    convert_char(in);
    if (in[1] != EOF)
        putc(in[1], output);
    if (in[2] != EOF)
        putc(in[2], output);
    return 0;
}
/*
 * RECOVER step for the pair in[1], in[2].
 *
 * When maybeujis() accepts the pair and the table bit selected for
 * in+1 (see table_req()) is set, the high bit is turned on in both
 * in[1] and in[2].
 *
 * in: the 5-element window used by process().
 * Returns 1 when the pair was converted, 0 otherwise.  (The original
 * executed "return;" in a function declared int; existing callers
 * ignore the value.)
 */
int
convert_char(int *in)
{
    if (maybeujis(in[1], in[2]) && table_req(in + 1)) {
        in[1] |= 0x80;
        in[2] |= 0x80;
        return 1;
    }
    return 0;
}
/*
 * LEARNING step for the pair in[1], in[2].
 *
 * When the pair, with the high bits masked off, is accepted by
 * maybeujis(), the table bit selected for in+1 is set if in[1] has its
 * high bit set and cleared otherwise.
 *
 * in: the 5-element window used by process().
 * Returns 0 always (the return type stays int because the original
 * definition had an implicit int return type).
 */
int
learn_char(int *in)
{
    if (maybeujis(in[1] & 0x7f, in[2] & 0x7f))
        table_set(in + 1, (in[1] & 0x80) ? 1 : 0);
    return 0;
}
/*
 * Decide whether the 7-bit pair (c1, c2) is an acceptable two-byte code.
 *
 * c2 must lie in 33..126 and c1 in 33..40 or 48..116.  Both are then
 * reduced by 32 to a row (from c1) and a column (from c2), and a few rows
 * accept only part of the column range 1..94.
 * NOTE(review): the excluded positions presumably are the unassigned
 * cells of the JIS X 0208 table - confirm against the standard.
 *
 * Returns 1 when the pair is accepted, 0 otherwise.
 */
int
maybeujis(c1, c2)
int c1, c2;
{
    int row, col;
    int accepted;

    if (c2 < 33 || c2 > 126)
        return 0;
    if (!((c1 >= 33 && c1 <= 40) || (c1 >= 48 && c1 <= 116)))
        return 0;

    row = c1 - 32;
    col = c2 - 32;      /* 1..94 from here on */
    accepted = 1;

    if (row == 2) {
        if ((col >= 15 && col <= 25) ||
            (col >= 34 && col <= 41) ||
            (col >= 49 && col <= 59) ||
            (col >= 75 && col <= 81) ||
            (col >= 90 && col <= 93))
            accepted = 0;
    } else if (row == 3) {
        accepted = (col >= 16 && col <= 25) ||
                   (col >= 33 && col <= 58) ||
                   (col >= 65 && col <= 90);
    } else if (row == 4) {
        accepted = (col <= 83);
    } else if (row == 5) {
        accepted = (col <= 86);
    } else if (row == 6) {
        accepted = (col <= 24) || (col >= 33 && col <= 56);
    } else if (row == 7) {
        accepted = (col <= 33) || (col >= 49 && col <= 81);
    } else if (row == 8) {
        accepted = (col <= 32);
    } else if (row == 47) {
        accepted = (col <= 51);
    } else if (row == 84) {
        accepted = (col <= 6);
    }

    return accepted ? 1 : 0;
}
/*
 * Fill the global table from the file file_name.
 *
 * The table is cleared first, so a missing file yields an all-zero table
 * and a file shorter than TABLE_SIZE leaves the remainder zero instead of
 * whatever the freshly allocated memory happened to contain.  The file is
 * opened in binary mode because the table is raw bytes.
 *
 * file_name: path of the table file.
 * Returns 0 when the table was read completely or the file could not be
 * opened (the all-zero table is used), -1 when the file could be opened
 * but did not supply TABLE_SIZE bytes.  Existing callers ignore the value.
 */
int
load_table(char *file_name)
{
    FILE *fp;
    size_t want, got, pos;

    want = (size_t)(TABLE_SIZE);
    for (pos = 0; pos < want; ++pos)
        table[pos] = 0x00;

    if ((fp = fopen(file_name, "rb")) == NULL)
        return 0;

    got = fread((char *)table, 1, want, fp);
    if (got != want) {
        if (ferror(fp))
            perror(file_name);
        else
            fprintf(stderr, "%s: table file is too short\n", file_name);
    }
    fclose(fp);
    return (got == want) ? 0 : -1;
}
/*
 * Write the global table (TABLE_SIZE bytes) to the file file_name,
 * replacing any previous contents.  The file is opened in binary mode
 * because the table is raw bytes.
 *
 * file_name: path of the table file.
 * Returns 0 on success, -1 when the file could not be opened, written or
 * closed; the failure is reported with perror().  Existing callers ignore
 * the value.
 */
int
put_table(char *file_name)
{
    FILE *fp;
    int status;

    if ((fp = fopen(file_name, "wb")) == NULL) {
        perror(file_name);
        return -1;
    }

    status = 0;
    if (fwrite((char *)table, (size_t)(TABLE_SIZE), 1, fp) != 1) {
        perror(file_name);
        status = -1;
    }
    /* fclose flushes the buffered data, so it can fail as well. */
    if (fclose(fp) == EOF && status == 0) {
        perror(file_name);
        status = -1;
    }
    return status;
}
/*
 * Single-bit masks indexed by bit number 0..7:
 * setmask[n] has only bit n set, clrmask[n] has every bit except bit n set.
 */
unsigned char setmask[8] = {
    0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80
};
unsigned char clrmask[8] = {
    0xFE, 0xFD, 0xFB, 0xF7, 0xEF, 0xDF, 0xBF, 0x7F
};
/*
 * Set or clear the table bit selected by cbyte(c).
 *
 * c:     pointer into the character window, passed unchanged to cbyte()
 *        (which reads c[-1] through c[3]).
 * value: non-zero sets the bit, zero clears it.
 * Returns 0 always (the return type stays int because the original
 * definition had an implicit int return type).
 */
int
table_set(int *c, int value)
{
    int bit;

    bit = cbyte(c);     /* a bit index, not a byte index */
    if (value)
        table[bit / 8] |= setmask[bit & 7];
    else
        table[bit / 8] &= clrmask[bit & 7];
    return 0;
}
/*
 * Query the table bit selected by cbyte(c).
 *
 * c: pointer into the character window, passed unchanged to cbyte()
 *    (which reads c[-1] through c[3]).
 * Returns 1 when the bit is set, 0 when it is clear.
 */
int
table_req(int *c)
{
    int bit;

    bit = cbyte(c);     /* a bit index, not a byte index */
    return (table[bit / 8] & setmask[bit & 7]) ? 1 : 0;
}
/*
 * Compute the table bit index for the pair c[0], c[1] in its context.
 *
 * The index is a cell number derived from the low 7 bits of c[0] and
 * c[1], multiplied by 0x200, plus 9 context bits taken from the
 * preceding value c[-1] and the two following values c[2] and c[3].
 * The caller must therefore pass a pointer with one readable element
 * before it and three after it.
 *
 * NOTE(review): the cell number uses a stride of 84 for c[0] while
 * maybeujis() lets (c[1] & 0x7f) - 33 reach 93, so distinct pairs can
 * share a cell.  A stride of 94 may have been intended; changing it would
 * invalidate existing table files, so confirm before touching it.
 */
int cbyte(c)
int *c;
{
    int cell;
    int context;

    cell = ((c[0] & 0x7f) - 33) * 84 + ((c[1] & 0x7f) - 33);

    context = 0;
    if (c[-1] & 0x80) context |= 0x100;
    if (c[-1] & 0x08) context |= 0x080;
    if (c[2] & 0x40)  context |= 0x040;
    if (c[2] & 0x10)  context |= 0x020;
    if (c[2] & 0x04)  context |= 0x010;
    if (c[2] & 0x02)  context |= 0x008;
    if (c[2] & 0x01)  context |= 0x004;
    if (c[3] & 0x40)  context |= 0x002;
    if (c[3] & 0x04)  context |= 0x001;

    return cell * 0x200 + context;
}
/* syntax highlighted by Code2HTML, v. 0.9.1 */