/* Copyright (C) 2003-2004 Nadav Har'El and Dan Kenigsberg */ #include #include #include #include #include #include #include "hash.h" #include "hspell.h" #ifdef USE_LINGINFO #include "linginfo.h" #endif /* load_personal_dict tries to load ~/.hspell_words and ./hspell_words. Currently, they are read into a hash table, where each word in the file gets a non-zero value. Empty lines starting with # are ignored. Lines containing non-Hebrew characters aren't ignored, but they won't be tried as questioned words anyway. */ static void load_personal_dict(hspell_hash *personaldict) { int i; hspell_hash_init(personaldict); for(i=0; i<=1; i++){ char buf[512]; FILE *fp; if(i==0){ char *home = getenv("HOME"); if(!home) continue; snprintf(buf, sizeof(buf), "%s/.hspell_words", home); } else snprintf(buf, sizeof(buf), "./hspell_words"); fp=fopen(buf, "r"); if(!fp) continue; while(fgets(buf, sizeof(buf), fp)){ int l=strlen(buf); if(buf[l-1]=='\n') buf[l-1]='\0'; if(buf[0]!='#' && buf[0]!='\0') hspell_hash_incr_int(personaldict, buf); } fclose(fp); } } /* load_spelling_hints reads the spelling hints file (for the -n option). This is done in a somewhat ad-hoc manner. In particular, the repeat of the DICTIONARY_BASE here, also present in libhspell.c, is unfortunate. */ #ifndef DICTIONARY_BASE #define DICTIONARY_BASE "./hebrew.wgz" #endif char *flathints; int flathints_size; void load_spelling_hints(hspell_hash *spellinghints) { FILE *fp; char s[1000]; int len=0; int thishint=0; hspell_hash_init(spellinghints); flathints_size = 8192; /* initialize size (will grow as necessary) */ flathints = (char *)malloc(flathints_size); /*flathints[0]=0;*/ snprintf(s,sizeof(s),"gzip -dc '%s.hints'",DICTIONARY_BASE); fp = popen(s, "r"); if(!fp) { fprintf(stderr,"Failed to open %s\n",s); return; } while(fgets(s, sizeof(s), fp)){ int l=strlen(s); if(s[0]=='+') { /* this is a textual description line */ if(!thishint){ thishint=len; } /* reallocate the array, if no room */ while(len+l >= flathints_size){ flathints_size *= 2; flathints= (char *) realloc(flathints,flathints_size); } /* replace the '+' character by a space (this was the way hints were printed in version 0.5, and wee keep it for backward compatibility */ s[0]=' '; /*strncpy(flathints+len, s, flathints_size-len);*/ strcpy(flathints+len, s); len += l; } else if(s[0]=='\n'){ /* no more words for this hint */ thishint = 0; len++; } else { /* another word for this hint */ s[l-1]=0; hspell_hash_set_int(spellinghints, s, thishint); } } pclose(fp); } /* used for sorting later: */ static int compare_key(const void *a, const void *b){ register hspell_hash_keyvalue *aa = (hspell_hash_keyvalue *)a; register hspell_hash_keyvalue *bb = (hspell_hash_keyvalue *)b; return strcmp(aa->key, bb->key); } static int compare_value_reverse(const void *a, const void *b){ register hspell_hash_keyvalue *aa = (hspell_hash_keyvalue *)a; register hspell_hash_keyvalue *bb = (hspell_hash_keyvalue *)b; if(aa->value < bb->value) return 1; else if(aa->value > bb->value) return -1; else return 0; } static FILE * next_file(int *argcp, char ***argvp) { FILE *ret=0; if(*argcp<=0) return 0; while(*argcp && !ret){ ret=fopen((*argvp)[0],"r"); if(!ret) perror((*argvp)[0]); (*argvp)++; (*argcp)--; } return ret; } #define VERSION_IDENTIFICATION ("@(#) International Ispell Version 3.1.20 " \ "(but really Hspell/C %d.%d%s)\n") #define ishebrew(c) ((c)>=(int)(unsigned char)'א' && (c)<=(int)(unsigned char)'ת') int notify_split(const char *w, const char *baseword, int preflen, int prefspec) { char *desc,*stem; if(preflen>0){ printf("צירוף חוקי: %.*s+%s\n", preflen, w, baseword); } else if (!preflen){ printf("מילה חוקית: %s\n",w); } #ifdef USE_LINGINFO if (linginfo_lookup(baseword,&desc,&stem)) { int j; for (j=0; ;j++) { char buf[80]; if (!linginfo_desc2text(buf, desc, j)) break; if (linginfo_desc2ps(desc, j) & prefspec) { printf("\t%s(%s)",linginfo_stem2text(stem,j),buf); if (hspell_debug) printf("\t%d",linginfo_desc2ps(desc, j)); printf("\n"); } } } #endif return 1; } int main(int argc, char *argv[]) { struct dict_radix *dict; #define MAXWORD 30 char word[MAXWORD+1], *w; int wordlen=0, offset=0, wordstart; int c; int res; FILE *slavefp; int terse_mode=0; hspell_hash wrongwords; int preflen; /* used by -l */ hspell_hash personaldict; hspell_hash spellinghints; /* command line options */ char *progname=argv[0]; int interpipe=0; /* pipe interface (ispell -a like) */ int slave=0; /* there's a slave ispell process (-i option) */ int opt_s=0; /* -s option */ int opt_c=0; /* -c option */ int opt_l=0; /* -l option */ int opt_v=0; /* -v option (show version and quit) */ int opt_H=0; /* -H option (allow he ha-she'ela) */ int opt_n=0; /* -n option (provide spelling hints) */ /* TODO: when -a is not given, allow filename parameters, like the "spell" command does. */ FILE *in=stdin; /* Parse command-line options */ while((c=getopt(argc, argv, "clnsviad:BmVhT:CSPp:w:W:H"))!=EOF){ switch(c){ case 'a': interpipe=1; break; case 'i': slave=1; break; /* The following options do something on ispell or aspell, and some confused programs call hspell with them. We just ignore them silently, hoping that all's going to be well... */ case 'd': case 'B': case 'm': case 'T': case 'C': case 'S': case 'P': case 'p': case 'w': case 'W': /*fprintf(stderr, "Warning: ispell options -d, -B and " "-m are ignored by hspell.\n");*/ break; case 's': opt_s=1; break; case 'c': opt_c=1; break; case 'l': opt_l=1; break; case 'H': /* Allow "he ha-she'ela" */ opt_H=1; break; case 'n': opt_n=1; break; case 'v': opt_v++; break; case 'V': printf("Hspell %d.%d%s\nWritten by Nadav Har'El and " "Dan Kenigsberg\n", HSPELL_VERSION_MAJOR, HSPELL_VERSION_MINOR, HSPELL_VERSION_EXTRA); return 0; case 'h': case '?': fprintf(stderr,"hspell - Hebrew spellchecker\n" "Usage: %s [-acinslVH] [file ...]\n\n" "See hspell(1) manual for a description of " "hspell and its options.\n", progname); return 1; } } argc -= optind; argv += optind; /* The -v option causes ispell to print its current version identification on the standard output and exit. If the switch is doubled, ispell will also print the options that it was compiled with. */ if(opt_v){ printf(VERSION_IDENTIFICATION, HSPELL_VERSION_MAJOR, HSPELL_VERSION_MINOR, HSPELL_VERSION_EXTRA); return 0; } /* If the program name ends with "-i", we enable the -i option. This ugly hack is useful when a certain application can be given a different spell-checker, but not extra options to pass to it */ if(strlen(progname)>=2 && progname[strlen(progname)-2] == '-' && progname[strlen(progname)-1] == 'i'){ slave=interpipe=1; } if(interpipe){ /* for ispell -a like behavior, we want to flush every line: */ setlinebuf(stdout); } else { /* No "-a" option: UNIX spell-like mode: */ /* Set up hash-table for remembering the wrong words seen */ hspell_hash_init(&wrongwords); /* If we have any more arguments, treat them as files to spellcheck. Otherwise, just use stdin as set above. */ if(argc){ in=next_file(&argc, &argv); if(!in) return 1; /* nothing to do, really... */ } } if(hspell_init(&dict, (opt_H ? HSPELL_OPT_HE_SHEELA : 0) | (opt_l ? HSPELL_OPT_LINGUISTICS : 0))<0){ fprintf(stderr,"Sorry, could not read dictionary. Hspell " "was probably installed improperly.\n"); return 1; } load_personal_dict(&personaldict); if(opt_n) load_spelling_hints(&spellinghints); if(interpipe){ if(slave){ /* We open a pipe to an "ispell -a" process, letting it output directly to the user. We also let it output its own version string instead of ours. Is this wise? I don't know. Does anyone care? Note that we also don't make any attempts to catch broken pipes. */ slavefp=popen("ispell -a", "w"); if(!slavefp){ fprintf(stderr, "Warning: Cannot create slave " "ispell process. Disabling -i option.\n"); slave=0; } else { setlinebuf(slavefp); } } if(!slave) printf(VERSION_IDENTIFICATION, HSPELL_VERSION_MAJOR, HSPELL_VERSION_MINOR, HSPELL_VERSION_EXTRA); } for(;;){ c=getc(in); if(ishebrew(c) || c=='\'' || c=='"'){ /* swallow up another letter into the word (if the word * is too long, lose the last letters) */ if(wordlen0){ in=next_file(&argc, &argv); if(!in) break; } else break; } if(c=='\n'){ offset=0; if(interpipe && !slave) /*slave already outputs a newline...*/ printf("\n"); } else { offset++; } /* pass the character also to the slave, replacing Hebrew characters by spaces */ if(interpipe && slave) putc(ishebrew(c) ? ' ' : c, slavefp); } /* in spell-like mode (!interpipe) - list the wrong words */ if(!interpipe){ hspell_hash_keyvalue *wrongwords_array; int wrongwords_number; wrongwords_array = hspell_hash_build_keyvalue_array( &wrongwords, &wrongwords_number); if(wrongwords_number){ int i; if(opt_c) printf("שגיאות כתיב שנמצאו, ותיקוניהן " "המומלצים:\n\n"); else printf("שגיאות כתיב שנמצאו:\n\n"); /* sort word list by key or value (depending on -s option) */ qsort(wrongwords_array, wrongwords_number, sizeof(hspell_hash_keyvalue), opt_s ? compare_value_reverse : compare_key); for(i=0; i ", wrongwords_array[i].value, wrongwords_array[i].key); corlist_init(&cl); hspell_trycorrect(dict, wrongwords_array[i].key, &cl); for(j=0;j