/*************************************************************************
 * Bulgarian-English Dictionary
 * Copyright (C) 2000 Radostin Radnev
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 *************************************************************************/

#include <ctype.h>
#include <string.h>

#include "database.h"
#include "translator.h"

//=== Class Translator ===================================================
// Written by Radostin Radnev - radnev@yahoo.com
// $Id: translator.cpp,v 1.13 2001/03/11 22:41:46 radnev Exp $
//
// This class represents Translator.
// It holds databases and perform some text translation operations //======================================================================== // Declaration about max data and word length const int Translator::MAX_DATA_LEN = 150000; // Declaration about dictinonary constants const int Translator::ENG_BUL = 1; const int Translator::BUL_ENG = 2; // Declarations about Latin Input and Output const char Translator::ENG_CHARS[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ~`[{]}|\\:;'\""; const char Translator::BUL_CHARS[] = "АБЦДЕФГХИЙКЛМНОПЯРСТУЖВЬЪЗЧЧШШЩЩЮЮЫЫЭ"; const char Translator::LATIN_OUTPUT_CHARS[32][4] = { "a", "b", "v", "g", "d", "e", "j", "z", "i", "j", "k", "l", "m", "n", "o", "p", "r", "s", "t", "u", "f", "h", "c", "ch", "sh", "sht", "y", "x", "x", "ei", "iu", "ia"}; // Declarations about English Suffixs const char Translator::ENG_SUFFIXS[3][4] = {"S", "ING", "ED"}; // Declaration about HTML decoration const char Translator::BOLD_START[] = ""; const char Translator::BOLD_END[] = ""; const char Translator::BREAK_LINE[] = "
"; const char Translator::A_S_HL_HTML_START[] = ""; const char Translator::A_S_HL_HTML_END[] = ""; const char Translator::A_S_HL_NORMAL_START[] = "<----*"; const char Translator::A_S_HL_NORMAL_END[] = "*---->"; // Declaration about start and stop bold decoration const char Translator::NON_ALPHA_CHARS[] = " ,.;:!?'-/\"\\"; // Declaration about word delimiter test const char Translator::TEST_DELIMITER = ','; //=== Constructor ======================================================== // Set defaults variables // Allocate memory //======================================================================== Translator::Translator(const int wordLength, const int wordNumber) { maxWordLength = wordLength; maxWordNumber = wordNumber; separateMeanings = true; latinOutput = false; latinInput = false; boldDecoration = true; htmlOutput = true; advancedSearchState = false; advancedSearchHighlight = true; advancedSearchWholeWord = true; advancedSearchExactPhrase = false; currentDictionary = ENG_BUL; testDictionary = ENG_BUL; testLevel = 0; dataBuffer = new char[MAX_DATA_LEN]; dataBuffer2 = new char[MAX_DATA_LEN/10]; wordBuffer = new char[maxWordLength + 1]; wordBuffer2 = new char[maxWordLength + 1]; tempBuffer = new char[(maxWordLength + 1)*maxWordNumber]; wordPlus = new bool[maxWordNumber]; } // End of Constructor //=== Destructor ========================================================= // Free memory and dictionary objects //======================================================================== Translator::~Translator() { delete [] dataBuffer; delete [] dataBuffer2; delete [] wordBuffer; delete [] wordBuffer2; delete [] tempBuffer; delete [] wordPlus; delete dicEB; delete dicBE; } // End of Destructor //=== Create Dictionary ================================================== // Real construcor // Call the same method of Database, and return same result //======================================================================== bool Translator::createDictionary(const char 
*fileName, const int dictionary, const long fixedLastWordPointer) { bool ret = false; switch (dictionary) { case ENG_BUL: dicEB = new Database(); ret = dicEB->createDictionary(fileName, fixedLastWordPointer); currentDic = dicEB; break; case BUL_ENG: dicBE = new Database(); ret = dicBE->createDictionary(fileName, fixedLastWordPointer); break; } return ret; } // End of createDictionary //=== To Lower Case ====================================================== // Transform characters to lower case work for bulgarian chars also // Change buffer and return pointer to new word //======================================================================== char *Translator::toLowerCase(const char *word, char *buf) { int i = 0; while (word[i] != '\0') { if (('A' <= word[i]) && (word[i] <= 'Z')) { buf[i] = tolower(word[i]); } else if (('А' <= word[i]) && (word[i] <= 'Я')) { buf[i] = word[i] - 'А' + 'а'; } else { buf[i] = word[i]; } i++; } buf[i] = '\0'; return buf; } // End of toLowerCase //=== To Upper Case ====================================================== // Transform characters to upper case work for bulgarian chars also // Change buffer and return pointer to new word //======================================================================== char *Translator::toUpperCase(const char *word, char *buf) { int i = 0; while (word[i] != '\0') { if (('a' <= word[i]) && (word[i] <= 'z')) { buf[i] = toupper(word[i]); } else if (('а' <= word[i]) && (word[i] <= 'я')) { buf[i] = word[i] + 'А' - 'а'; } else { buf[i] = word[i]; } i++; } buf[i] = '\0'; return buf; } // End of toUpperCase //=== To Legal Dictionary Word =========================================== // Transform word to legal dictionary word // Only letters and only upper case // Change buffer and return pointer to new word //======================================================================== char *Translator::toLegalDictionaryWord(const char *word, char *buf) { int i = 0; int j = 0; while (word[i] != '\0') { if (('a' 
<= word[i]) && (word[i] <= 'z')) { buf[j] = toupper(word[i]); j++; } else if (('а' <= word[i]) && (word[i] <= 'я')) { buf[j] = word[i] + 'А' - 'а'; j++; } else if ((('A' <= word[i]) && (word[i] <= 'Z')) || (('А' <= word[i]) && (word[i] <= 'Я'))) { buf[j] = word[i]; j++; } i++; } buf[j] = '\0'; return buf; } // End of toLegalDictionaryWord //=== Trim Word ========================================================== // Removing leading and ending spaces from word // Change buffer and return pointer to new word //======================================================================== char *Translator::trimWord(const char *word, char *buf) { bool add = false; int i = 0; int j = 0; while (word[i] != '\0') { if ((word[i] != ' ') || (add)) { add = true; buf[j] = word[i]; j++; } i++; } buf[j] = '\0'; while ((j > 0) && (buf[j - 1] == ' ')) { j--; buf[j] = '\0'; } return buf; } // End of trimWord //=== Is English Word Without Suffix ===================================== // Return true if passed word end with some suffix for example s, ed ... 
// Change buffer and return pointer to new word //======================================================================== bool Translator::isEnglishWordWithoutSuffix(const char *word, char *buf) { int j, i = 0; bool ret = false; char *p; strcpy(buf, word); while (i < 3) { p = strrchr(buf, ENG_SUFFIXS[i][0]); if (p != NULL) { j = 0; while ((*p != '\0') && (ENG_SUFFIXS[i][j] != '\0') && (*p == ENG_SUFFIXS[i][j])) { p++; j++; } if ((*p == '\0') && (ENG_SUFFIXS[i][j] == '\0')) { p = strrchr(buf, ENG_SUFFIXS[i][0]); *p = '\0'; ret = (strlen(buf) != 0); break; } } i++; } return ret; } // End of isEnglishWordWithoutSuffix //=== Is Latin Input ===================================================== // Return true if passed word is legal bulgarian word as LATIN_INPUT // Return pointer to new word that is legal dictionary word //======================================================================== bool Translator::isLatinInput(const char *word, char *buf, const bool ignoreSpace ) { int i = 0; char *p; while (word[i] != '\0') { if ((ignoreSpace) && (word[i] == ' ')) { buf[i] = word[i]; } else { p = strchr(ENG_CHARS, word[i]); if (p != NULL) { buf[i] = BUL_CHARS[p - ENG_CHARS]; } else { return false; } } i++; } buf[i] = '\0'; return true; } // End of isLatinInput //=== To Latin =========================================================== // Transfer Bulgarian word to legal input word for LATIN_INPUT or // to a normal latin output // Change buffer and return pointer to new word //======================================================================== char *Translator::toLatin(const char *word, char *buf, const bool legalLatinInput ) { int i = 0; int j = 0; char *p; char c; while ((c = word[i]) != '\0') { if (legalLatinInput) { p = strchr(BUL_CHARS, c); if (p != NULL) { buf[j] = ENG_CHARS[p - BUL_CHARS]; j++; } } else { if (('А' <= c) && (c <= 'Я')) { strcpy(buf + j, LATIN_OUTPUT_CHARS[c - 'А']); j += strlen(LATIN_OUTPUT_CHARS[c - 'А']); } else { buf[j] = c; j++; } } i++; } 
buf[j] = '\0'; return buf; } // End of toLatin //=== Go To Next Word ==================================================== // Call the same method of Database, and return same result //======================================================================== bool Translator::goToNextWord() { return currentDic->goToNextWord(); } // End of goToNextWord //=== Get Word =========================================================== // Call the same method of Database, and return same result // But perform some operations //======================================================================== char *Translator::getWord(const bool lowerCase, const bool legalLatinInput ) { char *ret = NULL; switch (currentDictionary) { case ENG_BUL: ret = dicEB->getWord(); if (lowerCase) { ret = toLowerCase(ret, wordBuffer); } break; case BUL_ENG: ret = dicBE->getWord(); if ((latinInput) || (latinOutput)) { ret = toLatin(ret, wordBuffer, legalLatinInput); if ((!lowerCase) && (!legalLatinInput)) { ret = toUpperCase(ret, wordBuffer); } } else { if (lowerCase) { ret = toLowerCase(ret, wordBuffer); } } break; } return ret; } // End of getWord //=== Find Word ========================================================== // Call the same method of Database, and return same result // But perform some transformation of data input and output data //======================================================================== bool Translator::findWord(const char *word, char **result) { char *p; bool ret = false; if (((latinInput) || (latinOutput)) && (isLatinInput(word, wordBuffer)) && (strlen(wordBuffer) > 0)) { currentDictionary = BUL_ENG; currentDic = dicBE; } else { toLegalDictionaryWord(word, wordBuffer); if (strlen(wordBuffer) == 0) { dataBuffer[0] = '\0'; *result = dataBuffer; return false; } setCurrentDictionary(wordBuffer[0]); } switch (currentDictionary) { case ENG_BUL: ret = dicEB->findWord(wordBuffer, &p); if ((!ret) && (isEnglishWordWithoutSuffix(wordBuffer, wordBuffer2)) && (strlen(wordBuffer2) > 
1)) { ret = dicEB->findWord(wordBuffer2, &p); if (!ret) { ret = dicEB->findWord(wordBuffer, &p); } } break; case BUL_ENG: ret = dicBE->findWord(wordBuffer, &p); break; } *result = transformResult(p); return ret; } // End of findWord //=== Transform Result =================================================== // Perform some transformation operations depending from options // Store result in dataBuffer and return pointer to it //======================================================================== char *Translator::transformResult(const char *result) { int j, i, m, n; char *b; strcpy(dataBuffer2, result); if ((advancedSearchState) && (advancedSearchHighlight)) { for (i = 0; i < wordNumber; i++) { if (wordPlus[i]) { toUpperCase(dataBuffer2, dataBuffer); j = strlen(dataBuffer2); b = dataBuffer; n = 0; while ((b = search(b, tempBuffer + (maxWordLength + 1)*i, advancedSearchWholeWord)) != NULL) { m = b - dataBuffer + 2*n; memmove(dataBuffer2 + m + 1, dataBuffer2 + m, j - m + 2 + 2*n); dataBuffer2[m] = '|'; m += strlen(tempBuffer + (maxWordLength + 1)*i) + 1; memmove(dataBuffer2 + m + 1, dataBuffer2 + m, j - m + 2 + 2*n); dataBuffer2[m] = '|'; b += strlen(tempBuffer + (maxWordLength + 1)*i); n++; } } } } bool currentBold = false; bool currentColor = false; bool firstLineBreak = true; char c; i = 0; j = 0; while ((c = dataBuffer2[i]) != '\0') { if (c == '\n') { if (htmlOutput) { j += appendString(BREAK_LINE, j); if ((separateMeanings) && (!firstLineBreak)) { j += appendString(BREAK_LINE, j); } } else { dataBuffer[j] = '\n'; j++; if ((separateMeanings) && (!firstLineBreak)) { dataBuffer[j] = '\n'; j++; } } if (firstLineBreak) { firstLineBreak = false; } } else if ((advancedSearchState) && (advancedSearchHighlight) && (c == '|')) { if (htmlOutput) { // Fix the problem with cross tags if (currentBold) { j += appendString(BOLD_END, j); } // Append color end or color start tag j += appendString((currentColor ? 
A_S_HL_HTML_END : A_S_HL_HTML_START), j); // Fix the problem with cross tags if (currentBold) { // Fix the problem with Qt widget that doesn't // display space properly after the tag // " text" is buggy and converted to " text" if ((currentColor) && (dataBuffer2[i + 1] == ' ')) { dataBuffer[j] = ' '; j++; } j += appendString(BOLD_START, j); } } else { j += appendString((currentColor ? A_S_HL_NORMAL_END : A_S_HL_NORMAL_START), j); } currentColor = !currentColor; } else { if ((htmlOutput) && (boldDecoration)) { if (currentBold) { if (!((('A' <= c) && (c <= 'Z')) || (('a' <= c) && (c <= 'z'))) && (strchr(NON_ALPHA_CHARS, c) == NULL)) { j += appendString(BOLD_END, j); currentBold = false; } } else { if ((('A' <= c) && (c <= 'Z')) || (('a' <= c) && (c <= 'z'))) { j += appendString(BOLD_START, j); currentBold = true; } } } if (latinOutput) { if (('А' <= c) && (c <= 'Я')) { toUpperCase(LATIN_OUTPUT_CHARS[c - 'А'], wordBuffer2); j += appendString(wordBuffer2, j); } else if (('а' <= c) && (c <= 'я')) { j += appendString(LATIN_OUTPUT_CHARS[c - 'а'], j); } else { dataBuffer[j] = c; j++; } } else { dataBuffer[j] = c; j++; } } i++; } if (currentBold) { j += appendString(BOLD_END, j); } if (htmlOutput) { j += appendString(BREAK_LINE, j); } else { dataBuffer[j] = '\n'; j++; } dataBuffer[j] = '\0'; return dataBuffer; } // End of transformResult //=== Append String ====================================================== // Append passed data to the dataBuffer and return length of append data //======================================================================== int Translator::appendString(const char *data, const int currentPointer) { int i = 0; while (data[i] != '\0') { dataBuffer[currentPointer + i] = data[i]; i++; } return i; } // End of appendString //=== Gets Result ======================================================== // Gets and transform result of last search (move word pointer) operation //======================================================================== char 
*Translator::getResult() { return transformResult(currentDic->getResult()); } // End of getResult //=== Gets Next Random Word ============================================== // Gets next random word, perform some translations //======================================================================== char *Translator::getNextRandomWord() { do { currentDic->getNextRandomWord(); } while ((signed)strlen(currentDic->getResult()) < testLevel); return getWord(false, false); } // End of getNextRandomWord //=== Sets Test Dictionary =============================================== // Sets test dictionary (used only for exam) //======================================================================== void Translator::setTestParameters(const int dictionary, const int level) { testDictionary = dictionary; currentDictionary = dictionary; testLevel = level; switch (currentDictionary) { case ENG_BUL: currentDic = dicEB; break; case BUL_ENG: currentDic = dicBE; break; } } // End of setTestDictionary //=== Tests Word ========================================================= // Tests the user input and return true if user guess word //======================================================================== bool Translator::testWord(const char *word) { char *p; char *b; int l = 0; if (strlen(word) == 0) { return false; } // Get result and make upper case toUpperCase(currentDic->getResult(), dataBuffer); // Analize word strcpy(tempBuffer, word); p = tempBuffer - 1; while (p != NULL) { b = p + 1; p = strchr(b, TEST_DELIMITER); l = (p == NULL ? strlen(b) : p - b); if (l >= maxWordLength) { l = maxWordLength - 1; } strncpy(wordBuffer2, b, l); wordBuffer2[l] = '\0'; trimWord(wordBuffer2, wordBuffer2); if (((latinInput) || (latinOutput)) && (isLatinInput(wordBuffer2, wordBuffer)) && (strlen(wordBuffer) > 0)) { currentDictionary = BUL_ENG; currentDic = dicBE; } else { toUpperCase(wordBuffer2, wordBuffer); setCurrentDictionary(wordBuffer[0]); } currentDictionary = (currentDictionary == ENG_BUL ? 
BUL_ENG : ENG_BUL); currentDic = (currentDictionary == ENG_BUL ? dicEB : dicBE); if (currentDictionary != testDictionary) { currentDictionary = testDictionary; currentDic = (currentDictionary == ENG_BUL ? dicEB : dicBE); } else if (wordBuffer[0] != '\0') { if (search(dataBuffer, wordBuffer, true) != NULL) { return true; } } } return false; } // End of testWord //=== Set Advanced Search Text =========================================== // Sets advanced search text // Return true if text parsed without problems //======================================================================== bool Translator::setAdvancedSearchText(const char *word) { int i = 0; int j = 0; wordNumber = 0; bool string = false; bool closeWord = false; char c; char oldc = ' '; bool wPlus = true; while ((c = word[i]) != '\0') { if (c == '"') { string = !string; closeWord = true; } else if (string) { if (!((c == ' ') && (oldc == ' '))) { tempBuffer[(maxWordLength + 1)*wordNumber + j] = c; j++; } } else if ((c == '+') || (c == '-') || ((c == ' ') && (!advancedSearchExactPhrase))) { closeWord = true; } else { if (!((c == ' ') && (oldc == ' '))) { tempBuffer[(maxWordLength + 1)*wordNumber + j] = c; j++; } } if (closeWord) { if ((j > 1) || ((j == 1) && (tempBuffer[(maxWordLength + 1)*wordNumber] != ' '))) { tempBuffer[(maxWordLength + 1)*wordNumber + j] = '\0'; wordPlus[wordNumber] = wPlus; wordNumber++; if (wordNumber == maxWordNumber) { break; } } if (c == '-') { wPlus = false; } else if (c == '+') { wPlus = true; } else if ((c == ' ') && (j > 0)) { wPlus = true; } j = 0; } closeWord = false; oldc = c; i++; } if (j > 0) { if ((j > 1) || ((j == 1) && (tempBuffer[(maxWordLength + 1)*wordNumber] != ' '))) { tempBuffer[(maxWordLength + 1)*wordNumber + j] = '\0'; wordPlus[wordNumber] = wPlus; wordNumber++; } } if (wordNumber > 0) { for (i = 0; i < wordNumber; i++) { trimWord(tempBuffer + (maxWordLength + 1)*i, tempBuffer + (maxWordLength + 1)*i); if (((latinInput) || (latinOutput)) && 
(isLatinInput(tempBuffer + (maxWordLength + 1)*i, wordBuffer2, true))) { strcpy(tempBuffer + (maxWordLength + 1)*i, wordBuffer2); } toUpperCase(tempBuffer + (maxWordLength + 1)*i, tempBuffer + (maxWordLength + 1)*i); } advancedSearchState = true; setCurrentDictionary(tempBuffer[0]); firstTimeAdvancedSearch = true; firstDataBaseAdvancedSearch = true; return true; } return false; } // End of setAdvancedSearchText //=== Search Next Word =================================================== // Searchs for next word that mathc the specified advanced search text // Return true if find next word //======================================================================== bool Translator::searchNextWord() { int i; if (firstTimeAdvancedSearch) { currentDic->goToFirstWord(); firstTimeAdvancedSearch = false; } else { if (!currentDic->goToNextWord()) { return false; } } while (true) { do { toUpperCase(currentDic->getResult(), dataBuffer); for (i = 0; i < wordNumber; i++) { if (((wordPlus[i]) && (search(dataBuffer, tempBuffer + (maxWordLength + 1)*i, advancedSearchWholeWord) == NULL)) || ((!wordPlus[i]) && (search(dataBuffer, tempBuffer + (maxWordLength + 1)*i, advancedSearchWholeWord) != NULL))) { break; } } if (i == wordNumber) { return true; } } while (currentDic->goToNextWord()); if (firstDataBaseAdvancedSearch) { firstDataBaseAdvancedSearch = false; currentDictionary = (currentDictionary == ENG_BUL ? BUL_ENG : ENG_BUL); currentDic = (currentDictionary == ENG_BUL ? dicEB : dicBE); currentDic->goToFirstWord(); } else { break; } } return false; } // End of searchNextWord //=== Set Current Dictionayr ============================================= // Determine dictionary (ENG_BUL or BUL_ENG) //======================================================================== void Translator::setCurrentDictionary(const char c) { currentDictionary = (!(('А' <= c) && (c <= 'Я'))) ? ENG_BUL : BUL_ENG; currentDic = (currentDictionary == ENG_BUL ? 
dicEB : dicBE); } // End of setCurrentDictionary //=== Search ============================================================= // Searchs word in text and return pointer to first occurance or NULL //======================================================================== char *Translator::search(char *text, const char *word, const bool exactSearch) { if (exactSearch) { char *b = text - 1; while (b != NULL) { b++; b = strstr(b, word); if ((b != NULL) && ((b == text) || ((b > text) && (!isAlphaChar(text[b - text - 1])))) && (((b - text + strlen(word) < strlen(text)) && (!isAlphaChar(text[b - text + strlen(word)])) || (b - text + strlen(word) == strlen(text))))) { return b; } } } else { return strstr(text, word); } return NULL; } // End of search //=== Is Alpha Char ====================================================== // Return true if char is Alpha (Bulgarian or English letter) //======================================================================== bool Translator::isAlphaChar(const char c) { return ((('A' <= c) && (c <= 'Z')) || (('А' <= c) && (c <= 'Я')) || (('a' <= c) && (c <= 'z')) || (('а' <= c) && (c <= 'я'))); } // End of isAlphaChar //=== Extract Text ======================================================= // Extracts content from HTML formated text // Return pointer to extracted text // This function is useful for clipboard operation //======================================================================== char *Translator::extractText(const char *text) { int j = 0; int i = 0; bool tag = false; char c; while (((c = text[i]) != '\0') && (j < MAX_DATA_LEN)) { if (c == '<') { tag = true; if (strncmp(text + i, BREAK_LINE, strlen(BREAK_LINE)) == 0) { dataBuffer[j] = '\n'; j++; } } else if (c == '>') { tag = false; } else if (!tag) { dataBuffer[j] = c; j++; } i++; } dataBuffer[j] = '\0'; return dataBuffer; } // End of extractText