/* Copyright (C) 2006-2007 G.P. Halkes
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License version 3, as
published by the Free Software Foundation.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <ctype.h>
#include <errno.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "definitions.h"
/** Tokenizer states used by readFile while scanning an input file. */
typedef enum {
	NONE = 0,       /* Start state: no character has been consumed yet. */
	WHITESPACE = 1, /* The previous character was whitespace or a delimiter. */
	WORD = 2        /* Currently inside a word. */
} MatchState;
/* Value returned by main as the exit status of the program; starts at 0.
   NOTE(review): presumably set to non-zero by the diff code when the inputs
   differ - confirm against doDiff. */
int differences = 0;
/* Word statistics for the run. main stores the readFile results in
   oldTotal/newTotal and reads deleted, oldChanged, added and newChanged when
   printing the report. NOTE(review): those four counters are presumably
   filled in by doDiff - confirm. */
Statistics statistics;
/** Report an unrecoverable error on stderr and terminate the program.
	@param fmt A printf-style format string describing the problem.
	@param ... The values consumed by the conversions in @a fmt.

	This function does not return: the process exits with status 2.
*/
void fatal(const char *fmt, ...) {
	va_list ap;

	va_start(ap, fmt);
	(void) vfprintf(stderr, fmt, ap);
	va_end(ap);

	exit(2);
}
/** Append one character to a token file, escaping it when required.
	@param file The stream to write to.
	@param c The character to write.

	When option.transliterate is not set the character is written verbatim.
	Otherwise a newline is written as the two characters backslash-n and a
	backslash is written doubled. In builds with NO_MINUS_A defined, any other
	character for which isprint() is false is written as a backslash followed
	by its value in upper-case hexadecimal.
*/
static void writeTokenChar(FILE *file, int c) {
	/* No transliteration requested: store the character as-is. */
	if (!option.transliterate) {
		putc(c, file);
		return;
	}

	switch (c) {
		case '\n':
			fputs("\\n", file);
			return;
		case '\\':
			fputs("\\\\", file);
			return;
		default:
			break;
	}

#ifdef NO_MINUS_A
	if (!isprint(c)) {
		fprintf(file, "\\%X", c);
		return;
	}
#endif
	putc(c, file);
}
/** Read a file and separate whitespace from the rest.
	@param file The @a InputFile to read.
	@return The number of "words" in @a file. Every delimiter character counts
		as a word of its own.

	The input is split over two temporary files, whose handles are stored in
	the @a InputFile structure:
	- file->tokens receives every word and every delimiter, each terminated by
	  a newline. Token characters are written through writeTokenChar, which
	  escapes them when option.transliterate is set.
	- file->whitespace receives the whitespace runs found before, between and
	  after the tokens, each terminated by option.whitespaceDelimiter. A run
	  may be zero-length.

	If file->name is not NULL the file is opened here; otherwise file->input is
	used as it is (NOTE(review): presumably a stream preset by the caller, such
	as standard input - confirm). The input stream is closed before returning
	and both temporary files are flushed and rewound. Any I/O failure is
	reported through fatal(), which does not return.
*/
static int readFile(InputFile *file) {
	MatchState state = NONE;
	int c, wordCount = 0;
	/* file->name may be NULL; never pass NULL to a %s conversion. */
	const char *displayName = file->name != NULL ? file->name : "(unnamed input)";

	if (file->name != NULL && (file->input = fopen(file->name, "r")) == NULL)
		fatal(_("Can't open file %s: %s\n"), file->name, strerror(errno));

	if ((file->tokens = tempFile()) == NULL)
		fatal(_("Could not create temporary file: %s\n"), strerror(errno));

	if ((file->whitespace = tempFile()) == NULL)
		fatal(_("Could not create temporary file: %s\n"), strerror(errno));

	while ((c = getc(file->input)) != EOF) {
		switch (state) {
			case NONE:
				if (TEST_BIT(option.whitespace, c)) {
					putc(c, file->whitespace->file);
					state = WHITESPACE;
					break;
				}
				/* The file starts with a token: record a zero-length leading
				   whitespace first. */
				putc(option.whitespaceDelimiter, file->whitespace->file);
				writeTokenChar(file->tokens->file, c);
				if (TEST_BIT(option.delimiters, c)) {
					/* A leading delimiter is a complete word by itself, so it
					   has to be counted just like delimiters found later. */
					wordCount++;
					putc('\n', file->tokens->file);
					state = WHITESPACE;
				} else {
					state = WORD;
				}
				break;
			case WORD:
				if (TEST_BIT(option.whitespace, c)) {
					/* Found the end of a "word". Go to whitespace mode. */
					wordCount++;
					putc('\n', file->tokens->file);
					putc(c, file->whitespace->file);
					state = WHITESPACE;
				} else if (TEST_BIT(option.delimiters, c)) {
					/* Found a delimiter. Finish the current word, add a zero
					   length whitespace to the whitespace file, add the
					   delimiter as a word, and go into whitespace mode. */
					wordCount += 2;
					putc('\n', file->tokens->file);
					writeTokenChar(file->tokens->file, c);
					putc('\n', file->tokens->file);
					putc(option.whitespaceDelimiter, file->whitespace->file);
					state = WHITESPACE;
				} else {
					writeTokenChar(file->tokens->file, c);
				}
				break;
			case WHITESPACE:
				if (TEST_BIT(option.whitespace, c)) {
					putc(c, file->whitespace->file);
				} else if (TEST_BIT(option.delimiters, c)) {
					/* Found a delimiter. Finish the current whitespace, and
					   add the delimiter as a word. Then start new whitespace. */
					wordCount++;
					writeTokenChar(file->tokens->file, c);
					putc('\n', file->tokens->file);
					putc(option.whitespaceDelimiter, file->whitespace->file);
				} else {
					/* Found the start of a word. Finish the whitespace, and go
					   into word mode. */
					writeTokenChar(file->tokens->file, c);
					putc(option.whitespaceDelimiter, file->whitespace->file);
					state = WORD;
				}
				break;
			default:
				PANIC();
		}
	}

	/* getc returns EOF both at end of file and on error; tell them apart. */
	if (ferror(file->input))
		fatal(_("Error reading file %s: %s\n"), displayName, strerror(errno));

	/* Make sure there is whitespace to end the output with. This may
	   be zero-length. */
	putc(option.whitespaceDelimiter, file->whitespace->file);

	/* Make sure the word is terminated, or otherwise diff will add
	   extra output. */
	if (state == WORD) {
		wordCount++;
		putc('\n', file->tokens->file);
	}

	fclose(file->input);

	/* Push the buffered output into the filesystem and only then look for
	   write errors: buffered streams frequently report failures (e.g. a full
	   disk) at flush time, and rewind() below clears the error indicator. */
	if (fflush(file->whitespace->file) == EOF || fflush(file->tokens->file) == EOF ||
			ferror(file->whitespace->file) || ferror(file->tokens->file))
		fatal(_("Error writing to temporary file %s: %s\n"), displayName, strerror(errno));

	/* Rewind so the temporary files can be read from the start. */
	rewind(file->whitespace->file);
	rewind(file->tokens->file);

	return wordCount;
}
/** Compute which percentage @a part is of @a total.
	@param part The number of items in the subset.
	@param total The number of items in the whole set.
	@return The percentage, using integer division, or 0 if @a total is 0
		(an input without any words) so that no division by zero occurs.
*/
static int percentOf(int part, int total) {
	if (total == 0)
		return 0;
	return (part * 100) / total;
}

/** Main.
	@param argc The number of command line arguments.
	@param argv The command line arguments.
	@return The value of the global @a differences.

	Parses the command line, tokenizes the old and the new file with readFile,
	runs doDiff and, if option.statistics is set, prints the word statistics
	for both files.
*/
int main(int argc, char *argv[]) {
#ifdef USE_GETTEXT
	setlocale(LC_ALL, "");
	bindtextdomain("dwdiff", LOCALEDIR);
	textdomain("dwdiff");
#endif

	parseCmdLine(argc, argv);

	statistics.oldTotal = readFile(&option.oldFile);
	statistics.newTotal = readFile(&option.newFile);

	doDiff();

	if (option.statistics) {
		/* Words that are neither deleted/inserted nor changed are common. */
		int common = statistics.oldTotal - statistics.deleted - statistics.oldChanged;
		printf(_("old: %d words %d %d%% common %d %d%% deleted %d %d%% changed\n"), statistics.oldTotal,
			common, percentOf(common, statistics.oldTotal),
			statistics.deleted, percentOf(statistics.deleted, statistics.oldTotal),
			statistics.oldChanged, percentOf(statistics.oldChanged, statistics.oldTotal));

		common = statistics.newTotal - statistics.added - statistics.newChanged;
		printf(_("new: %d words %d %d%% common %d %d%% inserted %d %d%% changed\n"), statistics.newTotal,
			common, percentOf(common, statistics.newTotal),
			statistics.added, percentOf(statistics.added, statistics.newTotal),
			statistics.newChanged, percentOf(statistics.newChanged, statistics.newTotal));
	}
	return differences;
}