/* * XML Catalog Manager (xmlcatmgr) * $Id: sgml.c,v 1.2 2004/08/31 21:25:47 jmmv Exp $ * * Copyright (c) 2003, 2004 Julio M. Merino Vidal. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in * the documentation and/or other materials provided with the * distribution. * 3. Neither the name of the author nor the names of contributors may * be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * This file implements all SGML mode actions, as well as the catalog * parser for SGML files. */ #include "system.h" #ifndef lint __RCSID("$Id: sgml.c,v 1.2 2004/08/31 21:25:47 jmmv Exp $"); #endif #include "grstr.h" #include "mem.h" #include "linklist.h" #include "sgml.h" /* This structure matches catalog entry types with the number of * arguments they require. Used during argument parsing and catalog * validation. */ static struct type { const char *t_name; int t_params; } Types[] = { { "BASE", 1 }, { "CATALOG", 1 }, { "DELEGATE", 2 }, { "DOCTYPE", 2 }, { "DOCUMENT", 1 }, { "ENTITY", 2 }, { "LINKTYPE", 2 }, { "NOTATION", 2 }, { "OVERRIDE", 1 }, { "PUBLIC", 2 }, { "SGMLDECL", 1 }, { "SYSTEM", 2 }, { NULL, 0 } }; /* Describes a catalog entry. * If e_type is ENTRY_NORMAL, e_field[123] are pointers to strings * with information about the entry. e_field3 is only defined when * Types[e_field1].t_params is 2. * Pointers in this structure are expected to be pointers to memory * regions allocated with malloc(3); they are later free(2)'d * unconditionally. */ struct entry { LINKLIST_ENTRY(entry); #define ENTRY_NORMAL 1 #define ENTRY_COMMENT 2 int e_type; char *e_comment; char *e_field1; char *e_field2; char *e_field3; }; /* This list type describes a catalog; a list of entries. */ LINKLIST_HEAD(entries, entry); /* Token types that we can find while reading a catalog file. */ #define TOKEN_ERROR 0 #define TOKEN_WORD 1 #define TOKEN_STRING 2 #define TOKEN_COMMENT 3 static bool add_entry(struct entries *, const char *, const char *, const char *, bool); static bool remove_entry(struct entries *, const char *, const char *); static void free_catalog(struct entries *); static void read_catalog(FILE *, struct entries *); static int read_token(FILE *, char **); static bool read_token_comment(FILE *, struct grstr *); static bool read_token_string(FILE *, struct grstr *); static bool read_token_word(FILE *, struct grstr *); static void write_catalog(FILE *, struct entries *); /* --------------------------------------------------------------------- */ /* * The SGML add action. For each triplet of arguments, add_entry is * called to register the given entry in the catalog. */ bool sgml_add(int argc, char *const *argv, FILE *f, bool prepend) { bool res; struct entries catalog; read_catalog(f, &catalog); res = true; while (argc > 0) { const char *type, *orig, *replace; if (argc == 1) { warnx("unbalanced arguments for `add' action"); res = false; argc--; argv++; } else { type = argv[0]; orig = argv[1]; argc -= 2; argv += 2; if (argc >= 1) { replace = strcmp(argv[0], "--") != 0 ? argv[0] : NULL; argc--; argv++; } else replace = NULL; res &= add_entry(&catalog, type, orig, replace, prepend); } } write_catalog(f, &catalog); free_catalog(&catalog); return res; } /* --------------------------------------------------------------------- */ /* * The SGML create action. Generates an empty catalog file, with a * single comment in it. */ bool sgml_create(FILE *f) { char buf[] = " Created by " PACKAGE_STRING " "; struct entry *e; struct entries catalog; LINKLIST_INIT(&catalog); e = (struct entry *)malloc(sizeof(struct entry)); e->e_type = ENTRY_COMMENT; e->e_comment = strdup(buf); LINKLIST_APPEND(&catalog, e); write_catalog(f, &catalog); free_catalog(&catalog); return true; } /* --------------------------------------------------------------------- */ /* * The SGML lookup action. Searches the given entries in the catalog * file. Only returns success if all of them were found. */ bool sgml_lookup(int argc, char *const *argv, FILE *f) { bool found, res; struct entry *iter; struct entries catalog; assert(argc > 0 && argv != NULL && f != NULL); read_catalog(f, &catalog); res = true; while (argc > 0) { found = false; LINKLIST_FOREACH(iter, &catalog) { if (iter->e_type == ENTRY_NORMAL) { assert(iter->e_field2 != NULL); if (strcmp(iter->e_field2, argv[0]) == 0) { found = true; if (iter->e_field3 == NULL) { printf("%s \"%s\"\n", iter->e_field1, iter->e_field2); } else { printf("%s \"%s\" \"%s\"\n", iter->e_field1, iter->e_field2, iter->e_field3); } } } } if (!found) { warnx("no matching entry for `%s'", argv[0]); res = false; } argc--; argv++; } free_catalog(&catalog); return res; } /* --------------------------------------------------------------------- */ /* * The SGML remove action. Removes all given entries from the catalog. * Arguments are expected to come in pairs, although if only one is * provided, all matching catalog entries are removed (compatibility with * previous versions). */ bool sgml_remove(int argc, char *const *argv, FILE *f) { bool res; struct entries catalog; if (argc == 0) { warnx("too few arguments for `remove' action"); return false; } read_catalog(f, &catalog); if (argc == 1) { warnx("enabling compatibility mode; removing ALL matching entries"); res = remove_entry(&catalog, NULL, argv[0]); } else { res = true; while (argc >= 2 && argc % 2 == 0) { res &= remove_entry(&catalog, argv[0], argv[1]); argc -= 2; argv += 2; } if (argc % 2 != 0) { warnx("unbalanced arguments for `remove' action"); res = false; } } write_catalog(f, &catalog); free_catalog(&catalog); return res; } /* --------------------------------------------------------------------- */ /* * Adds the given entry to the catalog. 'replace' may be null if the * given 'type' only expects one argument (this is determined from the * 'Types' array). */ static bool add_entry(struct entries *catalog, const char *type, const char *orig, const char *replace, bool prepend) { bool found; int i; struct entry *e; i = 0; found = false; while (!found && Types[i].t_name != NULL) { if (strcmp(Types[i].t_name, type) == 0) { found = true; } else { i++; } } if (!found) { warnx("unknown type `%s'", type); return false; } if ((Types[i].t_params == 1 && replace != NULL) || (Types[i].t_params == 2 && replace == NULL)) { warnx("parameter count mismatch for type `%s'", type); return false; } found = false; LINKLIST_FOREACH(e, catalog) { if (e->e_type == ENTRY_NORMAL) { assert(e->e_field1 != NULL); assert(e->e_field2 != NULL); if (strcmp(e->e_field1, type) == 0 && strcmp(e->e_field2, orig) == 0) { found = true; break; } } } if (found) { warnx("entry already exists for `%s' of type `%s'", orig, type); return false; } e = (struct entry *)malloc(sizeof(struct entry)); e->e_type = ENTRY_NORMAL; e->e_field1 = strdup(type); e->e_field2 = strdup(orig); e->e_field3 = replace == NULL ? NULL : strdup(replace); if (prepend) { LINKLIST_PREPEND(catalog, e); } else { LINKLIST_APPEND(catalog, e); } return true; } /* --------------------------------------------------------------------- */ /* * Removes the given entry from the catalog. If 'type' is NULL, removes * all matching entries, not only one. */ static bool remove_entry(struct entries *catalog, const char *type, const char *orig) { bool found; struct entry *iter; assert(catalog != NULL && orig != NULL); if (type != NULL) { int i = 0; found = false; while (!found && Types[i].t_name != NULL) { if (strcmp(Types[i].t_name, type) == 0) { found = true; } else { i++; } } if (!found) { warnx("unknown type `%s'", type); return false; } } found = false; iter = LINKLIST_FIRST(catalog); while (iter != NULL) { struct entry *tmp; tmp = LINKLIST_NEXT(iter); if (iter->e_type == ENTRY_NORMAL) { if (strcmp(iter->e_field2, orig) == 0 && (type == NULL || ((type != NULL) && (strcmp(iter->e_field1, type) == 0)))) { free(iter->e_field1); free(iter->e_field2); if (iter->e_field3 != NULL) free(iter->e_field3); LINKLIST_REMOVE(catalog, iter); free(iter); found = true; } } iter = tmp; } if (!found && type != NULL) { warnx("no matching entry for `%s' of type `%s'", orig, type); } else if (!found) { warnx("no matching entry for `%s' of any type", orig); } return found; } /* --------------------------------------------------------------------- */ /* * Deletes the given catalog, and all its entries. */ static void free_catalog(struct entries *catalog) { struct entry *iter; iter = LINKLIST_FIRST(catalog); while (iter != NULL) { struct entry *tmp; tmp = LINKLIST_NEXT(iter); if (iter->e_type == ENTRY_COMMENT) free(iter->e_comment); else { free(iter->e_field1); free(iter->e_field2); free(iter->e_field3); } free(iter); iter = tmp; } } /* --------------------------------------------------------------------- */ /* * Reads a catalog file, storing all entries found to the given catalog. */ static void read_catalog(FILE *f, struct entries *catalog) { bool found; char *token; int i, type; struct entry *e; LINKLIST_INIT(catalog); rewind(f); /* Read tokens from the catalog file until we hit EOF or an error. * Note that the function returns a new memory chunk in 'token', so * we have to handle it properly to not produce leaks. */ while ((type = read_token(f, &token)) != TOKEN_ERROR) { switch (type) { case TOKEN_COMMENT: /* Got a comment; add it to the catalog as is. */ e = (struct entry *)malloc(sizeof(struct entry)); e->e_type = ENTRY_COMMENT; e->e_comment = token; LINKLIST_APPEND(catalog, e); break; case TOKEN_WORD: /* Got a word. It must be the beginning of an entry, so we * have to check if it's valid by looking at the 'Types' * array. */ i = 0; found = false; while (!found && Types[i].t_name != NULL) { if (strcmp(Types[i].t_name, token) == 0) { found = true; } else { i++; } } if (!found) { warnx("unexpected token `%s'", token); free(token); } else { char *f1, *f2, *f3; int tktype; /* The token was a valid word (i.e., start of entry), so * we have to read one or two more words depending on the * entry type. */ f1 = token; f2 = f3 = NULL; while ((tktype = read_token(f, &f2)) == TOKEN_COMMENT) warnx("discarding comment '%s'", f2); if (tktype == TOKEN_ERROR) warnx("`%s' entry requires %d arguments", f1, Types[i].t_params); if (Types[i].t_params == 2) { while ((tktype = read_token(f, &f3)) == TOKEN_COMMENT) warnx("discarding comment '%s'", f3); if (tktype == TOKEN_ERROR) warnx("`%s' entry requires %d arguments", f1, Types[i].t_params); } e = (struct entry *)malloc(sizeof(struct entry)); e->e_type = ENTRY_NORMAL; e->e_field1 = f1; e->e_field2 = f2; e->e_field3 = f3; LINKLIST_APPEND(catalog, e); } break; default: warnx("unexpected token `%s'", token); free(token); break; } } } /* --------------------------------------------------------------------- */ /* * Reads the next token from the catalog file. As it is always a string, * we keep it in memory using a grstr object and then return a pointer to * the new memory chunk in the 'dest' output parameter. */ static int read_token(FILE *f, char **dest) { int ch, type; struct grstr *buf; buf = grstr_new(); if (buf == NULL) return TOKEN_ERROR; type = TOKEN_ERROR; while ((ch = fgetc(f)) != EOF && isspace(ch)); if (ch == EOF) { if (ferror(f)) warn("cannot read next character"); } else if (ch == '"') { /* Got a double quote character; this is the start of a string, * which is handled by read_token_string. */ if (read_token_string(f, buf)) type = TOKEN_STRING; } else if (ch == '-') { /* Got a dash character; this may be the start of a comment depending * on whether the next character is another dash or not. */ ch = fgetc(f); if (ch == EOF) { if (ferror(f)) warn("cannot read next character"); } else if (ch == '-') { /* Got two dashes; this is the start of a comment, which is * handled by read_token_comment. */ if (read_token_comment(f, buf)) type = TOKEN_COMMENT; } else { if (grstr_append_char(buf, '-') && grstr_append_char(buf, ch) && read_token_word(f, buf)) type = TOKEN_WORD; } } else { /* Got an unrecognized character: start of word. */ if (grstr_append_char(buf, ch) && read_token_word(f, buf)) type = TOKEN_WORD; } if (type != TOKEN_ERROR) *dest = grstr_to_text(buf); else { *dest = NULL; grstr_free(buf); } return type; } /* --------------------------------------------------------------------- */ /* * Read a comment from the catalog file (until we reach a sequence of two * consecutive dashes '--'. We assume that we have already picked the * first double quote character, so we start reading from within the * comment. */ static bool read_token_comment(FILE *f, struct grstr *gs) { bool res, error; int ch; res = false; error = false; while (!error) { while (!error && (ch = fgetc(f)) != EOF && ch != '-') { error |= !grstr_append_char(gs, ch); } ch = fgetc(f); if (ch == '-') break; else { error |= !grstr_append_char(gs, '-'); error |= !grstr_append_char(gs, ch); } } if (error || (ch == EOF && ferror(f))) warn("cannot read next character"); else res = true; return res; } /* --------------------------------------------------------------------- */ /* * Read a string from the catalog file (until we find another double * quote character). We assume that we have already picked the first * double quote character, so we start reading from within the string. */ static bool read_token_string(FILE *f, struct grstr *gs) { bool error, res; int ch; error = false; while (!error && (ch = fgetc(f)) != EOF && ch != '"') { error |= !grstr_append_char(gs, ch); } if (error || (ch == EOF && ferror(f))) { res = false; warn("cannot read next character"); } else res = true; return res; } /* --------------------------------------------------------------------- */ /* * Read a word from the catalog file (until we find a space character). * We assume that we have already picked the first letter and the caller * has added it to 'gs'. */ static bool read_token_word(FILE *f, struct grstr *gs) { bool error, res; int ch; error = false; while (!error && (ch = fgetc(f)) != EOF && !isspace(ch)) { if (ch == '-') { ch = fgetc(f); if (ch == '-') { struct grstr *tmp; tmp = grstr_new(); if (tmp == NULL) error = true; else { if (read_token_comment(f, tmp)) { char *text; text = grstr_to_text(tmp); warnx("discarding comment touching word `%s'", text); free(text); } else error = true; } } } else if (!grstr_append_char(gs, ch)) { error = true; } } if (error || (ch == EOF && ferror(f))) { res = false; warn("cannot read next character"); } else res = true; return res; } /* --------------------------------------------------------------------- */ /* * Write the given catalog to the file. This truncates the stream to * zero bytes before writing anything, to ensure the file contains no * garbage. */ static void write_catalog(FILE *f, struct entries *catalog) { struct entry *iter; rewind(f); fflush(f); ftruncate(fileno(f), 0); LINKLIST_FOREACH(iter, catalog) { if (iter->e_type == ENTRY_COMMENT) fprintf(f, "--%s--\n", iter->e_comment); else { if (iter->e_field3 == NULL) fprintf(f, "%s \"%s\"\n\n", iter->e_field1, iter->e_field2); else fprintf(f, "%s \"%s\"\n\t\"%s\"\n\n", iter->e_field1, iter->e_field2, iter->e_field3); } } } /* * Local Variables: *** * mode: c *** * c-file-style: "stroustrup" *** * End: *** * vim: syntax=c:expandtab:shiftwidth=4:softtabstop=4 */