/* inventory.c:
 *
 ****************************************************************
 * Copyright (C) 2003 Tom Lord
 *
 * See the file "COPYING" for further information about
 * the copyright and warranty status of this work.
 */


#include "hackerlab/char/str.h"
#include "hackerlab/char/char-class.h"
#include "hackerlab/mem/mem.h"
#include "hackerlab/sort/qsort.h"
#include "hackerlab/arrays/ar.h"
#include "hackerlab/fs/file-names.h"
#include "hackerlab/vu/safe.h"
#include "tla/libarch/inventory.h"


/* __STDC__ prototypes for static functions */
static int cmp_traversal_files (void * va, void * vb, void * ign);
static t_uchar * explicit_tag (t_uchar * id_file, t_uchar * prefix, t_uchar * postfix);
static t_uchar * implicit_tag (struct arch_id_program * prog, t_uchar * file);
static long smash_non_graphical (t_uchar * buf, long amt);



/* Walk the directory tree rooted at `root'.
 *
 * Every entry of `root' other than "." and ".." is stat'ed and
 * recorded.  The entries are then sorted by path (see
 * `cmp_traversal_files') and handed, in that order, to `callback'
 * together with `closure'.  When the callback returns non-zero for an
 * entry that is a directory, that directory is traversed recursively
 * before the next sibling is visited.
 *
 * The entry records (including their `name' and `path' strings) are
 * released before this function returns, so the callback must copy
 * anything it wants to keep.
 */
void
arch_tree_traversal (t_uchar * root, arch_traversal_callback callback, void * closure)
{
  DIR * dir;
  struct arch_traversal_file * files = 0;
  int x;

  safe_opendir (&dir, root);

  while (1)
    {
      char * name = 0;
      struct arch_traversal_file * this;

      safe_readdir (&name, dir);
      if (!name)
        break;

      /* Skip exactly "." and "..".  Other names that merely begin
       * with ".." (such as "..foo") are ordinary entries.
       */
      if ((name[0] == '.') && (!name[1] || ((name[1] == '.') && !name[2])))
        {
          lim_free (0, name);
          continue;
        }

      this = (struct arch_traversal_file *)ar_push ((void **)&files, 0, sizeof (*files));
      this->name = name;
      this->path = file_name_in_vicinity (0, root, name);
      safe_stat (this->path, &this->stat);
      /* name freed from files->name */
    }

  safe_closedir (dir);

  quicksort ((void *)files, ar_size ((void *)files, 0, sizeof (*files)), sizeof (*files), cmp_traversal_files, 0);

  for (x = 0; x < ar_size ((void *)files, 0, sizeof (*files)); ++x)
    {
      if (callback (&files[x], closure) && S_ISDIR (files[x].stat.st_mode))
        {
          arch_tree_traversal (files[x].path, callback, closure);
        }
    }

  /* Both strings of every entry were allocated in the loop above and
   * are owned by the entry.
   */
  for (x = 0; x < ar_size ((void *)files, 0, sizeof (*files)); ++x)
    {
      lim_free (0, files[x].name);
      lim_free (0, files[x].path);
    }

  ar_free ((void **)&files, 0);
}


/* quicksort comparison function: order two `struct arch_traversal_file'
 * records by their `path' fields using `str_cmp'.  `ign' is unused.
 */
static int
cmp_traversal_files (void * va, void * vb, void * ign)
{
  struct arch_traversal_file * a = (struct arch_traversal_file *)va;
  struct arch_traversal_file * b = (struct arch_traversal_file *)vb;

  return str_cmp (a->path, b->path);
}



/* Enable the external (`.arch-ids') tag checks in `prog'. */
void
arch_id_program_compile_external (arch_id_program * prog)
{
  prog->check_external_tags = 1;
}


/* Record `category' as the category reported by `arch_inventory_id'
 * for files for which no id is computed.
 */
void
arch_id_program_compile_untagged_disposition (arch_id_program * prog, enum arch_inventory_category category)
{
  prog->untagged_source_category = category;
}


/* Enable embedded tag checks in `prog', including the old-style
 * "<basename> - <tag>" form recognized by `implicit_tag'.
 */
void
arch_id_program_compile_basename_embedded (arch_id_program * prog)
{
  prog->check_embedded_tags = 1;
  prog->check_basename_embedded_tags = 1;
}


/* Compile `regexp_source' and append it to the list of embedded-tag
 * regexps of `prog'.
 *
 * If the regexp does not compile, a diagnostic is printed on
 * descriptor 2 and the process exits with status 2.
 *
 * This function does not itself set `prog->check_embedded_tags'.
 */
void
arch_id_program_compile_embedded (arch_id_program * prog, t_uchar * regexp_source)
{
  regex_t * new_re;
  int reg_error;

  new_re = (regex_t *)ar_push ((void **)&prog->regexp_list, 0, sizeof (*new_re));
  mem_set0 ((void *)new_re, sizeof (*new_re));

  /* REG_NOSUB must not be used here: `implicit_tag' needs the end
   * offset of the match (`rm_eo') to find where the tag begins, and
   * match positions are not reported for REG_NOSUB patterns.
   */
  reg_error = regcomp (new_re, regexp_source, REG_EXTENDED | REG_NEWLINE);
  if (reg_error)
    {
      char err_msg[256];

      regerror (reg_error, new_re, err_msg, sizeof (err_msg));
      safe_printfmt (2, "arch: illegal regexp in =tagging-method (%s)\n regexp: %s\n", err_msg, regexp_source);
      exit (2);
    }
}


/* Release the compiled regexps held by `prog': each regexp is passed
 * to `regfree' and the array that held them is then freed.
 */
void
arch_free_id_program_data (arch_id_program * prog)
{
  int x;

  for (x = 0; x < ar_size ((void *)prog->regexp_list, 0, sizeof (regex_t)); ++x)
    {
      regfree (&prog->regexp_list[x]);
    }

  ar_free ((void **)&prog->regexp_list, 0);
}



/* Compute the inventory id of `path' according to `prog'.
 *
 * `opt_lstat', if not 0, supplies the lstat data for the file;
 * otherwise the file is lstat'ed here.
 *
 * The checks are made in this order and the first one that yields an
 * id wins.  The id prefix tells which rule produced it:
 *
 *   "E_"  contents of a file located in a `.arch-ids' directory
 *   "D_"  a `.arch-ids' directory itself (id built from the path)
 *   "A_"  "./{arch}" and anything below it (id built from the path)
 *   "x_"  explicit id file (`.arch-ids/=id' or `.arch-ids/<name>.id')
 *   "a_"  `.arch-ids/=all'
 *   "i_"  embedded tag found by `implicit_tag'
 *   "w_"  `.arch-ids/=default'
 *   "k_"  no tag, but `.arch-ids/=dont-care' is present
 *   "?_"  no tag (embedded tags enabled)
 *   "?"   no tag, untagged files are classified as source
 *
 * Returns a newly allocated id string, or 0 if there is none.  Unless
 * 0 is returned early (for a 0 `path', or when the path can not be
 * converted to file-name form), `*category_ret' is set:
 * to `arch_inventory_source' when an id is returned and to
 * `prog->untagged_source_category' otherwise.
 */
t_uchar *
arch_inventory_id (enum arch_inventory_category * category_ret, arch_id_program * prog, t_uchar * path, struct stat * opt_lstat)
{
  t_uchar * as_file = 0;
  t_uchar * dir = 0;
  t_uchar * dir_basename = 0;
  t_uchar * basename = 0;
  t_uchar * answer = 0;
  struct stat stat_buf;

  if (!path)
    return 0;

  as_file = file_name_from_directory (0, path);
  if (!as_file)
    return 0;

  basename = file_name_tail (0, as_file);
  dir = file_name_directory_file (0, as_file);
  if (!dir)
    dir = str_save (0, ".");
  dir_basename = file_name_tail (0, dir);

  if (opt_lstat)
    stat_buf = *opt_lstat;
  else
    safe_lstat (as_file, &stat_buf);

  /* Special control file tags
   *
   * If we're using external or embedded tags, arch control
   * files are treated specially.
   */
  if (prog->check_external_tags || prog->check_embedded_tags)
    {
      if (!str_cmp (dir_basename, ".arch-ids"))
        {
          /* Explicit tag files use their contents as tag, with the
           * prefix "E_".
           */
          answer = explicit_tag (as_file, "E_", 0);
        }
      else if (!str_cmp (basename, ".arch-ids"))
        {
          /* Explicit tag file directories: */
          long amt;

          answer = str_alloc_cat (0, "D_", as_file);
          amt = smash_non_graphical (answer, str_length (answer));
          answer[amt] = 0;
        }
      else if (   !str_cmp_prefix ("./{arch}/", as_file)
               || !str_cmp ("./{arch}", as_file))
        {
          /* Paths beginning with "./{arch}" are tagged with their own
           * path name, with the prefix "A_".  The presumptions are that these
           * files never move, and that if a file is present, its contents are
           * invariant.
           */
          long amt;

          answer = str_alloc_cat (0, "A_", as_file);
          amt = smash_non_graphical (answer, str_length (answer));
          answer[amt] = 0;
        }
    }

  if (answer)
    goto done;

  /* Next, check for explicit tags (but not =default explicit tags, yet).
   */
  if (prog->check_external_tags)
    {
      t_uchar * id_file = 0;

      if (S_ISDIR (stat_buf.st_mode))
        {
          id_file = file_name_in_vicinity (0, as_file, ".arch-ids/=id");
        }
      else
        {
          id_file = file_name_in_vicinity (0, dir, ".arch-ids/");
          id_file = str_realloc_cat_many (0, id_file, basename, ".id", str_end);
        }

      answer = explicit_tag (id_file, "x_", 0);

      if (!answer)
        {
          /* Is there a .arch-ids/=all file? */
          lim_free (0, id_file);

          if (S_ISDIR (stat_buf.st_mode))
            {
              id_file = file_name_in_vicinity (0, as_file, ".arch-ids/=all");
            }
          else
            {
              id_file = file_name_in_vicinity (0, dir, ".arch-ids/=all");
            }

          answer = explicit_tag (id_file, "a_", (S_ISDIR (stat_buf.st_mode) ? (t_uchar *)"./." : basename));
        }

      lim_free (0, id_file);
    }

  if (answer)
    goto done;

  /* Next, check for embedded tags.
   */
  if (prog->check_embedded_tags)
    {
      answer = implicit_tag (prog, as_file);
    }

  if (answer)
    goto done;

  /* check for an =default tag
   */
  if (prog->check_external_tags)
    {
      t_uchar * id_file = 0;

      if (S_ISDIR (stat_buf.st_mode))
        {
          id_file = file_name_in_vicinity (0, as_file, ".arch-ids/=default");
        }
      else
        {
          id_file = file_name_in_vicinity (0, dir, ".arch-ids/=default");
        }

      answer = explicit_tag (id_file, "w_", (S_ISDIR (stat_buf.st_mode) ? (t_uchar *)"./." : basename));

      lim_free (0, id_file);
    }

  if (answer)
    goto done;

  /* check for a =dont-care tag
   */
  if (prog->check_embedded_tags)
    {
      t_uchar * id_file = 0;
      long amt;

      id_file = file_name_in_vicinity (0, dir, ".arch-ids/=dont-care");

      if (!safe_access (id_file, F_OK))
        {
          answer = str_alloc_cat (0, "k_", as_file);
        }
      else
        {
          answer = str_alloc_cat (0, "?_", as_file);
        }

      amt = smash_non_graphical (answer, str_length (answer));
      answer[amt] = 0;

      lim_free (0, id_file);
    }

  if (answer)
    goto done;

  /* apply an optional `names' tag
   */
  if (prog->untagged_source_category == arch_inventory_source)
    {
      answer = str_alloc_cat (0, "?", as_file);
    }

 done:

  lim_free (0, as_file);
  lim_free (0, dir);
  lim_free (0, dir_basename);
  lim_free (0, basename);

  if (answer)
    *category_ret = arch_inventory_source;
  else
    *category_ret = prog->untagged_source_category;

  return answer;
}



/* Build an id from the first line of the file `id_file'.
 *
 * The result is `prefix', followed by the first line of the file
 * (trailing non-graphical characters dropped and the remaining
 * non-graphical characters replaced by '_', see
 * `smash_non_graphical'), followed by `postfix' if that is not 0.
 *
 * Returns a newly allocated string, or 0 if the file can not be
 * opened or read.
 */
static t_uchar *
explicit_tag (t_uchar * id_file, t_uchar * prefix, t_uchar * postfix)
{
  int errn;
  int id_fd;
  t_uchar * answer;
  char buf[1024];
  long amt;

  id_fd = vu_open (&errn, id_file, O_RDONLY, 0);
  if (id_fd < 0)
    return 0;

  answer = str_save (0, prefix);

  while (1)
    {
      t_uchar * eol;

      amt = vu_read_retry (&errn, id_fd, buf, sizeof (buf));

      if (amt < 0)
        {
          lim_free (0, answer);
          vu_close (&errn, id_fd);
          return 0;
        }

      if (!amt)
        break;

      eol = str_chr_index_n (buf, amt, '\n');

      if (!eol)
        {
          /* The line continues past this chunk: keep all of it and
           * read on.
           */
          amt = smash_non_graphical (buf, amt);
          answer = str_realloc_cat_n (0, answer, buf, amt);
        }
      else
        {
          /* End of the first line: keep what precedes the newline
           * and stop.  Anything after the first line is not part of
           * the id.
           */
          amt = eol - (t_uchar *)buf;
          amt = smash_non_graphical (buf, amt);
          answer = str_realloc_cat_n (0, answer, buf, amt);
          break;
        }
    }

  answer = str_realloc_cat (0, answer, (postfix ? postfix : (t_uchar *)""));

  vu_close (&errn, id_fd);
  return answer;
}


/* Search `file' for an embedded tag and return the corresponding id
 * ("i_" followed by the smashed tag text), or 0 if there is none or
 * if the file can not be opened, examined or closed.
 *
 * Two regions are examined: first the tail of the file (only for
 * files larger than the buffer; the partial line at the start of the
 * region is discarded), then the head of the file.  In each region
 * every line is compared with the regexps of `prog'; the tag is the
 * text that follows the match, leading blanks skipped.  If
 * `prog->check_basename_embedded_tags' is set, a line of the form
 *
 *      <punctuation/blanks> <basename of file> <blanks> - <tag>
 *
 * is also accepted.
 *
 * A later match replaces an earlier one.  Since the tail is scanned
 * before the head, a tag near the top of the file takes precedence
 * over one near the bottom.
 */
static t_uchar *
implicit_tag (struct arch_id_program * prog, t_uchar * file)
{
  t_uchar * answer = 0;
  int errn;
  t_uchar * basename = 0;
  int file_fd;
  char buf[1025];
  int amt;
  int line;
  int bottom;

  file_fd = vu_open (&errn, file, O_RDONLY, 0);
  if (file_fd < 0)
    return 0;

  if (prog->check_basename_embedded_tags)
    basename = file_name_tail (0, file);

  for (bottom = 1; bottom >= 0; --bottom)
    {
      if (!bottom)
        {
          /* Head of the file.  One byte of `buf' is reserved for the
           * terminating 0.
           */
          if (0 > vu_lseek (&errn, file_fd, 0, SEEK_SET))
            goto error_return;

          amt = vu_read_retry (&errn, file_fd, buf, sizeof (buf) - 1);
          if (amt < 0)
            goto error_return;
        }
      else
        {
          struct stat file_stat_buf;
          char * x;

          if (0 > vu_fstat (&errn, file_fd, &file_stat_buf))
            goto error_return;

          /* Small files are covered by the scan of the head. */
          if (file_stat_buf.st_size <= (off_t)sizeof (buf))
            continue;

          /* Read the last sizeof (buf) bytes, up to and including the
           * final byte of the file.
           */
          if (0 > vu_lseek (&errn, file_fd, -(long)sizeof (buf), SEEK_END))
            goto error_return;

          amt = vu_read_retry (&errn, file_fd, buf, sizeof (buf));
          if (amt < 0)
            goto error_return;

          /* The region most likely begins in the middle of a line:
           * drop everything up to and including the first newline.
           * This also makes room for the terminating 0.
           */
          x = str_chr_index_n (buf, amt, '\n');
          if (!x)
            continue;
          amt = amt - (1 + x - buf);
          mem_move (buf, x + 1, amt);
        }

      buf[amt] = 0;
      line = 0;

      while (1)
        {
          char * eol_pos;
          int eol;
          int x;

          /* preconditions:
           *
           * line <= amt
           * buf + line is the start of a line in the file
           * buf[amt] == 0
           */
          if (line >= amt)
            break;

          eol_pos = str_chr_index (buf + line, '\n');
          if (eol_pos)
            eol = eol_pos - buf;
          else
            eol = amt;

          /* Does this line match any of the tag regexps?
           */
          for (x = 0; x < ar_size ((void *)prog->regexp_list, 0, sizeof (regex_t)); ++x)
            {
              regmatch_t match_pos;
              regmatch_t * match_pos_addr = &match_pos;

              mem_set0 ((void *)&match_pos, sizeof (match_pos));

              /* regnexec returns 0 when the regexp matches. */
              if (!regnexec (&prog->regexp_list[x], buf + line, eol - line, (size_t)1, &match_pos_addr, 0))
                {
                  int start_of_tag;
                  long size;

                  /* found the tag.  It begins after any whitespace that follows
                   * the end of the match.
                   */
                  for (start_of_tag = line + match_pos.rm_eo;
                       (start_of_tag < eol) && char_is_blank (buf[start_of_tag]);
                       ++start_of_tag)
                    ;

                  size = smash_non_graphical (buf + start_of_tag, eol - start_of_tag);
                  lim_free (0, answer);         /* a later match replaces an earlier one */
                  answer = str_alloc_cat_n (0, "i_", buf + start_of_tag, size);
                  break;
                }
            }

          /* Does this line use an old-style basename tag?
           */
          if (prog->check_basename_embedded_tags)
            {
              int start_of_tag;

              start_of_tag = line;

              /* skip punctuation and blanks at the start of the line
               */
              while ((start_of_tag < eol) && (char_is_punct (buf[start_of_tag]) || char_is_blank (buf[start_of_tag])))
                ++start_of_tag;

              /* Looks dangerous but remember: buf[amt] == 0
               *
               * str_cmp_prefix returns 0 when `basename' is a prefix
               * of the text.
               */
              if (!str_cmp_prefix (basename, buf + start_of_tag))
                {
                  start_of_tag += str_length (basename);

                  /* skip blanks between the basename and the '-' */
                  while ((start_of_tag < eol) && char_is_blank (buf[start_of_tag]))
                    ++start_of_tag;

                  if ((start_of_tag < eol) && (buf[start_of_tag] == '-'))
                    {
                      long size;

                      ++start_of_tag;
                      size = smash_non_graphical (buf + start_of_tag, eol - start_of_tag);
                      lim_free (0, answer);     /* a later match replaces an earlier one */
                      answer = str_alloc_cat_n (0, "i_", buf + start_of_tag, size);
                      break;
                    }
                }
            }

          line = eol + 1;
        }
    }

  lim_free (0, basename);

  /* The descriptor must not be closed a second time if this close
   * fails; just report the failure by returning 0.
   */
  if (0 > vu_close (&errn, file_fd))
    {
      lim_free (0, answer);
      return 0;
    }

  return answer;

 error_return:
  {
    int ign;

    vu_close (&ign, file_fd);
    lim_free (0, basename);
    lim_free (0, answer);
    return 0;
  }
}


/* Sanitize the `amt' characters at `buf' in place.
 *
 * Trailing characters for which `char_is_graph' is false are dropped
 * from the count, and every remaining such character is overwritten
 * with '_'.  Returns the new length.  No terminating 0 is written.
 */
static long
smash_non_graphical (t_uchar * buf, long amt)
{
  long x;

  while ((amt > 0) && !char_is_graph (buf[amt - 1]))
    --amt;

  for (x = 0; x < amt; ++x)
    {
      if (!char_is_graph (buf[x]))
        buf[x] = '_';
    }

  return amt;
}



/* tag: Tom Lord Tue Jul 22 13:17:12 2003 (inventory.c)
 */