/* invent.c: project tree inventory library routines
 *
 * vim:smartindent ts=8:sts=2:sta:et:ai:shiftwidth=2
 ****************************************************************
 * Copyright (C) 2002, 2003 Tom Lord
 *
 * See the file "COPYING" for further information about
 * the copyright and warranty status of this work.
 */

#include "hackerlab/bugs/exception.h"
#include "hackerlab/bugs/panic.h"
#include "hackerlab/os/errno.h"
#include "hackerlab/os/errno-to-string.h"
#include "hackerlab/mem/mem.h"
#include "hackerlab/mem/talloc.h"
#include "hackerlab/char/char-class.h"
#include "hackerlab/char/str.h"
#include "hackerlab/arrays/ar.h"
#include "hackerlab/fs/file-names.h"
#include "hackerlab/vu/safe.h"
#include "libarch/inode-sig.h"
#include "libarch/inv-ids.h"
#include "libarch/invent.h"
#include "libarch/pfs.h"
#include "libarch/debug.h"


/* Per-directory classification patterns, filled in from a directory's
 * `.arch-inventory' file by read_directory_regexps.
 *
 * `storage' holds the compiled patterns.  `regexps' has one slot per
 * regex_t-sized member of struct arch_inventory_regexps; a slot points
 * at the corresponding member of `storage' when the file supplied that
 * pattern and is 0 otherwise.
 *
 * DIR_REGEXP (dir_re, name) selects the slot for `name' by dividing the
 * offset of the `name##_pattern' member by sizeof (regex_t).
 * NOTE(review): this presumes struct arch_inventory_regexps consists
 * only of consecutive regex_t members -- confirm in libarch/invent.h.
 */
struct directory_regexps
{
  regex_t * regexps[sizeof (struct arch_inventory_regexps) / sizeof (regex_t)];
  struct arch_inventory_regexps storage;
#define DIR_REGEXP(dir_re, name) \
  (dir_re)->regexps[offsetof (struct arch_inventory_regexps, name##_pattern) / sizeof (regex_t)]
};

/* State threaded through one traversal: `callback' is invoked as
 * callback (closure, &item) for every item that is reported.
 * `explicit_skips' is not read by the code visible in this part of
 * the file.
 */
typedef struct
{
  inv_callback callback;
  void * closure;
  assoc_table * explicit_skips;
} inventory_state_t;


/* __STDC__ prototypes for static functions */
static void copy_options_but_regexps (struct arch_inventory_options * dest, const struct arch_inventory_options * src);
static int incompatible_options (struct arch_inventory_options const * left, struct arch_inventory_options const * src);
static void cache_inventory_callback (void * closure, invent_callback_data_t const * const data);
static int cached_changeset_destructor (void * data);
static invent_callback_data_t * invent_cb_cache (invent_callback_data_t const * const data);
static void source_inventory_callback (void * closure, invent_callback_data_t const * const data);
static void source_inventory_files_callback (void * closure, invent_callback_data_t const * const data);
static void
arch_inventory_traversal_internal (struct arch_inventory_options * options, t_uchar const * user_dir, t_uchar const * tree_dir, inventory_state_t * state, assoc_table * explicit_skips, arch_project_tree_t * current_tree); static int cmp_files (const void * va, const void * vb); static int right_order_for_recursion (t_uchar * a, t_uchar * b); static int contains_illegal_character (char * filename); static int filename_matches (regex_t * pattern, char * filename); static int is_nested_tree (char * path); static int is_comment_line (t_uchar * line, long len); static int sets_re (char * kw, char ** re, t_uchar * line, long len); static int sets_id_tagging_method (char * kw, enum arch_id_tagging_method * method_var, enum arch_inventory_category * untagged_category_var, enum arch_id_tagging_method method, enum arch_inventory_category untagged_category, t_uchar * line, long len); static int sets_untagged_source_disposition (enum arch_inventory_category * untagged_category_var, t_uchar * saved_line, long saved_len); static void read_directory_regexps (struct directory_regexps * regexps, char const * dir_name); static void free_directory_regexps (struct directory_regexps * regexps); rel_table arch_source_inventory (arch_project_tree_t * tree, int include_ctl, int include_precious, int include_nested) { int here_fd; rel_table answer = 0; struct arch_inventory_options options; here_fd = safe_open (".", O_RDONLY, 0); safe_chdir (tree->root); mem_set0 ((t_uchar *)&options, sizeof (options)); options.categories = arch_inventory_source | (include_precious ? 
arch_inventory_precious : 0); options.want_ids = 1; options.method = arch_names_id_tagging; /* default only */ options.nested = include_nested; options.include_excluded = !!include_ctl; arch_get_inventory_naming_conventions (&options, tree); arch_inventory_traversal (&options, tree, source_inventory_callback, (void *)&answer); arch_free_inventory_naming_conventions (&options); safe_fchdir (here_fd); safe_close (here_fd); return answer; } rel_table arch_source_files_inventory (arch_project_tree_t * tree, int include_ctl, int include_precious) { int here_fd; rel_table answer = 0; struct arch_inventory_options options; here_fd = safe_open (".", O_RDONLY, 0); safe_chdir (tree->root); mem_set0 ((t_uchar *)&options, sizeof (options)); options.categories = arch_inventory_source | (include_precious ? arch_inventory_precious : 0); options.want_ids = 1; options.method = arch_names_id_tagging; /* default only */ options.nested = 0; options.include_excluded = !!include_ctl; /* this fills out the real used inventory method. */ arch_get_inventory_naming_conventions (&options, tree); arch_inventory_traversal (&options, tree, source_inventory_files_callback, (void *)&answer); arch_free_inventory_naming_conventions (&options); safe_fchdir (here_fd); safe_close (here_fd); return answer; } /** * \brief are two inventory options compatible in terms of the results ? 
* \return non 0 on incompatible */ int incompatible_options (struct arch_inventory_options const * left, struct arch_inventory_options const * right) { return left->categories != right->categories || left->method != right->method || left->want_ids != right->want_ids || left->treat_unrecognized_source_as_source != right->treat_unrecognized_source_as_source || left->nested != right->nested || left->include_excluded != right->include_excluded || left->override_method != right->override_method; } static void copy_options_but_regexps (struct arch_inventory_options * dest, const struct arch_inventory_options * src) { dest->categories = src->categories; dest->method = src->method; dest->want_ids = src->want_ids; dest->treat_unrecognized_source_as_source = src->treat_unrecognized_source_as_source; dest->nested = src->nested; dest->include_excluded = src->include_excluded; dest->override_method = src->override_method; } void source_inventory_callback (void * closure, invent_callback_data_t const * const data) { rel_table * answer = (rel_table *)closure; rel_add_records (answer, rel_make_record (data->path, data->id, 0), 0); } void source_inventory_files_callback (void * closure, invent_callback_data_t const * const data) { rel_table * answer = (rel_table *)closure; if (!S_ISDIR (data->stat_buf.st_mode)) rel_add_records (answer, rel_make_record (data->path, data->id, 0), 0); } t_uchar * arch_default_naming_conventions_regexp (enum arch_inventory_category cat) { switch (cat) { default: { panic ("unrecognized inventory category (arch_default_naming_conventions_regexp)"); return 0; /* not reached */ } case arch_inventory_source: { return str_save (0, "."); } case arch_inventory_precious: { return str_save (0, "^(\\+.*|\\.gdbinit|\\.#ckpts-lock|=build\\.*|=install\\.*|CVS|CVS\\.adm|RCS|RCSLOG|SCCS|TAGS|tags|cscope.*\\.out|\\.svn)$"); } case arch_inventory_backup: { return str_save (0, 
"^.*(~|\\.~[0-9]+~|\\.bak|\\.sw(o|p)|\\.orig|\\.rej|\\.original|\\.modified|\\.reject|\\.(o|a|so|core|so(\\.[[:digit:]]+)*))$|^core$"); } case arch_inventory_junk: { return str_save (0, "^(,.*)$"); } case arch_inventory_unrecognized: { return str_save (0, "^$"); } case arch_inventory_excludes: { return str_save (0, "^(.arch-ids|\\{arch\\}|\\.arch-inventory)$"); } } } /* DO NOT under ANY circumastances change these. They predate pre-populating * tagging-method and will break old archives if altered. */ t_uchar * arch_ancient_default_naming_conventions_regexp (enum arch_inventory_category cat) { switch (cat) { default: { panic ("unrecognized inventory category (arch_ancient_default_naming_conventions_regexp)"); return 0; /* not reached */ } case arch_inventory_source: { return str_save (0, "^([_=a-zA-Z0-9].*|\\.arch-ids|\\{arch\\}|\\.arch-project-tree)$"); } case arch_inventory_precious: { return str_save (0, "^(\\+.*|\\.gdbinit|\\.#ckpts-lock|=build\\.*|=install\\.*|CVS|CVS\\.adm|RCS|RCSLOG|SCCS|TAGS)$"); } case arch_inventory_backup: { return str_save (0, "^.*(~|\\.~[0-9]+~|\\.bak|\\.orig|\\.rej|\\.original|\\.modified|\\.reject)$"); } case arch_inventory_junk: { return str_save (0, "^(,.*)$"); } case arch_inventory_unrecognized: { return str_save (0, "^(.*\\.(o|a|so|core)|core)$"); } case arch_inventory_excludes: { return str_save (0, "^(.arch-ids|\\{arch\\}|\\.arch-inventory)$"); } } } t_uchar * arch_null_default_naming_conventions_regexp (enum arch_inventory_category cat) { switch (cat) { default: { panic ("unrecognized inventory category (arch_null_default_naming_conventions_regexp)"); return 0; /* not reached */ } case arch_inventory_source: { return str_save (0, ".*"); } case arch_inventory_precious: { return str_save (0, "^$"); } case arch_inventory_backup: { return str_save (0, "^$"); } case arch_inventory_junk: { return str_save (0, "^$"); } case arch_inventory_unrecognized: { return str_save (0, "^$"); } case arch_inventory_excludes: { return str_save (0, 
"^(.arch-ids|\\{arch\\})$"); } } } void arch_get_inventory_naming_conventions (struct arch_inventory_options * options, arch_project_tree_t * tree) { char * excludes = 0; char * junk = 0; char * backup = 0; char * precious = 0; char * unrecognized = 0; char * source = 0; int re_error; if (tree && tree->root) { t_uchar * id_tagging_method_file; id_tagging_method_file = arch_tree_id_tagging_method_file (tree); if (!safe_access (id_tagging_method_file, F_OK)) { int in_fd; t_uchar * line; long len; enum arch_id_tagging_method tree_method = arch_names_id_tagging; enum arch_inventory_category untagged_source_category = arch_inventory_precious; in_fd = safe_open (id_tagging_method_file, O_RDONLY, 0); while (1) { safe_next_line (&line, &len, in_fd); if (!len) break; (void)(!is_comment_line (line, len) && !sets_id_tagging_method ("implicit", &tree_method, &untagged_source_category, arch_implicit_id_tagging, arch_inventory_source, line, len) && !sets_id_tagging_method ("tagline", &tree_method, &untagged_source_category, arch_tagline_id_tagging, arch_inventory_source, line, len) && !sets_id_tagging_method ("explicit", &tree_method, &untagged_source_category, arch_explicit_id_tagging, arch_inventory_precious, line, len) && !sets_id_tagging_method ("names", &tree_method, &untagged_source_category, arch_names_id_tagging, arch_inventory_source, line, len) && !sets_untagged_source_disposition (&untagged_source_category, line, len) && !sets_re ("exclude", &excludes, line, len) && !sets_re ("junk", &junk, line, len) && !sets_re ("backup", &backup, line, len) && !sets_re ("precious", &precious, line, len) && !sets_re ("unrecognized", &unrecognized, line, len) && !sets_re ("source", &source, line, len)); } safe_close (in_fd); if (!options->override_method) { options->method = tree_method; options->untagged_source_category = untagged_source_category; } } /* default naming conventions. 
*/ if (excludes == NULL) excludes = arch_ancient_default_naming_conventions_regexp (arch_inventory_excludes); if (junk == NULL) junk = arch_ancient_default_naming_conventions_regexp (arch_inventory_junk); if (backup == NULL) backup = arch_ancient_default_naming_conventions_regexp (arch_inventory_backup); if (precious == NULL) precious = arch_ancient_default_naming_conventions_regexp (arch_inventory_precious); if (unrecognized == NULL) unrecognized = arch_ancient_default_naming_conventions_regexp (arch_inventory_unrecognized); if (source == NULL) source = arch_ancient_default_naming_conventions_regexp (arch_inventory_source); lim_free (0, id_tagging_method_file); } else { excludes = arch_null_default_naming_conventions_regexp (arch_inventory_excludes); junk = arch_null_default_naming_conventions_regexp (arch_inventory_junk); backup = arch_null_default_naming_conventions_regexp (arch_inventory_backup); precious = arch_null_default_naming_conventions_regexp (arch_inventory_precious); unrecognized = arch_null_default_naming_conventions_regexp (arch_inventory_unrecognized); source = arch_null_default_naming_conventions_regexp (arch_inventory_source); } /* compile the conventions. 
*/ re_error = regcomp (&options->regexps.excludes_pattern, excludes, REG_EXTENDED); if (re_error) panic ("bogus id-tagging-method regexp for `excludes'"); re_error = regcomp (&options->regexps.junk_pattern, junk, REG_EXTENDED); if (re_error) panic ("bogus id-tagging-method regexp for `junk'"); re_error = regcomp (&options->regexps.backup_pattern, backup, REG_EXTENDED); if (re_error) panic ("bogus id-tagging-method regexp for `backup'"); re_error = regcomp (&options->regexps.precious_pattern, precious, REG_EXTENDED); if (re_error) panic ("bogus id-tagging-method regexp for `precious'"); re_error = regcomp (&options->regexps.unrecognized_pattern, unrecognized, REG_EXTENDED); if (re_error) panic ("bogus id-tagging-method regexp for `unrecognized'"); re_error = regcomp (&options->regexps.source_pattern, source, REG_EXTENDED); if (re_error) panic ("bogus id-tagging-method regexp for `source'"); lim_free (0, excludes); lim_free (0, junk); lim_free (0, backup); lim_free (0, precious); lim_free (0, unrecognized); lim_free (0, source); } void arch_free_inventory_naming_conventions (struct arch_inventory_options * options) { regfree (&options->regexps.excludes_pattern); regfree (&options->regexps.junk_pattern); regfree (&options->regexps.backup_pattern); regfree (&options->regexps.precious_pattern); regfree (&options->regexps.unrecognized_pattern); regfree (&options->regexps.source_pattern); } void arch_inventory_traversal (struct arch_inventory_options * options, arch_project_tree_t * tree, inv_callback callback, void * closure) { arch_inventory_traversal_path (options, tree, ".", callback, closure); } void arch_inventory_traversal_path (struct arch_inventory_options * options, arch_project_tree_t * tree, t_uchar const * const path, inv_callback callback, void * closure) { assoc_table saved_id_tagging_shortcut = 0; assoc_table explicit_skips = 0; inventory_state_t state; t_uchar * tree_rel_path; saved_id_tagging_shortcut = tree->id_tagging_shortcut; if ((options->method == 
arch_implicit_id_tagging) || (options->method == arch_tagline_id_tagging) || (options->method == arch_explicit_id_tagging)) arch_read_id_shortcut (&tree->id_tagging_shortcut, tree->root); state.callback = callback; state.closure = closure; tree_rel_path = arch_abs_path (path); tree_rel_path = str_replace (tree_rel_path, arch_project_tree_rel_path_from_abs (tree, tree_rel_path)); arch_inventory_traversal_internal (options, path, tree_rel_path, &state, &explicit_skips, tree); free_assoc_table (tree->id_tagging_shortcut); tree->id_tagging_shortcut = saved_id_tagging_shortcut; free_assoc_table (explicit_skips); lim_free (0, tree_rel_path); } AR_TYPEDEF(char *, charstr); AR_TYPEDEF(int, legint); static void do_file_or_deferred (int *deferred_recursions_head, int * deferred_recursions_tail, int * x, int const n_files, ar_charstr files, ar_legint deferred_recursions, ar_legint is_deferred_nested, t_uchar const * const user_dir, t_uchar const * const tree_dir, arch_project_tree_t * current_tree, struct arch_inventory_options * options, inventory_state_t * state, struct directory_regexps * dir_regexps, assoc_table * explicit_skips); /** * \brief handle a single dir * \param tree the project tree logic to use for this tree */ void arch_inventory_traversal_internal (struct arch_inventory_options * options, t_uchar const * user_dir, t_uchar const * tree_dir, inventory_state_t * state, assoc_table * explicit_skips, arch_project_tree_t * current_tree) { DIR * dir; ar_charstr files = 0; int n_files; int deferred_recursions_head; int deferred_recursions_tail; ar_legint deferred_recursions = 0; ar_legint is_deferred_nested = 0; struct directory_regexps * dir_regexps = 0; int x; safe_opendir (&dir, user_dir); files = 0; n_files = 0; while (1) { char * file; safe_readdir (&file, dir); if (!file) break; ar_push_char_star (&files, file); ++n_files; /* look for per-directory inventory regexps */ if (dir_regexps == 0 && (file[0] == '.') && !str_cmp(".arch-inventory", file)) { dir_regexps 
= lim_malloc (0, sizeof *dir_regexps); read_directory_regexps (dir_regexps, user_dir); } } safe_closedir (dir); qsort ((void *)files, n_files, sizeof (char *), cmp_files); /* We want to invoke `callback' on a lexically sorted list of paths. * Suppose that "foo" is a directory, but "foo-bar" also exists. * That means we have to invoke callbacks in the order: * * foo * foo-bar * foo/xyzzy * * When we detect that "foo" is a directory, we can't * necessarilly recurse immediately. Instead, we keep a queue * of deferred directories, recursing on them at the right time. */ deferred_recursions_head = 0; deferred_recursions_tail = 0; deferred_recursions = 0; is_deferred_nested = 0; ar_setsize_legint (&deferred_recursions, n_files); ar_setsize_legint (&is_deferred_nested, n_files); /* FIXME: parameterisation should occur before we get in here * RBC 20050321 */ if ((options->untagged_source_category == arch_inventory_source && !current_tree->untagged_is_source) || (options->untagged_source_category != arch_inventory_source && current_tree->untagged_is_source)) { current_tree->untagged_is_source = (options->untagged_source_category == arch_inventory_source); // debug (2, 6, "Overriding current_tree->untagged_is_source to be %s\n", current_tree->untagged_is_source ? 
"true" : "false"); } if (options->method != current_tree->tag_method) { current_tree->tag_method = options->method; // debug (2, 6, "Overriding tagging method to be %d\n", options->method); } x = 0; while ((x < n_files) || (deferred_recursions_head != deferred_recursions_tail)) { do_file_or_deferred(&deferred_recursions_head, &deferred_recursions_tail, &x, n_files, files, deferred_recursions, is_deferred_nested, user_dir, tree_dir, current_tree, options, state, dir_regexps, explicit_skips); } ar_for_each (files, x) lim_free (0, files[x]); ar_free_charstr (&files); ar_free_legint (&deferred_recursions); ar_free_legint (&is_deferred_nested); free_directory_regexps (dir_regexps); } static void set_file (invent_callback_data_t *self, t_uchar const * const current_dir, char * file) { self->path = file_name_in_vicinity (0, current_dir, file); } void do_file_or_deferred (int * deferred_recursions_head, int * deferred_recursions_tail, int *x, int const n_files, ar_charstr files, ar_legint deferred_recursions, ar_legint is_deferred_nested, t_uchar const * const user_dir, t_uchar const * const tree_dir, arch_project_tree_t * current_tree, struct arch_inventory_options * options, inventory_state_t * state, struct directory_regexps * const dir_regexps, assoc_table * explicit_skips) { char * file; struct stat *stat_buf; int is_control = 0; t_uchar * tree_rel_path = NULL; invent_callback_data_t cbparam = {0,}; if ((*deferred_recursions_head != *deferred_recursions_tail) && ((*x >= n_files) || right_order_for_recursion (files[deferred_recursions[*deferred_recursions_head]], files[*x]))) { int deferred_nested = is_deferred_nested[*deferred_recursions_head]; set_file (&cbparam, user_dir, files[deferred_recursions[*deferred_recursions_head]]); tree_rel_path = file_name_in_vicinity (0, tree_dir, files[deferred_recursions[*deferred_recursions_head]]); ++*deferred_recursions_head; if (deferred_nested) goto handle_deferred_nested; else goto handle_deferred; } file = files[*x]; ++*x; 
set_file (&cbparam, user_dir, file);
  stat_buf = &cbparam.stat_buf;
  cbparam.id = 0;
  cbparam.has_source_name = 0;

  debug (dbg_invent, 8, "do_file_or_deferred: file %s\n", file);

  /* . and .. are mandatory exclude files */
  if (!str_cmp (".", file) || !str_cmp ("..", file))
    {
      /* Shared exit: every branch below jumps here to release the two
       * path strings built for this entry and leave the function.
       */
    next_file:
      lim_free (0, cbparam.path);
      lim_free (0, tree_rel_path);
      return;
    }

  tree_rel_path = file_name_in_vicinity (0, tree_dir, file);

  {
    /* lstat through the tree root rather than through user_dir. */
    t_uchar * abs_path = file_name_in_vicinity (0, current_tree->root, tree_rel_path);
    safe_lstat (abs_path, &cbparam.stat_buf);
    lim_free (0, abs_path);
  }

  /* only symlinks, directories and plain files are
   * considered source, all others need to be precious
   */
  if ( !S_ISREG(stat_buf->st_mode) && !S_ISLNK(stat_buf->st_mode) && !S_ISDIR(stat_buf->st_mode))
    {
      goto precious_file;
    }

  /* non-printing characters, spaces, and glob characters are
   * mandatory unrecognized files
   */
  if (contains_illegal_character (file))
    {
      /* Also the landing point for names that match the unrecognized
       * pattern or no pattern at all.
       */
    unrecognized_file:
      if (options->categories & arch_inventory_unrecognized)
        {
          cbparam.category = arch_inventory_unrecognized;
          state->callback (state->closure, &cbparam);
        }
      goto next_file;
    }

  /* callers can specify a pattern for additional files to
   * exclude from consideration.  A per-directory excludes pattern,
   * when present, is consulted as well.
   */
  if (!options->include_excluded
      && ((dir_regexps && DIR_REGEXP(dir_regexps, excludes) && filename_matches (DIR_REGEXP(dir_regexps, excludes), file))
          || filename_matches (&options->regexps.excludes_pattern, file)))
    goto next_file;

  /* arch control files that get past the exclude pattern are
   * always source.
   */
  is_control = arch_is_control_file (cbparam.path, file);
  if (is_control)
    {
      goto handle_source_file;
    }

  /* file names beginning with "++" are always precious.
   */
  if ((file[0] == '+') && (file[1] == '+'))
    {
      goto precious_file;
    }

  /* file names beginning with ",," are always considered junk files.
   */
  if (file[0] == ',' && file[1] == ',')
    goto junk_file;

  /* test against optional per-directory regexps first */
  if (dir_regexps)
    {
      regex_t * re;

      /* junk */
      re = DIR_REGEXP(dir_regexps, junk);
      if (re && filename_matches (re, file))
        goto junk_file;

      /* backup */
      re = DIR_REGEXP(dir_regexps, backup);
      if (re && filename_matches (re, file))
        goto backup_file;

      /* precious */
      re = DIR_REGEXP(dir_regexps, precious);
      if (re && filename_matches (re, file))
        goto precious_file;

      /* unrecognized */
      re = DIR_REGEXP(dir_regexps, unrecognized);
      if (re && filename_matches (re, file))
        goto unrecognized_file;

      /* source */
      re = DIR_REGEXP(dir_regexps, source);
      if (re && filename_matches (re, file))
        goto handle_source_file;
    }

  /* callers can specify a pattern for "junk" files -- files
   * presumed safe-to-be-removed by automatic tools, barring
   * concurrent tools.
   */
  if (filename_matches (&options->regexps.junk_pattern, file))
    {
    junk_file:
      if (options->categories & arch_inventory_junk)
        {
          cbparam.category = arch_inventory_junk;
          state->callback (state->closure, &cbparam);
        }
      goto next_file;
    }

  /* callers can specify a pattern for "backup" files -- files
   * that are created by editors and similar programs to save old
   * versions
   */
  if (filename_matches (&options->regexps.backup_pattern, file))
    {
    backup_file:
      if (options->categories & arch_inventory_backup)
        {
          cbparam.category = arch_inventory_backup;
          state->callback (state->closure, &cbparam);
        }
      goto next_file;
    }

  /* callers can specify a pattern for "precious" files -- files
   * that are not part of the source, but which should never be
   * automatically removed.
   */
  if (filename_matches (&options->regexps.precious_pattern, file))
    {
    precious_file:
      if (options->categories & arch_inventory_precious)
        {
          cbparam.category = arch_inventory_precious;
          state->callback (state->closure, &cbparam);
        }
      goto next_file;
    }

  /* callers can specify a pattern for explicitly "unrecognized" files --
   * files that should be flagged as errors in tree-lint reports.
   */
  if (filename_matches (&options->regexps.unrecognized_pattern, file))
    {
      goto unrecognized_file;
    }

  /* finally, a pattern for "source" files -- files which are expected
   * to be source files.  Note that the option untagged_source_category
   * determines the final disposition of files which match the source
   * pattern, but have no evident id.
   *
   * If a directory appears to be a source directory, but contains a rules
   * directory of its own, then it is in fact the root of a nested tree -- not
   * a regular source file.
   */
  if (filename_matches (&options->regexps.source_pattern, file))
    {
    handle_source_file:

      if (S_ISDIR (stat_buf->st_mode) && is_nested_tree (cbparam.path))
        {
          if (options->categories & arch_inventory_tree)
            {
              cbparam.category = arch_inventory_tree;
              cbparam.has_source_name = 1;
              state->callback (state->closure, &cbparam);
            }

          if (options->nested)
            {
              /* The nested tree's contents must not be reported before
               * a sibling that sorts ahead of them: queue the index of
               * this entry and come back to it later.
               */
              if ((*x < n_files) && !right_order_for_recursion (file, files[*x]))
                {
                  deferred_recursions[*deferred_recursions_tail] = *x - 1;
                  is_deferred_nested[*deferred_recursions_tail] = 1;
                  ++*deferred_recursions_tail;
                  goto next_file;
                }

              /* Also entered directly when a queued nested tree is
               * resumed; on that path `file' and `stat_buf' were never
               * assigned, and nothing below reads them.
               */
            handle_deferred_nested:
              {
                struct arch_inventory_options nest_opts;
                arch_project_tree_t * nested_tree;

                nested_tree = arch_project_tree_new_ext (talloc_context, cbparam.path, 1, 1);
                mem_set0 ((t_uchar *)&nest_opts, sizeof nest_opts);
                copy_options_but_regexps (&nest_opts, options);
                /* The nested tree gets its own naming conventions. */
                arch_get_inventory_naming_conventions (&nest_opts, nested_tree);
                arch_inventory_traversal_internal (&nest_opts, cbparam.path, ".", state, explicit_skips, nested_tree);
                arch_free_inventory_naming_conventions (&nest_opts);
                if (nested_tree != current_tree)
                  arch_project_tree_delete (nested_tree);
              }
            }
          goto next_file;
        }
      else
        {
          t_uchar * id;
          enum arch_inventory_category this_files_category;

          /* Not a nested tree.  Matches the source pattern.
           */

          id = 0;

          /* Do we need to compute the inventory id of this file which has a source name?
           *
           * Certainly so if the caller wants ids.
           *
           * Otherwise we need the id only if we need it to verify that this
           * is, indeed, source:
           *
           * If untagged-source is source, then we don't need the id.
           * If untagged-source is something else, then we need to see if it has an id.
           */
          if (options->want_ids || !current_tree->untagged_is_source)
            {
              /* if the caller wants tags, or if we can only be certain that this is
               * source by seeing if it has an id, get the id.
               */
              assoc_table saved_explicit_skips;

              /* FIXME - callers should be setting this in the tree in the first place */
              /* Lend the traversal's explicit-skips table to the tree
               * for the duration of the id lookup.
               */
              saved_explicit_skips = current_tree->explicit_skips;
              current_tree->explicit_skips = *explicit_skips;
              id = arch_inventory_id (current_tree, tree_rel_path, stat_buf);
              /* restore */
              *explicit_skips = current_tree->explicit_skips;
              current_tree->explicit_skips = saved_explicit_skips;
            }

          /* What is the category of the file?
           *
           * If we've confirmed that it has an id, then it's certainly source.
           * Also if untagged-source is automatically source.
           *
           * If untagged-source is not source there are two cases:
           *
           * If it's a control file, then it is unrecognized, unconditionally.
           *
           * Otherwise, the untagged-source directive tells us what it is.
           */
          if (id || (options->untagged_source_category == arch_inventory_source))
            this_files_category = arch_inventory_source;
          else if (is_control)
            this_files_category = arch_inventory_unrecognized;
          else
            this_files_category = options->untagged_source_category;

          /* Some callbacks want to see "matches source name but unrecognized
           * for want of tag" as a source file.  They distinguish this from
           * "matches unrecognized name" or "doesn't match any pattern".
           * RBC 20050409 this is used by changeset-inventory in trees
           * where untagged-source = unrecognized, where explicit-default
           * tags are disabled, so that explicit-default tagged files
           * are returned to the changeset inventory.
           * so what happens is that in untagged-source unrecognized trees.
           * If the file is a 'dont-care' dir, then changeset-creation ignores
           * it silently, otherwise barfs.
           * to trigger this you need:
           * * ids requested
           * * file is source
           * * file has no id
           * * untagged-source unrecognized
           * * explicit-default --dont-care
           * which means that id will always be 0.
           * to eliminate this option, we could just not callback in this circumstance.
           */
          if (options->treat_unrecognized_source_as_source && (this_files_category == arch_inventory_unrecognized))
            {
              this_files_category = arch_inventory_source;
            }

          if (this_files_category & options->categories)
            {
              cbparam.category = this_files_category;
              cbparam.id = id;
              cbparam.has_source_name = 1;
              state->callback (state->closure, &cbparam);
            }

          /* Anything that did not end up as source is finished here; id
           * is 0 on this path, because a non-zero id forces the source
           * category above.
           */
          if (this_files_category != arch_inventory_source)
            goto next_file;

          lim_free (0, id);
          id = 0;

          /* recurse into directories, or plan to later.
           */
          if (S_ISDIR (stat_buf->st_mode))
            {
              if ((*x < n_files) && !right_order_for_recursion (file, files[*x]))
                {
                  deferred_recursions[*deferred_recursions_tail] = *x - 1;
                  is_deferred_nested[*deferred_recursions_tail] = 0;
                  ++*deferred_recursions_tail;
                  goto next_file;
                }

              /* Also entered directly when a queued plain directory is
               * resumed.
               */
            handle_deferred:
              if (! options->no_recursion)
                arch_inventory_traversal_internal (options, cbparam.path, tree_rel_path, state, explicit_skips, current_tree);
            }
          goto next_file;
        }
    }
  else
    goto unrecognized_file;
}


/* qsort comparison for an array of char *: orders by str_cmp. */
static int
cmp_files (const void * va, const void * vb)
{
  char * a;
  char * b;

  a = *(char **)va;
  b = *(char **)vb;

  return str_cmp (a, b);
}


/* Decide whether the contents of directory `a' (paths of the form
 * "a/...") may be reported before the sibling name `b'.  Returns
 * non-zero when recursing into `a' now keeps the output sorted.
 */
static int
right_order_for_recursion (t_uchar * a, t_uchar * b)
{
  /* a and b are already in lexical order (a < b)
   */
  /* Skip the common prefix. */
  while ((*a == *b) && *a && *b)
    {
      ++a;
      ++b;
    }

  if (!*a)
    {
      /* Does "A/" come before "B" in an alphabetical listing?
       */
      return (*b > '/');
    }
  else if (!*b)
    {
      /* Does "B/" come after "A" in an alphabetical listing?
*/ return (*a < '/'); } else { invariant (*a < *b); return 1; } } static int contains_illegal_character (char * filename) { int x; for (x = 0; filename[x]; ++x) { invariant (filename[x] != '/'); if (char_is_non_ascii (((t_uchar *)filename)[x])) return 1; } return 0; } static int filename_matches (regex_t * pattern, char * filename) { int answer; answer = regexec (pattern, filename, 0, 0, 0); if (answer == REG_NOMATCH) return 0; if (answer == REG_NOERROR) return 1; panic ("unexpected regexec error in arch_inventory_traversal"); return -1; } int arch_is_control_file (char * rel_file, char * filename) { static int compiled = 0; static regex_t control_pattern = {0,}; if (!compiled) { int re_error; re_error = regcomp (&control_pattern, "^((.*/)?(\\.arch-ids(/(=id|[^/]*\\.id))?|\\{arch\\}((/[a-zA-Z=][^/~]*)(/[0-9a-zA-Z=][^/~]*)*)?|\\{arch\\}/\\.arch-project-tree|\\.arch-inventory))$", REG_EXTENDED); invariant (!re_error); compiled = 1; } return filename_matches (&control_pattern, rel_file); } static int is_nested_tree (char * path) { return arch_project_tree_dir_is_root (NULL, path, NULL); } static int is_comment_line (t_uchar * line, long len) { return !len || char_is_space (line[0]) || (line[0] == '#'); } static int sets_re (char * kw, char ** re, t_uchar * line, long len) { int l; t_uchar * tem; t_uchar * tem2; l = str_length (kw); if (len < (l + 1)) return 0; if (str_cmp_prefix (kw, line) || !char_is_space (line[l])) return 0; line += l; len -= l; while (len && char_is_space (line[0])) { ++line; --len; } while (len && char_is_space (line [len - 1])) --len; if (!*re) { tem2 = str_save_n (0, line, len); tem = str_alloc_cat (0, "(", tem2); tem = str_realloc_cat (0, tem, ")"); lim_free (0, tem2); *re = tem; } else { tem2 = str_save_n (0, line, len); tem = str_alloc_cat (0, "|(", tem2); tem = str_realloc_cat (0, tem, ")"); *re = str_realloc_cat (0, *re, tem); lim_free (0, tem); lim_free (0, tem2); } return 1; } static int sets_id_tagging_method (char * kw, enum 
arch_id_tagging_method * method_var, enum arch_inventory_category * untagged_category_var, enum arch_id_tagging_method method, enum arch_inventory_category untagged_category, t_uchar * line, long len) { int l; l = str_length (kw); if (len < (l + 1)) return 0; if (str_cmp_prefix (kw, line) || !char_is_space (line[l])) return 0; *method_var = method; *untagged_category_var = untagged_category; return 1; } static int sets_untagged_source_disposition (enum arch_inventory_category * untagged_category_var, t_uchar * saved_line, long saved_len) { t_uchar * line = saved_line; long len = saved_len; t_uchar * kw = "untagged-source"; int l; t_uchar * spec = 0; enum arch_inventory_category cat; l = str_length (kw); if (len < (l + 1)) return 0; if (str_cmp_prefix (kw, line) || !char_is_space (line[l])) return 0; len -= l; line += l; while (len && char_is_blank (line[0])) { ++line; --len; } spec = line; while (len && char_is_alpha (line[0])) { ++line; --len; } spec = str_save_n (0, spec, line - spec); while (len && char_is_blank (line[0])) { ++line; --len; } if (len && (line[0] != '\n')) { syntax_error: safe_printfmt (2, "arch: syntax error in =tagging-method:\n %.*s\n", (int)saved_len, saved_line); exit (2); } else { if (!str_cmp (spec, "source")) { cat = arch_inventory_source; } else if (!str_cmp (spec, "precious")) { cat = arch_inventory_precious; } else if (!str_cmp (spec, "backup")) { cat = arch_inventory_backup; } else if (!str_cmp (spec, "junk")) { cat = arch_inventory_junk; } else if (!str_cmp (spec, "unrecognized")) { cat = arch_inventory_unrecognized; } else goto syntax_error; *untagged_category_var = cat; } lim_free (0, spec); return 1; } static void read_directory_regexps (struct directory_regexps * regexps, char const * dir_name) { char * excludes = 0; char * junk = 0; char * backup = 0; char * precious = 0; char * unrecognized = 0; char * source = 0; t_uchar * file_name = 0; int fd; file_name = file_name_in_vicinity (0, dir_name, ".arch-inventory"); fd = safe_open 
(file_name, O_RDONLY, 0); while (1) { t_uchar * line; long len; safe_next_line (&line, &len, fd); if (!len) break; (void)(!is_comment_line (line, len) && !sets_re ("exclude", &excludes, line, len) && !sets_re ("junk", &junk, line, len) && !sets_re ("backup", &backup, line, len) && !sets_re ("precious", &precious, line, len) && !sets_re ("unrecognized", &unrecognized, line, len) && !sets_re ("source", &source, line, len)); } safe_close (fd); mem_set0((t_uchar*)regexps->regexps, sizeof regexps->regexps); if (excludes) { if (regcomp (®exps->storage.excludes_pattern, excludes, REG_EXTENDED)) { panic_msg ("bogus tagging-method regexp for `excludes' in "); panic (dir_name); } DIR_REGEXP(regexps, excludes) = ®exps->storage.excludes_pattern; } if (junk) { if (regcomp (®exps->storage.junk_pattern, junk, REG_EXTENDED)) { panic_msg ("bogus tagging-method regexp for `junk' in "); panic (dir_name); } DIR_REGEXP(regexps, junk) = ®exps->storage.junk_pattern; } if (backup) { if (regcomp (®exps->storage.backup_pattern, backup, REG_EXTENDED)) { panic_msg ("bogus tagging-method regexp for `backup' in "); panic (dir_name); } DIR_REGEXP(regexps, backup) = ®exps->storage.backup_pattern; } if (precious) { if (regcomp (®exps->storage.precious_pattern, precious, REG_EXTENDED)) { panic_msg ("bogus tagging-method regexp for `precious' in "); panic (dir_name); } DIR_REGEXP(regexps, precious) = ®exps->storage.precious_pattern; } if (unrecognized) { if (regcomp (®exps->storage.unrecognized_pattern, unrecognized, REG_EXTENDED)) { panic_msg ("bogus tagging-method regexp for `unrecognized' in "); panic (dir_name); } DIR_REGEXP(regexps, unrecognized) = ®exps->storage.unrecognized_pattern; } if (source) { if (regcomp (®exps->storage.source_pattern, source, REG_EXTENDED)) { panic_msg ("bogus tagging-method regexp for `source' in "); panic (dir_name); } DIR_REGEXP(regexps, source) = ®exps->storage.source_pattern; } lim_free (0, file_name); lim_free (0, excludes); lim_free (0, junk); lim_free (0, 
backup); lim_free (0, precious); lim_free (0, unrecognized); lim_free (0, source); } static void free_directory_regexps (struct directory_regexps * regexps) { int i; if (!regexps) return; for (i = 0; i < sizeof regexps->regexps / sizeof *regexps->regexps; ++i) if (regexps->regexps[i]) regfree (regexps->regexps[i]); lim_free (0, regexps); } rel_table pick_non_control (rel_table table) { int x; rel_table answer = 0; for (x = 0; x < rel_n_records (table); ++x) { t_uchar * id; id = table[x][1]; if (str_cmp_prefix ("A_", id)) rel_add_records (&answer, rel_copy_record (table[x]), 0); } return answer; } /** * \brief normalise a provided path on string alone. * * \return a normalised path. */ static t_uchar * normal_from_path (t_uchar const *path) { int from=0; int length = str_length (path); t_uchar *answer; if (!length) return NULL; /* strip back to the first path element */ if (path[0] == '/') { from = 1; length -= 1; } else if (!str_cmp_prefix ("./", path)) { from = 2; length -= 2; } if (!length) return NULL; /* now knock of any trailing / */ if (path[from + length - 1] == '/') length -= 1; if (!length) return NULL; answer = str_save_n (0, &path [from], length); return str_replace (answer, str_alloc_cat (0, "./", answer)); } /** * \brief normalise a provided path as a prefix for filtering against * \return a normalised path. */ static t_uchar * prefix_from_path (t_uchar const *path) { t_uchar *answer; answer = normal_from_path (path); if (!answer) return NULL; return str_replace (answer, str_alloc_cat (0, answer, "/")); } /** * \brief filter an_inventory so that only paths specified in filter_paths are presnet * * \param an_inventory a rel_table with path, id pairs. * \param filter_paths, a rel_table, with paths. 
paths should start with ./ */ rel_table arch_inventory_included (rel_table an_inventory, rel_table filter_paths) { rel_table answer = NULL; int scan_position; if (!rel_n_records (filter_paths)) return rel_copy_table (an_inventory); rel_for_each (an_inventory, scan_position) { int filter_position; int keep=0; rel_for_each (filter_paths, filter_position) { t_uchar *prefix_filter=prefix_from_path(filter_paths[filter_position][0]); t_uchar *exact_filter =normal_from_path(filter_paths[filter_position][0]); if (!prefix_filter) continue; if (!str_cmp_prefix (prefix_filter, an_inventory[scan_position][0])) { keep = 1; continue; } if (!str_cmp (exact_filter, an_inventory[scan_position][0])) { keep = 1; continue; } } if (keep) rel_add_records (&answer, rel_copy_record (an_inventory[scan_position]), 0); } return answer; } struct caching_inventory_state { cached_changeset_inventory_t * cache; inv_callback callback; void * closure; }; int cached_changeset_destructor (void *data) { cached_changeset_inventory_t * cache = talloc_get_type (data, cached_changeset_inventory_t); int index; ar_for_each (cache->inventory, index) talloc_free (cache->inventory[index]); ar_free_invent_cb (&cache->inventory); return 0; } /** * \brief perform a changeset inventory of tree, caching the results in cache, or using cached results, if the options are compatible */ void cached_changeset_inventory_traveral (struct arch_inventory_options * options, arch_project_tree_t * tree, inv_callback callback, void * closure, cached_changeset_inventory_t ** cache) { struct caching_inventory_state state; if (!*cache || incompatible_options (options, &(*cache)->options)) { talloc_free (*cache); *cache = talloc (NULL, cached_changeset_inventory_t); (*cache)->inventory = NULL; talloc_set_destructor (*cache, cached_changeset_destructor); copy_options_but_regexps (&(*cache)->options, options); state.cache = *cache; state.callback = callback; state.closure = closure; debug (dbg_invent, 8, 
"cached_changeset_inventory_traveral: inventorying %s\n", tree->root); arch_inventory_traversal (options, tree, cache_inventory_callback, &state); } else { int index; debug (dbg_invent, 8, "cached_changeset_inventory_traveral: cached inventory %s\n", tree->root); ar_for_each ((*cache)->inventory, index) callback (closure, (*cache)->inventory[index]); } } /** * \brief cache inventory results, simultanesouly calling the ultimate callee */ void cache_inventory_callback (void * closure, invent_callback_data_t const * const data) { struct caching_inventory_state * state = (struct caching_inventory_state *)closure; /* callback the client */ state->callback (state->closure, data); /* append the result to the cache */ ar_push_invent_cb (&state->cache->inventory, invent_cb_cache (data)); } /** * \brief create a cached invent_cb entry - talloc managed */ invent_callback_data_t * invent_cb_cache (invent_callback_data_t const * const data) { invent_callback_data_t *result = talloc (NULL, invent_callback_data_t); *result = *data; result->path = talloc_strdup (result, data->path); if (result->id) result->id = talloc_strdup (result, data->id); return result; } /* tag: Tom Lord Wed May 14 09:47:16 2003 (invent.c) */