/* invent.c: project tree inventory library routines
*
* vim:smartindent ts=8:sts=2:sta:et:ai:shiftwidth=2
****************************************************************
* Copyright (C) 2002, 2003 Tom Lord
*
* See the file "COPYING" for further information about
* the copyright and warranty status of this work.
*/
#include "hackerlab/bugs/exception.h"
#include "hackerlab/bugs/panic.h"
#include "hackerlab/os/errno.h"
#include "hackerlab/os/errno-to-string.h"
#include "hackerlab/mem/mem.h"
#include "hackerlab/mem/talloc.h"
#include "hackerlab/char/char-class.h"
#include "hackerlab/char/str.h"
#include "hackerlab/arrays/ar.h"
#include "hackerlab/fs/file-names.h"
#include "hackerlab/vu/safe.h"
#include "libarch/inode-sig.h"
#include "libarch/inv-ids.h"
#include "libarch/invent.h"
#include "libarch/pfs.h"
#include "libarch/debug.h"
/* Classification patterns that apply to a single directory, as read from
 * that directory's `.arch-inventory' file (see read_directory_regexps).
 *
 * `storage' owns the compiled regexps.  `regexps' is a parallel array of
 * pointers into `storage'; a slot is 0 when the directory file did not
 * supply a pattern for that category, so callers must test the slot
 * before using it.
 */
struct directory_regexps
{
/* One pointer slot per regex_t-sized member of struct arch_inventory_regexps.
 * NOTE(review): the sizing assumes that struct holds nothing but regex_t
 * members -- confirm against its declaration.
 */
regex_t * regexps[sizeof (struct arch_inventory_regexps) / sizeof (regex_t)];
/* Backing storage for whichever patterns were actually compiled. */
struct arch_inventory_regexps storage;
/* DIR_REGEXP (dir_re, name) is the (assignable) pointer slot that
 * corresponds to the member `name##_pattern' of struct
 * arch_inventory_regexps, located by dividing the member's byte offset
 * by sizeof (regex_t).
 */
#define DIR_REGEXP(dir_re, name) \
(dir_re)->regexps[offsetof (struct arch_inventory_regexps, name##_pattern) / sizeof (regex_t)]
};
/* State threaded through every level of one inventory traversal. */
typedef struct
{
/* Invoked once for each file the traversal reports. */
inv_callback callback;
/* Opaque pointer handed back to `callback' as its first argument. */
void * closure;
/* NOTE(review): not assigned or read anywhere in the visible part of this
 * file; the traversal passes its explicit-skips table as a separate
 * argument instead -- confirm whether this member is still needed.
 */
assoc_table * explicit_skips;
} inventory_state_t;
/* __STDC__ prototypes for static functions
 *
 * These forward declarations give the helpers below internal linkage.
 * Some of the definitions further down omit the `static' keyword; they
 * still pick up internal linkage from the declaration here.
 *
 * NOTE(review): cache_inventory_callback, cached_changeset_destructor and
 * invent_cb_cache are declared here but have no definition in the visible
 * part of this file -- confirm they are defined later or remove them.
 */
static void copy_options_but_regexps (struct arch_inventory_options * dest,
const struct arch_inventory_options * src);
static int incompatible_options (struct arch_inventory_options const * left,
struct arch_inventory_options const * src);
static void cache_inventory_callback (void * closure, invent_callback_data_t const * const data);
static int cached_changeset_destructor (void * data);
static invent_callback_data_t * invent_cb_cache (invent_callback_data_t const * const data);
static void source_inventory_callback (void * closure, invent_callback_data_t const * const data);
static void source_inventory_files_callback (void * closure, invent_callback_data_t const * const data);
static void arch_inventory_traversal_internal (struct arch_inventory_options * options,
t_uchar const * user_dir,
t_uchar const * tree_dir,
inventory_state_t * state,
assoc_table * explicit_skips,
arch_project_tree_t * current_tree);
static int cmp_files (const void * va, const void * vb);
static int right_order_for_recursion (t_uchar * a, t_uchar * b);
static int contains_illegal_character (char * filename);
static int filename_matches (regex_t * pattern, char * filename);
static int is_nested_tree (char * path);
static int is_comment_line (t_uchar * line, long len);
static int sets_re (char * kw, char ** re, t_uchar * line, long len);
static int sets_id_tagging_method (char * kw,
enum arch_id_tagging_method * method_var,
enum arch_inventory_category * untagged_category_var,
enum arch_id_tagging_method method,
enum arch_inventory_category untagged_category,
t_uchar * line, long len);
static int sets_untagged_source_disposition (enum arch_inventory_category * untagged_category_var,
t_uchar * saved_line, long saved_len);
static void read_directory_regexps (struct directory_regexps * regexps, char const * dir_name);
static void free_directory_regexps (struct directory_regexps * regexps);
/* Collect the source inventory of TREE as a table of (path, id) records.
 *
 * include_ctl      -- non-zero: do not apply the `excludes' patterns.
 * include_precious -- non-zero: also report precious files.
 * include_nested   -- passed through as the `nested' traversal option.
 *
 * The process temporarily changes directory to the tree root and
 * returns to the original directory before returning.  The caller owns
 * the returned table.
 */
rel_table
arch_source_inventory (arch_project_tree_t * tree, int include_ctl, int include_precious, int include_nested)
{
  struct arch_inventory_options opts;
  rel_table records = 0;
  int saved_cwd_fd;

  /* keep a handle on the current directory so it can be restored */
  saved_cwd_fd = safe_open (".", O_RDONLY, 0);
  safe_chdir (tree->root);

  mem_set0 ((t_uchar *)&opts, sizeof (opts));
  opts.method = arch_names_id_tagging; /* default only */
  opts.want_ids = 1;
  opts.nested = include_nested;
  opts.include_excluded = !!include_ctl;
  opts.categories = arch_inventory_source;
  if (include_precious)
    opts.categories |= arch_inventory_precious;

  /* the tree's own configuration may replace the default method */
  arch_get_inventory_naming_conventions (&opts, tree);
  arch_inventory_traversal (&opts, tree, source_inventory_callback, (void *)&records);
  arch_free_inventory_naming_conventions (&opts);

  safe_fchdir (saved_cwd_fd);
  safe_close (saved_cwd_fd);

  return records;
}
/* Like arch_source_inventory, but directories are left out of the
 * result and nested trees are never entered.
 *
 * include_ctl      -- non-zero: do not apply the `excludes' patterns.
 * include_precious -- non-zero: also report precious files.
 *
 * The process temporarily changes directory to the tree root and
 * returns to the original directory before returning.  The caller owns
 * the returned table.
 */
rel_table
arch_source_files_inventory (arch_project_tree_t * tree, int include_ctl, int include_precious)
{
  struct arch_inventory_options opts;
  rel_table records = 0;
  int saved_cwd_fd;

  /* keep a handle on the current directory so it can be restored */
  saved_cwd_fd = safe_open (".", O_RDONLY, 0);
  safe_chdir (tree->root);

  mem_set0 ((t_uchar *)&opts, sizeof (opts));
  opts.method = arch_names_id_tagging; /* default only */
  opts.want_ids = 1;
  opts.nested = 0;
  opts.include_excluded = !!include_ctl;
  opts.categories = arch_inventory_source;
  if (include_precious)
    opts.categories |= arch_inventory_precious;

  /* this fills out the inventory method actually in use */
  arch_get_inventory_naming_conventions (&opts, tree);
  arch_inventory_traversal (&opts, tree, source_inventory_files_callback, (void *)&records);
  arch_free_inventory_naming_conventions (&opts);

  safe_fchdir (saved_cwd_fd);
  safe_close (saved_cwd_fd);

  return records;
}
/**
 * \brief Would two sets of inventory options produce different results?
 *
 * Only the scalar option fields are compared; the compiled regexps are
 * not examined.
 *
 * \return 1 when any compared field differs, 0 otherwise
 */
int
incompatible_options (struct arch_inventory_options const * left,
                      struct arch_inventory_options const * right)
{
  if (left->categories != right->categories)
    return 1;
  if (left->method != right->method)
    return 1;
  if (left->want_ids != right->want_ids)
    return 1;
  if (left->treat_unrecognized_source_as_source != right->treat_unrecognized_source_as_source)
    return 1;
  if (left->nested != right->nested)
    return 1;
  if (left->include_excluded != right->include_excluded)
    return 1;
  if (left->override_method != right->override_method)
    return 1;

  return 0;
}
/* Copy the scalar option fields from SRC to DEST.
 *
 * The compiled regexps are deliberately not copied: the caller is
 * expected to compile a fresh set for DEST.  Fields not listed here
 * (everything in DEST other than the seven below) are left untouched.
 */
static void
copy_options_but_regexps (struct arch_inventory_options * dest,
                          const struct arch_inventory_options * src)
{
  /* what to report */
  dest->categories = src->categories;
  dest->want_ids = src->want_ids;
  dest->include_excluded = src->include_excluded;
  dest->nested = src->nested;

  /* how to classify */
  dest->method = src->method;
  dest->override_method = src->override_method;
  dest->treat_unrecognized_source_as_source = src->treat_unrecognized_source_as_source;
}
/* Inventory callback: append a (path, id) record for every reported file.
 *
 * CLOSURE is the address of the rel_table being built.
 */
void
source_inventory_callback (void * closure, invent_callback_data_t const * const data)
{
  rel_table * const table = (rel_table *)closure;

  rel_add_records (table, rel_make_record (data->path, data->id, 0), 0);
}
/* Inventory callback: append a (path, id) record for every reported
 * file that is not a directory.
 *
 * CLOSURE is the address of the rel_table being built.
 */
void
source_inventory_files_callback (void * closure, invent_callback_data_t const * const data)
{
  rel_table * const table = (rel_table *)closure;

  /* directories are not wanted in a files-only inventory */
  if (S_ISDIR (data->stat_buf.st_mode))
    return;

  rel_add_records (table, rel_make_record (data->path, data->id, 0), 0);
}
/* Return a freshly allocated copy of the current default regexp for
 * inventory category CAT.  The caller owns the returned string.
 *
 * Panics when CAT is not one of the six categories handled below.
 */
t_uchar *
arch_default_naming_conventions_regexp (enum arch_inventory_category cat)
{
  char const * pattern = 0;

  switch (cat)
    {
    case arch_inventory_source:
      pattern = ".";
      break;

    case arch_inventory_precious:
      pattern = "^(\\+.*|\\.gdbinit|\\.#ckpts-lock|=build\\.*|=install\\.*|CVS|CVS\\.adm|RCS|RCSLOG|SCCS|TAGS|tags|cscope.*\\.out|\\.svn)$";
      break;

    case arch_inventory_backup:
      pattern = "^.*(~|\\.~[0-9]+~|\\.bak|\\.sw(o|p)|\\.orig|\\.rej|\\.original|\\.modified|\\.reject|\\.(o|a|so|core|so(\\.[[:digit:]]+)*))$|^core$";
      break;

    case arch_inventory_junk:
      pattern = "^(,.*)$";
      break;

    case arch_inventory_unrecognized:
      pattern = "^$";
      break;

    case arch_inventory_excludes:
      /* NOTE(review): the leading `.' of `.arch-ids' is not escaped, so it
       * matches any character -- confirm whether that is intended.
       */
      pattern = "^(.arch-ids|\\{arch\\}|\\.arch-inventory)$";
      break;

    default:
      panic ("unrecognized inventory category (arch_default_naming_conventions_regexp)");
      return 0; /* not reached */
    }

  return str_save (0, pattern);
}
/* DO NOT under ANY circumstances change these patterns.  They predate
 * pre-populating tagging-method and will break old archives if altered.
 *
 * Return a freshly allocated copy of the historical default regexp for
 * inventory category CAT.  The caller owns the returned string.
 *
 * Panics when CAT is not one of the six categories handled below.
 */
t_uchar *
arch_ancient_default_naming_conventions_regexp (enum arch_inventory_category cat)
{
  char const * pattern = 0;

  switch (cat)
    {
    case arch_inventory_source:
      pattern = "^([_=a-zA-Z0-9].*|\\.arch-ids|\\{arch\\}|\\.arch-project-tree)$";
      break;

    case arch_inventory_precious:
      pattern = "^(\\+.*|\\.gdbinit|\\.#ckpts-lock|=build\\.*|=install\\.*|CVS|CVS\\.adm|RCS|RCSLOG|SCCS|TAGS)$";
      break;

    case arch_inventory_backup:
      pattern = "^.*(~|\\.~[0-9]+~|\\.bak|\\.orig|\\.rej|\\.original|\\.modified|\\.reject)$";
      break;

    case arch_inventory_junk:
      pattern = "^(,.*)$";
      break;

    case arch_inventory_unrecognized:
      pattern = "^(.*\\.(o|a|so|core)|core)$";
      break;

    case arch_inventory_excludes:
      pattern = "^(.arch-ids|\\{arch\\}|\\.arch-inventory)$";
      break;

    default:
      panic ("unrecognized inventory category (arch_ancient_default_naming_conventions_regexp)");
      return 0; /* not reached */
    }

  return str_save (0, pattern);
}
/* Return a freshly allocated copy of the regexp used for inventory
 * category CAT when there is no project tree to consult: every name
 * matches `source', only the arch control directories are excluded,
 * and the other categories match nothing but the empty string.
 * The caller owns the returned string.
 *
 * Panics when CAT is not one of the six categories handled below.
 */
t_uchar *
arch_null_default_naming_conventions_regexp (enum arch_inventory_category cat)
{
  char const * pattern = 0;

  switch (cat)
    {
    case arch_inventory_source:
      pattern = ".*";
      break;

    case arch_inventory_precious:
    case arch_inventory_backup:
    case arch_inventory_junk:
    case arch_inventory_unrecognized:
      pattern = "^$";
      break;

    case arch_inventory_excludes:
      pattern = "^(.arch-ids|\\{arch\\})$";
      break;

    default:
      panic ("unrecognized inventory category (arch_null_default_naming_conventions_regexp)");
      return 0; /* not reached */
    }

  return str_save (0, pattern);
}
/* Compile PATTERN (extended syntax) into COMPILED, panicking with
 * COMPLAINT when the pattern does not compile.
 */
static void
compile_naming_convention (regex_t * compiled, char const * pattern, char * complaint)
{
  if (regcomp (compiled, pattern, REG_EXTENDED))
    panic (complaint);
}

/* Fill in the compiled naming-convention regexps of OPTIONS.
 *
 * With a usable TREE (non-null, with a root), the tree's id-tagging-method
 * file is read if it exists: each non-comment line may select the tagging
 * method, set the untagged-source disposition, or contribute an
 * alternative to one of the six category patterns.  Unless
 * options->override_method is set, the method and untagged-source
 * category found there are stored in OPTIONS.  Categories the file does
 * not mention get the "ancient" defaults.
 *
 * Without a usable TREE the "null" defaults are used for everything.
 *
 * Panics when any pattern fails to compile.  Release the compiled
 * patterns with arch_free_inventory_naming_conventions.
 */
void
arch_get_inventory_naming_conventions (struct arch_inventory_options * options,
                                       arch_project_tree_t * tree)
{
  char * excludes = 0;
  char * junk = 0;
  char * backup = 0;
  char * precious = 0;
  char * unrecognized = 0;
  char * source = 0;

  if (!tree || !tree->root)
    {
      /* no tree to consult */
      excludes = arch_null_default_naming_conventions_regexp (arch_inventory_excludes);
      junk = arch_null_default_naming_conventions_regexp (arch_inventory_junk);
      backup = arch_null_default_naming_conventions_regexp (arch_inventory_backup);
      precious = arch_null_default_naming_conventions_regexp (arch_inventory_precious);
      unrecognized = arch_null_default_naming_conventions_regexp (arch_inventory_unrecognized);
      source = arch_null_default_naming_conventions_regexp (arch_inventory_source);
    }
  else
    {
      t_uchar * method_file = arch_tree_id_tagging_method_file (tree);

      if (!safe_access (method_file, F_OK))
        {
          enum arch_id_tagging_method tree_method = arch_names_id_tagging;
          enum arch_inventory_category untagged_source_category = arch_inventory_precious;
          t_uchar * line;
          long len;
          int in_fd;

          in_fd = safe_open (method_file, O_RDONLY, 0);

          for (;;)
            {
              safe_next_line (&line, &len, in_fd);
              if (!len)
                break;

              /* the first recogniser to accept the line consumes it */
              if (is_comment_line (line, len))
                continue;
              if (sets_id_tagging_method ("implicit", &tree_method, &untagged_source_category, arch_implicit_id_tagging, arch_inventory_source, line, len))
                continue;
              if (sets_id_tagging_method ("tagline", &tree_method, &untagged_source_category, arch_tagline_id_tagging, arch_inventory_source, line, len))
                continue;
              if (sets_id_tagging_method ("explicit", &tree_method, &untagged_source_category, arch_explicit_id_tagging, arch_inventory_precious, line, len))
                continue;
              if (sets_id_tagging_method ("names", &tree_method, &untagged_source_category, arch_names_id_tagging, arch_inventory_source, line, len))
                continue;
              if (sets_untagged_source_disposition (&untagged_source_category, line, len))
                continue;
              if (sets_re ("exclude", &excludes, line, len))
                continue;
              if (sets_re ("junk", &junk, line, len))
                continue;
              if (sets_re ("backup", &backup, line, len))
                continue;
              if (sets_re ("precious", &precious, line, len))
                continue;
              if (sets_re ("unrecognized", &unrecognized, line, len))
                continue;
              /* lines nothing recognises are silently ignored */
              (void) sets_re ("source", &source, line, len);
            }

          safe_close (in_fd);

          if (!options->override_method)
            {
              options->method = tree_method;
              options->untagged_source_category = untagged_source_category;
            }
        }

      /* default naming conventions for anything the file left unset */
      if (!excludes)
        excludes = arch_ancient_default_naming_conventions_regexp (arch_inventory_excludes);
      if (!junk)
        junk = arch_ancient_default_naming_conventions_regexp (arch_inventory_junk);
      if (!backup)
        backup = arch_ancient_default_naming_conventions_regexp (arch_inventory_backup);
      if (!precious)
        precious = arch_ancient_default_naming_conventions_regexp (arch_inventory_precious);
      if (!unrecognized)
        unrecognized = arch_ancient_default_naming_conventions_regexp (arch_inventory_unrecognized);
      if (!source)
        source = arch_ancient_default_naming_conventions_regexp (arch_inventory_source);

      lim_free (0, method_file);
    }

  /* compile the conventions */
  compile_naming_convention (&options->regexps.excludes_pattern, excludes,
                             "bogus id-tagging-method regexp for `excludes'");
  compile_naming_convention (&options->regexps.junk_pattern, junk,
                             "bogus id-tagging-method regexp for `junk'");
  compile_naming_convention (&options->regexps.backup_pattern, backup,
                             "bogus id-tagging-method regexp for `backup'");
  compile_naming_convention (&options->regexps.precious_pattern, precious,
                             "bogus id-tagging-method regexp for `precious'");
  compile_naming_convention (&options->regexps.unrecognized_pattern, unrecognized,
                             "bogus id-tagging-method regexp for `unrecognized'");
  compile_naming_convention (&options->regexps.source_pattern, source,
                             "bogus id-tagging-method regexp for `source'");

  /* the pattern sources are no longer needed once compiled */
  lim_free (0, excludes);
  lim_free (0, junk);
  lim_free (0, backup);
  lim_free (0, precious);
  lim_free (0, unrecognized);
  lim_free (0, source);
}
/* Release the six regexps compiled into OPTIONS by
 * arch_get_inventory_naming_conventions.  OPTIONS itself is not freed.
 */
void
arch_free_inventory_naming_conventions (struct arch_inventory_options * options)
{
  regex_t * compiled[6];
  int i;

  compiled[0] = &options->regexps.excludes_pattern;
  compiled[1] = &options->regexps.junk_pattern;
  compiled[2] = &options->regexps.backup_pattern;
  compiled[3] = &options->regexps.precious_pattern;
  compiled[4] = &options->regexps.unrecognized_pattern;
  compiled[5] = &options->regexps.source_pattern;

  for (i = 0; i < 6; ++i)
    regfree (compiled[i]);
}
/* Run an inventory traversal starting at "." -- a convenience wrapper
 * around arch_inventory_traversal_path.  CALLBACK is invoked with
 * CLOSURE for each reported file.
 */
void
arch_inventory_traversal (struct arch_inventory_options * options,
                          arch_project_tree_t * tree,
                          inv_callback callback,
                          void * closure)
{
  t_uchar const * const start_dir = ".";

  arch_inventory_traversal_path (options, tree, start_dir, callback, closure);
}
/* Run an inventory traversal of the directory PATH inside TREE, invoking
 * CALLBACK with CLOSURE for each reported file.
 *
 * For the implicit, tagline and explicit tagging methods the tree's id
 * shortcut table is (re)read for the duration of the traversal.  The
 * value tree->id_tagging_shortcut had on entry is put back before
 * returning.
 */
void
arch_inventory_traversal_path (struct arch_inventory_options * options,
                               arch_project_tree_t * tree,
                               t_uchar const * const path,
                               inv_callback callback,
                               void * closure)
{
  assoc_table saved_id_tagging_shortcut = 0;
  assoc_table explicit_skips = 0;
  inventory_state_t state;
  t_uchar * tree_rel_path;

  saved_id_tagging_shortcut = tree->id_tagging_shortcut;

  if ((options->method == arch_implicit_id_tagging) || (options->method == arch_tagline_id_tagging) ||
      (options->method == arch_explicit_id_tagging))
    arch_read_id_shortcut (&tree->id_tagging_shortcut, tree->root);

  state.callback = callback;
  state.closure = closure;
  /* not used by the traversal itself, but never leave it indeterminate */
  state.explicit_skips = 0;

  /* PATH as seen from the tree root */
  tree_rel_path = arch_abs_path (path);
  tree_rel_path = str_replace (tree_rel_path, arch_project_tree_rel_path_from_abs (tree, tree_rel_path));

  arch_inventory_traversal_internal (options, path, tree_rel_path, &state, &explicit_skips, tree);

  /* Only discard a table that differs from the saved one.  When no
   * shortcut was read above, the tree still holds the saved table;
   * freeing it and then restoring the same pointer would leave the tree
   * with a dangling table.
   */
  if (tree->id_tagging_shortcut != saved_id_tagging_shortcut)
    free_assoc_table (tree->id_tagging_shortcut);
  tree->id_tagging_shortcut = saved_id_tagging_shortcut;

  free_assoc_table (explicit_skips);
  lim_free (0, tree_rel_path);
}
/* Array types used by the directory traversal below: `ar_charstr' for the
 * list of file names in a directory and `ar_legint' for the index queues.
 * NOTE(review): AR_TYPEDEF comes from hackerlab/arrays/ar.h; presumably it
 * also declares the ar_setsize_* / ar_free_* helpers used below -- confirm.
 */
AR_TYPEDEF(char *, charstr);
AR_TYPEDEF(int, legint);
/* Forward declaration: processes one step of a directory traversal (the
 * next file, or the next deferred directory).  Declared here because
 * arch_inventory_traversal_internal calls it before its definition.
 */
static void do_file_or_deferred (int *deferred_recursions_head, int * deferred_recursions_tail, int * x, int const n_files,
ar_charstr files, ar_legint deferred_recursions,
ar_legint is_deferred_nested,
t_uchar const * const user_dir,
t_uchar const * const tree_dir,
arch_project_tree_t * current_tree,
struct arch_inventory_options * options,
inventory_state_t * state,
struct directory_regexps * dir_regexps,
assoc_table * explicit_skips);
/**
 * \brief Inventory a single directory, recursing as required.
 *
 * Reads every entry of \a user_dir, sorts the names, then classifies
 * them one at a time via do_file_or_deferred.  A `.arch-inventory'
 * file found in the directory supplies per-directory patterns.
 *
 * As a side effect \a current_tree's untagged_is_source and tag_method
 * are brought in line with \a options.
 *
 * \param options        traversal options and compiled patterns
 * \param user_dir       the directory as the process can open it
 * \param tree_dir       the same directory relative to the tree root
 * \param state          callback and closure to report files to
 * \param explicit_skips table handed through to the id lookup
 * \param current_tree   the project tree logic to use for this tree
 */
void
arch_inventory_traversal_internal (struct arch_inventory_options * options,
                                   t_uchar const * user_dir,
                                   t_uchar const * tree_dir,
                                   inventory_state_t * state,
                                   assoc_table * explicit_skips,
                                   arch_project_tree_t * current_tree)
{
  DIR * dir;
  ar_charstr files = 0;
  int n_files = 0;
  ar_legint deferred_recursions = 0;
  ar_legint is_deferred_nested = 0;
  int deferred_recursions_head = 0;
  int deferred_recursions_tail = 0;
  struct directory_regexps * dir_regexps = 0;
  int untagged_is_source;
  int x;

  /* slurp the directory listing */
  safe_opendir (&dir, user_dir);
  for (;;)
    {
      char * file;

      safe_readdir (&file, dir);
      if (!file)
        break;

      ar_push_char_star (&files, file);
      ++n_files;

      /* look for per-directory inventory regexps */
      if (!dir_regexps && (file[0] == '.') && !str_cmp (".arch-inventory", file))
        {
          dir_regexps = lim_malloc (0, sizeof *dir_regexps);
          read_directory_regexps (dir_regexps, user_dir);
        }
    }
  safe_closedir (dir);

  qsort ((void *)files, n_files, sizeof (char *), cmp_files);

  /* We want to invoke `callback' on a lexically sorted list of paths.
   * Suppose that "foo" is a directory, but "foo-bar" also exists.
   * That means we have to invoke callbacks in the order:
   *
   *     foo
   *     foo-bar
   *     foo/xyzzy
   *
   * When we detect that "foo" is a directory, we can't necessarily
   * recurse immediately.  Instead, we keep a queue of deferred
   * directories (indices into `files'), recursing on them at the
   * right time.  At most every file can be deferred, hence the sizes.
   */
  ar_setsize_legint (&deferred_recursions, n_files);
  ar_setsize_legint (&is_deferred_nested, n_files);

  /* FIXME: parameterisation should occur before we get in here
   * RBC 20050321
   */
  untagged_is_source = (options->untagged_source_category == arch_inventory_source);
  if (untagged_is_source != !!current_tree->untagged_is_source)
    current_tree->untagged_is_source = untagged_is_source;

  if (current_tree->tag_method != options->method)
    current_tree->tag_method = options->method;

  /* each call consumes either one file or one deferred directory */
  for (x = 0; (x < n_files) || (deferred_recursions_head != deferred_recursions_tail); )
    {
      do_file_or_deferred (&deferred_recursions_head, &deferred_recursions_tail, &x, n_files, files, deferred_recursions,
                           is_deferred_nested, user_dir, tree_dir, current_tree, options, state, dir_regexps, explicit_skips);
    }

  ar_for_each (files, x)
    lim_free (0, files[x]);
  ar_free_charstr (&files);
  ar_free_legint (&deferred_recursions);
  ar_free_legint (&is_deferred_nested);
  free_directory_regexps (dir_regexps);
}
/* Point SELF->path at a newly allocated "CURRENT_DIR/FILE" path.
 * Any previous value of SELF->path is overwritten, not freed.
 */
static void
set_file (invent_callback_data_t *self, t_uchar const * const current_dir, char * file)
{
  t_uchar * const joined = file_name_in_vicinity (0, current_dir, file);

  self->path = joined;
}
/* Perform one step of the traversal of a single directory.
 *
 * Each call does exactly one of two things:
 *
 *   - if a deferred directory is pending and it is now its turn in
 *     lexical order (or no plain files remain), advance
 *     *deferred_recursions_head and recurse into that directory; or
 *   - otherwise take files[*x], advance *x, classify the file and
 *     report it through state->callback when its category is among
 *     options->categories.
 *
 * Classification is decided by the first rule that applies, in this
 * order: "." and ".." (skipped); file types other than regular file,
 * symlink or directory (precious); names rejected by
 * contains_illegal_character (unrecognized); the exclude patterns
 * (skipped, unless options->include_excluded); arch control files
 * (source); "++" prefix (precious); ",," prefix (junk); the
 * per-directory patterns; then the tree-wide junk, backup, precious,
 * unrecognized and source patterns.  Names matching nothing are
 * unrecognized.
 *
 * A directory classified as source is either recursed into at once or
 * queued in deferred_recursions (with is_deferred_nested recording
 * whether it is a nested tree) so that callbacks happen in sorted
 * order.
 *
 * Every path through the function ends at the `next_file' label, which
 * frees cbparam.path and tree_rel_path.  Callbacks therefore must not
 * keep the pointers they are handed.
 */
void
do_file_or_deferred (int * deferred_recursions_head, int * deferred_recursions_tail, int *x, int const n_files, ar_charstr files,
ar_legint deferred_recursions, ar_legint is_deferred_nested, t_uchar const * const user_dir,
t_uchar const * const tree_dir,
arch_project_tree_t * current_tree,
struct arch_inventory_options * options,
inventory_state_t * state,
struct directory_regexps * const dir_regexps,
assoc_table * explicit_skips)
{
char * file;
struct stat *stat_buf;
int is_control = 0;
t_uchar * tree_rel_path = NULL;
invent_callback_data_t cbparam = {0,};
/* Is it time to recurse into the oldest deferred directory?  If so,
 * rebuild its paths and jump straight to the matching recursion code
 * below.  Note that `file' and `stat_buf' are NOT set on this path; the
 * code at the two handle_deferred* labels does not use them.
 */
if ((*deferred_recursions_head != *deferred_recursions_tail)
&& ((*x >= n_files)
|| right_order_for_recursion (files[deferred_recursions[*deferred_recursions_head]], files[*x])))
{
int deferred_nested = is_deferred_nested[*deferred_recursions_head];
set_file (&cbparam, user_dir, files[deferred_recursions[*deferred_recursions_head]]);
tree_rel_path = file_name_in_vicinity (0, tree_dir, files[deferred_recursions[*deferred_recursions_head]]);
++*deferred_recursions_head;
if (deferred_nested)
goto handle_deferred_nested;
else
goto handle_deferred;
}
/* Otherwise consume the next file of the sorted listing. */
file = files[*x];
++*x;
set_file (&cbparam, user_dir, file);
stat_buf = &cbparam.stat_buf;
cbparam.id = 0;
cbparam.has_source_name = 0;
debug (dbg_invent, 8, "do_file_or_deferred: file %s\n", file);
/* . and .. are mandatory exclude files
*/
if (!str_cmp (".", file) || !str_cmp ("..", file))
{
/* common exit: release the per-file paths (tree_rel_path may still be NULL here) */
next_file:
lim_free (0, cbparam.path);
lim_free (0, tree_rel_path);
return;
}
tree_rel_path = file_name_in_vicinity (0, tree_dir, file);
{
/* lstat, so that symlinks are examined rather than followed */
t_uchar * abs_path = file_name_in_vicinity (0, current_tree->root, tree_rel_path);
safe_lstat (abs_path, &cbparam.stat_buf);
lim_free (0, abs_path);
}
/* only symlinks, directories and plain files can be
* considered source, all others need to be precious
*/
if ( !S_ISREG(stat_buf->st_mode)
&& !S_ISLNK(stat_buf->st_mode)
&& !S_ISDIR(stat_buf->st_mode))
{
goto precious_file;
}
/* names rejected by contains_illegal_character are
* mandatory unrecognized files
*/
if (contains_illegal_character (file))
{
unrecognized_file:
if (options->categories & arch_inventory_unrecognized)
{
cbparam.category = arch_inventory_unrecognized;
state->callback (state->closure, &cbparam);
}
goto next_file;
}
/* callers can specify a pattern for additional files to
* exclude from consideration.
*/
if (!options->include_excluded
&& ((dir_regexps && DIR_REGEXP(dir_regexps, excludes) && filename_matches (DIR_REGEXP(dir_regexps, excludes), file))
|| filename_matches (&options->regexps.excludes_pattern, file)))
goto next_file;
/* arch control files that get past the exclude pattern are
* always source.
*/
is_control = arch_is_control_file (cbparam.path, file);
if (is_control)
{
goto handle_source_file;
}
/* file names beginning with "++" are always precious.
*/
if ((file[0] == '+') && (file[1] == '+'))
{
goto precious_file;
}
/* file names beginning with ",," are always considered junk files.
*/
if (file[0] == ',' && file[1] == ',')
goto junk_file;
/* test against optional per-directory regexps first
*/
if (dir_regexps)
{
regex_t * re;
/* junk */
re = DIR_REGEXP(dir_regexps, junk);
if (re && filename_matches (re, file))
goto junk_file;
/* backup */
re = DIR_REGEXP(dir_regexps, backup);
if (re && filename_matches (re, file))
goto backup_file;
/* precious */
re = DIR_REGEXP(dir_regexps, precious);
if (re && filename_matches (re, file))
goto precious_file;
/* unrecognized */
re = DIR_REGEXP(dir_regexps, unrecognized);
if (re && filename_matches (re, file))
goto unrecognized_file;
/* source */
re = DIR_REGEXP(dir_regexps, source);
if (re && filename_matches (re, file))
goto handle_source_file;
}
/* callers can specify a pattern for "junk" files -- files
* presumed safe-to-be-removed by automatic tools, barring
* concurrent tools.
*/
if (filename_matches (&options->regexps.junk_pattern, file))
{
junk_file:
if (options->categories & arch_inventory_junk)
{
cbparam.category = arch_inventory_junk;
state->callback (state->closure, &cbparam);
}
goto next_file;
}
/* callers can specify a pattern for "backup" files -- files
* that are created by editors and similar programs to save old
* versions
*/
if (filename_matches (&options->regexps.backup_pattern, file))
{
backup_file:
if (options->categories & arch_inventory_backup)
{
cbparam.category = arch_inventory_backup;
state->callback (state->closure, &cbparam);
}
goto next_file;
}
/* callers can specify a pattern for "precious" files -- files
* that are not part of the source, but which should never be
* automatically removed.
*/
if (filename_matches (&options->regexps.precious_pattern, file))
{
precious_file:
if (options->categories & arch_inventory_precious)
{
cbparam.category = arch_inventory_precious;
state->callback (state->closure, &cbparam);
}
goto next_file;
}
/* callers can specify a pattern for explicitly "unrecognized" files --
* files that should be flagged as errors in tree-lint reports.
*/
if (filename_matches (&options->regexps.unrecognized_pattern, file))
{
goto unrecognized_file;
}
/* finally, a pattern for "source" files -- files which are expected
* to be source files. Note that the option untagged_source_category
* determines the final disposition of files which match the source
* pattern, but have no evident id.
*
* If a directory appears to be a source directory, but contains a rules
* directory of its own, then it is in fact the root of a nested tree -- not
* a regular source file.
*/
if (filename_matches (&options->regexps.source_pattern, file))
{
handle_source_file:
if (S_ISDIR (stat_buf->st_mode) && is_nested_tree (cbparam.path))
{
if (options->categories & arch_inventory_tree)
{
cbparam.category = arch_inventory_tree;
cbparam.has_source_name = 1;
state->callback (state->closure, &cbparam);
}
if (options->nested)
{
/* a later file sorts before this directory's contents: queue the
 * directory (by index into `files') and come back to it later
 */
if ((*x < n_files) && !right_order_for_recursion (file, files[*x]))
{
deferred_recursions[*deferred_recursions_tail] = *x - 1;
is_deferred_nested[*deferred_recursions_tail] = 1;
++*deferred_recursions_tail;
goto next_file;
}
handle_deferred_nested:
{
/* The nested tree gets its own tree object and its own compiled
 * naming conventions; only the scalar options are inherited.
 */
struct arch_inventory_options nest_opts;
arch_project_tree_t * nested_tree;
nested_tree = arch_project_tree_new_ext (talloc_context, cbparam.path, 1, 1);
mem_set0 ((t_uchar *)&nest_opts, sizeof nest_opts);
copy_options_but_regexps (&nest_opts, options);
arch_get_inventory_naming_conventions (&nest_opts, nested_tree);
arch_inventory_traversal_internal (&nest_opts, cbparam.path, ".", state, explicit_skips, nested_tree);
arch_free_inventory_naming_conventions (&nest_opts);
if (nested_tree != current_tree)
arch_project_tree_delete (nested_tree);
}
}
goto next_file;
}
else
{
t_uchar * id;
enum arch_inventory_category this_files_category;
/* Not a nested tree. Matches the source pattern.
*/
id = 0;
/* Do we need to compute the inventory id of this file which has a source name?
*
* Certainly so if the caller wants ids.
*
* Otherwise we need the id only if we need it to verify that this
* is, indeed, source:
*
* If untagged-source is source, then we don't need the id.
* If untagged-source is something else, then we need to see if it has an id.
*/
if (options->want_ids || !current_tree->untagged_is_source)
{
/* if the caller wants tags, or if we can only be certain that this is
* source by seeing if it has an id, get the id.
*/
assoc_table saved_explicit_skips;
/* FIXME - callers should be setting this in the tree in the first place */
saved_explicit_skips = current_tree->explicit_skips;
current_tree->explicit_skips = *explicit_skips;
id = arch_inventory_id (current_tree, tree_rel_path, stat_buf);
/* restore; the lookup may have replaced the table, so copy it back out first */
*explicit_skips = current_tree->explicit_skips;
current_tree->explicit_skips = saved_explicit_skips;
}
/* What is the category of the file?
*
* If we've confirmed that it has an id, then it's certainly source.
* Also if untagged-source is automatically source.
*
* If untagged-source is not source there are two cases:
*
* If it's a control file, then it is unrecognized, unconditionally.
*
* Otherwise, the untagged-source directive tells us what it is.
*/
if (id || (options->untagged_source_category == arch_inventory_source))
this_files_category = arch_inventory_source;
else if (is_control)
this_files_category = arch_inventory_unrecognized;
else
this_files_category = options->untagged_source_category;
/* Some callbacks want to see "matches source name but unrecognized
* for want of tag" as a source file. They distinguish this from
* "matches unrecognized name" or "doesn't match any pattern".
* RBC 20050409 this is used by changeset-inventory in trees
* where untagged-source = unrecognized, where explicit-default
* tags are disabled, so that explicit-default tagged files
* are returned to the changeset inventory.
* So what happens in untagged-source unrecognized trees is:
* if the file is a 'dont-care' dir, then changeset-creation ignores
* it silently, otherwise barfs.
* To trigger this you need:
* * ids requested
* * file is source
* * file has no id
* * untagged-source unrecognized
* * explicit-default --dont-care
* which means that id will always be 0.
* To eliminate this option, we could just not callback in this circumstance.
*/
if (options->treat_unrecognized_source_as_source && (this_files_category == arch_inventory_unrecognized))
{
this_files_category = arch_inventory_source;
}
if (this_files_category & options->categories)
{
cbparam.category = this_files_category;
cbparam.id = id;
cbparam.has_source_name = 1;
state->callback (state->closure, &cbparam);
}
/* a non-source category implies id is 0 here, so nothing leaks */
if (this_files_category != arch_inventory_source)
goto next_file;
/* the callback has returned; the id it was shown is released now */
lim_free (0, id);
id = 0;
/* recurse into directories, or plan to later.
*/
if (S_ISDIR (stat_buf->st_mode))
{
if ((*x < n_files) && !right_order_for_recursion (file, files[*x]))
{
deferred_recursions[*deferred_recursions_tail] = *x - 1;
is_deferred_nested[*deferred_recursions_tail] = 0;
++*deferred_recursions_tail;
goto next_file;
}
handle_deferred:
if (! options->no_recursion)
arch_inventory_traversal_internal (options, cbparam.path, tree_rel_path, state, explicit_skips, current_tree);
}
goto next_file;
}
}
else
goto unrecognized_file;
}
/* qsort comparison function for an array of `char *' file names:
 * orders the strings with str_cmp.
 */
static int
cmp_files (const void * va, const void * vb)
{
  char * const * lhs = (char * const *)va;
  char * const * rhs = (char * const *)vb;

  return str_cmp (*lhs, *rhs);
}
/* Given names A and B, already in lexical order (A < B), decide whether
 * the contents of directory A ("A/...") may be listed before B.
 *
 * Returns non-zero when recursing into A right away keeps the overall
 * listing sorted, 0 when the recursion has to be deferred.
 */
static int
right_order_for_recursion (t_uchar * a, t_uchar * b)
{
  /* step over the common prefix */
  for (; *a && (*a == *b); ++a, ++b)
    ;

  if (*a && *b)
    {
      /* the names differ at a real character; A < B settles it */
      invariant (*a < *b);
      return 1;
    }

  if (!*a)
    {
      /* A is a prefix of B: does "A/" come before "B" in an
       * alphabetical listing?
       */
      return (*b > '/');
    }

  /* B is a prefix of A: does "B/" come after "A" in an alphabetical
   * listing?
   */
  return (*a < '/');
}
/* Return 1 when FILENAME contains a character for which
 * char_is_non_ascii is true, 0 otherwise.
 *
 * FILENAME must be a single path component: a '/' trips an invariant.
 */
static int
contains_illegal_character (char * filename)
{
  char * p;

  for (p = filename; *p; ++p)
    {
      invariant (*p != '/');
      if (char_is_non_ascii (*(t_uchar *)p))
        return 1;
    }

  return 0;
}
/* Return 1 when FILENAME matches the compiled PATTERN, 0 when it does
 * not.  Any other regexec result is treated as fatal.
 */
static int
filename_matches (regex_t * pattern, char * filename)
{
  int const status = regexec (pattern, filename, 0, 0, 0);

  if (status == REG_NOERROR)
    return 1;

  if (status == REG_NOMATCH)
    return 0;

  panic ("unexpected regexec error in arch_inventory_traversal");
  return -1;
}
/* Return non-zero when REL_FILE names one of arch's control files:
 * `.arch-ids' and its id files, `{arch}' and the paths below it, or
 * `.arch-inventory', optionally preceded by leading directories.
 *
 * FILENAME is not consulted.  The pattern is compiled on first use and
 * kept in static storage for the life of the process.
 */
int
arch_is_control_file (char * rel_file, char * filename)
{
  static char const control_re[] =
    "^((.*/)?(\\.arch-ids(/(=id|[^/]*\\.id))?|\\{arch\\}((/[a-zA-Z=][^/~]*)(/[0-9a-zA-Z=][^/~]*)*)?|\\{arch\\}/\\.arch-project-tree|\\.arch-inventory))$";
  static regex_t control_pattern = {0,};
  static int compiled = 0;

  if (!compiled)
    {
      int re_error;

      re_error = regcomp (&control_pattern, control_re, REG_EXTENDED);
      invariant (!re_error);
      compiled = 1;
    }

  return filename_matches (&control_pattern, rel_file);
}
/* Return non-zero when the directory PATH is itself the root of a
 * project tree, as judged by arch_project_tree_dir_is_root.
 */
static int
is_nested_tree (char * path)
{
  int const is_root = arch_project_tree_dir_is_root (NULL, path, NULL);

  return is_root;
}
/* Return 1 when LINE (LEN bytes) is to be ignored by the configuration
 * parsers: it is empty, starts with whitespace, or starts with '#'.
 * Return 0 otherwise.
 */
static int
is_comment_line (t_uchar * line, long len)
{
  if (!len)
    return 1;

  if (char_is_space (line[0]))
    return 1;

  return (line[0] == '#');
}
/* Recognise a configuration line of the form "KW <regexp>".
 *
 * When LINE (LEN bytes) starts with KW followed by whitespace, the rest
 * of the line -- with surrounding whitespace trimmed -- is wrapped in
 * parentheses and stored in *RE.  If *RE already holds a pattern, the
 * new group is appended to it as a further `|' alternative.
 *
 * Returns 1 when the line was consumed, 0 when it is not a KW line.
 * *RE is heap-allocated; the caller owns it.
 */
static int
sets_re (char * kw, char ** re, t_uchar * line, long len)
{
  int kw_len;
  t_uchar * body;
  t_uchar * group;

  kw_len = str_length (kw);

  if (len < (kw_len + 1))
    return 0;

  if (str_cmp_prefix (kw, line) || !char_is_space (line[kw_len]))
    return 0;

  /* drop the keyword, then trim whitespace at both ends */
  line += kw_len;
  len -= kw_len;

  for (; len && char_is_space (line[0]); ++line, --len)
    ;

  for (; len && char_is_space (line [len - 1]); --len)
    ;

  body = str_save_n (0, line, len);
  group = str_alloc_cat (0, (*re ? "|(" : "("), body);
  group = str_realloc_cat (0, group, ")");
  lim_free (0, body);

  if (*re)
    {
      /* extend the existing pattern with one more alternative */
      *re = str_realloc_cat (0, *re, group);
      lim_free (0, group);
    }
  else
    {
      *re = group;
    }

  return 1;
}
/* Recognise a configuration line that selects a tagging method.
 *
 * When LINE (LEN bytes) starts with KW followed by whitespace, store
 * METHOD in *METHOD_VAR and UNTAGGED_CATEGORY in *UNTAGGED_CATEGORY_VAR
 * and return 1.  Otherwise leave both untouched and return 0.
 */
static int
sets_id_tagging_method (char * kw,
                        enum arch_id_tagging_method * method_var,
                        enum arch_inventory_category * untagged_category_var,
                        enum arch_id_tagging_method method,
                        enum arch_inventory_category untagged_category,
                        t_uchar * line, long len)
{
  int const kw_len = str_length (kw);

  if ((len >= (kw_len + 1))
      && !str_cmp_prefix (kw, line)
      && char_is_space (line[kw_len]))
    {
      *method_var = method;
      *untagged_category_var = untagged_category;
      return 1;
    }

  return 0;
}
/* Recognise an "untagged-source <category>" configuration line.
 *
 * Returns 0 when SAVED_LINE (SAVED_LEN bytes) is not such a line.
 * Otherwise <category> must be one of source, precious, backup, junk or
 * unrecognized, with nothing but blanks after it; the matching category
 * is stored in *UNTAGGED_CATEGORY_VAR and 1 is returned.
 *
 * A malformed line is fatal: a message is written to descriptor 2 and
 * the process exits with status 2.
 */
static int
sets_untagged_source_disposition (enum arch_inventory_category * untagged_category_var,
                                  t_uchar * saved_line, long saved_len)
{
  static struct
    {
      char * name;
      enum arch_inventory_category category;
    } const dispositions[] =
    {
      { "source", arch_inventory_source },
      { "precious", arch_inventory_precious },
      { "backup", arch_inventory_backup },
      { "junk", arch_inventory_junk },
      { "unrecognized", arch_inventory_unrecognized },
    };
  t_uchar * kw = "untagged-source";
  t_uchar * cursor = saved_line;
  long remaining = saved_len;
  t_uchar * word_start;
  t_uchar * spec = 0;
  enum arch_inventory_category cat = arch_inventory_source;
  int recognized = 0;
  int kw_len;
  int i;

  kw_len = str_length (kw);

  if (remaining < (kw_len + 1))
    return 0;

  if (str_cmp_prefix (kw, cursor) || !char_is_space (cursor[kw_len]))
    return 0;

  remaining -= kw_len;
  cursor += kw_len;

  /* blanks, then the category word, then blanks */
  for (; remaining && char_is_blank (cursor[0]); ++cursor, --remaining)
    ;

  word_start = cursor;
  for (; remaining && char_is_alpha (cursor[0]); ++cursor, --remaining)
    ;
  spec = str_save_n (0, word_start, cursor - word_start);

  for (; remaining && char_is_blank (cursor[0]); ++cursor, --remaining)
    ;

  /* only the end of the line may follow the category word */
  if (!remaining || (cursor[0] == '\n'))
    {
      for (i = 0; i < (int)(sizeof (dispositions) / sizeof (dispositions[0])); ++i)
        {
          if (!str_cmp (spec, dispositions[i].name))
            {
              cat = dispositions[i].category;
              recognized = 1;
              break;
            }
        }
    }

  if (!recognized)
    {
      safe_printfmt (2, "arch: syntax error in =tagging-method:\n %.*s\n", (int)saved_len, saved_line);
      exit (2);
    }

  *untagged_category_var = cat;

  lim_free (0, spec);
  return 1;
}
static void
read_directory_regexps (struct directory_regexps * regexps, char const * dir_name)
{
char * excludes = 0;
char * junk = 0;
char * backup = 0;
char * precious = 0;
char * unrecognized = 0;
char * source = 0;
t_uchar * file_name = 0;
int fd;
file_name = file_name_in_vicinity (0, dir_name, ".arch-inventory");
fd = safe_open (file_name, O_RDONLY, 0);
while (1)
{
t_uchar * line;
long len;
safe_next_line (&line, &len, fd);
if (!len)
break;
(void)(!is_comment_line (line, len)
&& !sets_re ("exclude", &excludes, line, len)
&& !sets_re ("junk", &junk, line, len)
&& !sets_re ("backup", &backup, line, len)
&& !sets_re ("precious", &precious, line, len)
&& !sets_re ("unrecognized", &unrecognized, line, len)
&& !sets_re ("source", &source, line, len));
}
safe_close (fd);
mem_set0((t_uchar*)regexps->regexps, sizeof regexps->regexps);
if (excludes)
{
if (regcomp (®exps->storage.excludes_pattern, excludes, REG_EXTENDED))
{
panic_msg ("bogus tagging-method regexp for `excludes' in ");
panic (dir_name);
}
DIR_REGEXP(regexps, excludes) = ®exps->storage.excludes_pattern;
}
if (junk)
{
if (regcomp (®exps->storage.junk_pattern, junk, REG_EXTENDED))
{
panic_msg ("bogus tagging-method regexp for `junk' in ");
panic (dir_name);
}
DIR_REGEXP(regexps, junk) = ®exps->storage.junk_pattern;
}
if (backup)
{
if (regcomp (®exps->storage.backup_pattern, backup, REG_EXTENDED))
{
panic_msg ("bogus tagging-method regexp for `backup' in ");
panic (dir_name);
}
DIR_REGEXP(regexps, backup) = ®exps->storage.backup_pattern;
}
if (precious)
{
if (regcomp (®exps->storage.precious_pattern, precious, REG_EXTENDED))
{
panic_msg ("bogus tagging-method regexp for `precious' in ");
panic (dir_name);
}
DIR_REGEXP(regexps, precious) = ®exps->storage.precious_pattern;
}
if (unrecognized)
{
if (regcomp (®exps->storage.unrecognized_pattern, unrecognized, REG_EXTENDED))
{
panic_msg ("bogus tagging-method regexp for `unrecognized' in ");
panic (dir_name);
}
DIR_REGEXP(regexps, unrecognized) = ®exps->storage.unrecognized_pattern;
}
if (source)
{
if (regcomp (®exps->storage.source_pattern, source, REG_EXTENDED))
{
panic_msg ("bogus tagging-method regexp for `source' in ");
panic (dir_name);
}
DIR_REGEXP(regexps, source) = ®exps->storage.source_pattern;
}
lim_free (0, file_name);
lim_free (0, excludes);
lim_free (0, junk);
lim_free (0, backup);
lim_free (0, precious);
lim_free (0, unrecognized);
lim_free (0, source);
}
/**
 * \brief release a directory_regexps structure.
 *
 * Calls regfree on every non-null slot of regexps->regexps and then
 * frees the structure itself with lim_free.  A null regexps is ignored.
 */
static void
free_directory_regexps (struct directory_regexps * regexps)
{
  int n_slots;
  int slot;

  if (regexps)
    {
      n_slots = sizeof regexps->regexps / sizeof regexps->regexps[0];
      for (slot = 0; slot < n_slots; ++slot)
        {
          regex_t * compiled = regexps->regexps[slot];

          if (compiled)
            regfree (compiled);
        }
      lim_free (0, regexps);
    }
}
/**
 * \brief copy the records of table whose second field lacks the "A_" prefix.
 *
 * The second field (index 1) of each record is treated as its id.
 * Records whose id starts with "A_" are left out; every other record is
 * copied into the result.
 *
 * \return a newly built table; 0 if no record was added.
 */
rel_table
pick_non_control (rel_table table)
{
  rel_table kept = 0;
  int row = 0;

  while (row < rel_n_records (table))
    {
      /* str_cmp_prefix is nonzero when the id does not begin with "A_" */
      if (str_cmp_prefix ("A_", table[row][1]) != 0)
        rel_add_records (&kept, rel_copy_record (table[row]), 0);
      ++row;
    }

  return kept;
}
/**
 * \brief normalise a provided path by string manipulation alone.
 *
 * A single leading "/" or a leading "./" is dropped, then a single
 * trailing "/" is dropped, and "./" is put in front of what remains.
 *
 * \return a newly allocated normalised path, or NULL when path is empty
 *         or nothing is left after stripping.
 */
static t_uchar *
normal_from_path (t_uchar const *path)
{
  t_uchar const *start = path;
  int remaining = str_length (path);
  t_uchar *trimmed;

  if (remaining == 0)
    return NULL;

  /* strip back to the first path element */
  if (start[0] == '/')
    {
      start += 1;
      remaining -= 1;
    }
  else if (str_cmp_prefix ("./", start) == 0)
    {
      start += 2;
      remaining -= 2;
    }
  if (remaining == 0)
    return NULL;

  /* now knock off one trailing / */
  if (start[remaining - 1] == '/')
    remaining -= 1;
  if (remaining == 0)
    return NULL;

  trimmed = str_save_n (0, start, remaining);
  return str_replace (trimmed, str_alloc_cat (0, "./", trimmed));
}
/**
 * \brief normalise a provided path into a prefix to filter against.
 *
 * The path is normalised with normal_from_path and a "/" is appended.
 *
 * \return the newly allocated prefix, or NULL when normal_from_path
 *         yields NULL.
 */
static t_uchar *
prefix_from_path (t_uchar const *path)
{
  t_uchar *normal = normal_from_path (path);

  return normal ? str_replace (normal, str_alloc_cat (0, normal, "/")) : NULL;
}
/**
 * \brief filter an_inventory so that only paths specified in filter_paths are present
 *
 * A record is kept when its path (field 0) either equals a normalised
 * filter path or lies beneath it, i.e. starts with the normalised filter
 * followed by "/".  Filters that normalise to nothing are ignored.
 *
 * \param an_inventory a rel_table with path, id pairs.
 * \param filter_paths a rel_table with paths. paths should start with ./
 * \return a new table holding copies of the kept records, in inventory
 *         order (NULL if none were kept); when filter_paths has no
 *         records, a copy of the whole inventory.
 */
rel_table
arch_inventory_included (rel_table an_inventory, rel_table filter_paths)
{
  rel_table answer = NULL;
  int scan_position;

  if (!rel_n_records (filter_paths))
    return rel_copy_table (an_inventory);

  rel_for_each (an_inventory, scan_position)
    {
      int filter_position;
      int keep = 0;

      rel_for_each (filter_paths, filter_position)
        {
          t_uchar *prefix_filter = prefix_from_path (filter_paths[filter_position][0]);
          t_uchar *exact_filter = normal_from_path (filter_paths[filter_position][0]);

          /* both helpers normalise the same input, so a non-null prefix
           * implies a non-null exact filter.
           */
          if (prefix_filter
              && (!str_cmp_prefix (prefix_filter, an_inventory[scan_position][0])
                  || !str_cmp (exact_filter, an_inventory[scan_position][0])))
            keep = 1;

          /* the normalised filters are fresh allocations: release them on
           * every iteration instead of leaking them.
           */
          lim_free (0, prefix_filter);
          lim_free (0, exact_filter);

          /* one matching filter is enough */
          if (keep)
            break;
        }

      if (keep)
        rel_add_records (&answer, rel_copy_record (an_inventory[scan_position]), 0);
    }

  return answer;
}
/* Closure handed to cache_inventory_callback while an inventory is being
 * both delivered to a client and recorded in a cache.
 */
struct caching_inventory_state
{
cached_changeset_inventory_t * cache; /* cache whose inventory array receives a copy of each entry */
inv_callback callback; /* the client's callback, invoked for each entry */
void * closure; /* passed as the first argument to callback */
};
int
cached_changeset_destructor (void *data)
{
cached_changeset_inventory_t * cache = talloc_get_type (data, cached_changeset_inventory_t);
int index;
ar_for_each (cache->inventory, index)
talloc_free (cache->inventory[index]);
ar_free_invent_cb (&cache->inventory);
return 0;
}
/**
 * \brief perform a changeset inventory of tree, caching the results in
 * cache, or replaying cached results if the options are compatible
 *
 * When *cache is set and incompatible_options reports no conflict, the
 * cached entries are fed to callback in order and no traversal happens.
 * Otherwise any previous cache is freed, a new one is allocated and
 * stored in *cache, and the tree is traversed with every entry both
 * passed to callback and recorded in the new cache.
 *
 * \param options inventory options; also copied (without regexps) into a new cache.
 * \param tree the project tree to inventory.
 * \param callback invoked once per inventory entry.
 * \param closure passed through to callback.
 * \param cache in/out pointer to the cache; may point at NULL.
 */
void
cached_changeset_inventory_traveral (struct arch_inventory_options * options, arch_project_tree_t * tree, inv_callback callback, void * closure, cached_changeset_inventory_t ** cache)
{
  struct caching_inventory_state state;
  cached_changeset_inventory_t * fresh;
  int index;

  /* cache hit: replay what was recorded last time */
  if (*cache && !incompatible_options (options, &(*cache)->options))
    {
      debug (dbg_invent, 8, "cached_changeset_inventory_traveral: cached inventory %s\n", tree->root);
      ar_for_each ((*cache)->inventory, index)
        {
          callback (closure, (*cache)->inventory[index]);
        }
      return;
    }

  /* cache miss: throw away any stale cache and build a new one */
  talloc_free (*cache);
  fresh = talloc (NULL, cached_changeset_inventory_t);
  *cache = fresh;
  fresh->inventory = NULL;
  talloc_set_destructor (fresh, cached_changeset_destructor);
  copy_options_but_regexps (&fresh->options, options);

  state.cache = fresh;
  state.callback = callback;
  state.closure = closure;

  debug (dbg_invent, 8, "cached_changeset_inventory_traveral: inventorying %s\n", tree->root);
  arch_inventory_traversal (options, tree, cache_inventory_callback, &state);
}
/**
 * \brief cache inventory results, simultaneously calling the ultimate callee
 *
 * \param closure a struct caching_inventory_state naming the client
 *        callback, its closure and the cache to fill.
 * \param data the inventory entry being reported.
 */
void
cache_inventory_callback (void * closure, invent_callback_data_t const * const data)
{
  struct caching_inventory_state * caching = closure;

  /* the client sees the entry first ... */
  caching->callback (caching->closure, data);

  /* ... then a private copy of it is appended to the cache */
  ar_push_invent_cb (&caching->cache->inventory, invent_cb_cache (data));
}
/**
 * \brief create a cached invent_cb entry - talloc managed
 *
 * The entry is copied member for member; the path string, and the id
 * string when there is one, are then replaced by duplicates allocated
 * with the new entry as their talloc context.
 *
 * \return the newly allocated copy.
 */
invent_callback_data_t *
invent_cb_cache (invent_callback_data_t const * const data)
{
  invent_callback_data_t *copy = talloc (NULL, invent_callback_data_t);

  *copy = *data;
  copy->path = talloc_strdup (copy, data->path);
  if (data->id)
    copy->id = talloc_strdup (copy, data->id);

  return copy;
}
/* tag: Tom Lord Wed May 14 09:47:16 2003 (invent.c)
*/
/* syntax highlighted by Code2HTML, v. 0.9.1 */