/* inv-ids.c: computing, creating and removing inventory ids
 *
 ****************************************************************
 * Copyright (C) 2002, 2003 Tom Lord
 *
 * See the file "COPYING" for further information about
 * the copyright and warranty status of this work.
 */

#include "hackerlab/bugs/panic.h"
#include "hackerlab/bugs/exception.h"
#include "hackerlab/os/errno.h"
#include "hackerlab/os/errno-to-string.h"
#include "hackerlab/os/time.h"
#include "hackerlab/os/sys/types.h"
#include "hackerlab/os/unistd.h"
#include "hackerlab/mem/mem.h"
#include "hackerlab/mem/talloc.h"
#include "hackerlab/char/char-class.h"
#include "hackerlab/char/str.h"
#include "hackerlab/fmt/cvt.h"
#include "hackerlab/fs/file-names.h"
#include "hackerlab/vu/safe.h"
#include "hackerlab/arrays/ar.h"
#include "libfsutils/ensure-dir.h"
#include "libarch/my.h"
#include "libarch/project-tree.h"
#include "libarch/patch-logs.h"
#include "libarch/invent.h"
#include "libarch/inode-sig.h"
#include "libarch/inv-ids.h"
#include "libarch/pfs.h"
#include "hackerlab/fs/cwd.h"
#include "po/gettext.h"
#include "libawk/relational.h"
#include "libarch/changelogs.h"
#include "libarch/debug.h"


/* __STDC__ prototypes for static functions */
static int is_at_or_underneath_archdir (char * rel_file);
static int filename_matches (regex_t * pattern, char * filename);
static t_uchar * explicit_id (int * errn,
                              assoc_table * skips,
                              struct alloc_limits * limits,
                              t_uchar const * const arg_file,
                              t_uchar * id_file,
                              t_uchar * prefix,
                              t_uchar * postfix,
                              struct stat * statb,
                              assoc_table id_tagging_shortcut);
static t_uchar * implicit_id (int * errn,
                              struct alloc_limits * limits,
                              t_uchar const * const file,
                              t_uchar * basename,
                              t_uchar * prefix,
                              struct stat * statb,
                              assoc_table id_tagging_shortcut);
static long smash_non_graphical (t_uchar * buf, long amt);



/**
 * \brief build the inventory id of a patch-log file
 *
 * The id is "A_" followed by the path arch_log_file() returns for
 * ARCHIVE/REVISION relative to ".".
 *
 * \return a newly allocated string; the caller frees it with lim_free.
 */
t_uchar *
arch_log_file_id (t_uchar * archive, t_uchar * revision)
{
  t_uchar * log_file_path = 0;
  t_uchar * answer = 0;

  log_file_path = arch_log_file (".", archive, revision);
  answer = str_alloc_cat (0, "A_", log_file_path);

  lim_free (0, log_file_path);
  return answer;
}


/**
 * \brief map the ids of a list of files to their names
 *
 * For every record of FILE_LIST, column 0 is taken as a path, its id is
 * looked up with arch_inventory_id() and an `id -> path' entry is stored
 * in the returned table.
 *
 * Side effect: if TREE has untagged_is_source set, it is cleared here and
 * NOT restored (a message is printed on fd 2).
 *
 * NOTE(review): arch_inventory_id() can return NULL; that NULL is passed
 * to assoc_set() as the key unchanged -- confirm assoc_set tolerates it.
 */
assoc_table
arch_filenames_ids (rel_table * file_list, arch_project_tree_t * tree)
{
  struct arch_inventory_options options = {0, };
  int i = 0;
  int file_size = rel_n_records (* file_list);
  assoc_table id_list = 0;

  options.categories = arch_inventory_source;
  options.want_ids = 1;
  options.include_excluded = 1;

  arch_get_inventory_naming_conventions (&options, tree);

  if (tree->untagged_is_source)
    {
      /* FIXME if this is annoying remove it. but its probably a bug to do this at all */
      safe_printfmt (2, "overriding untagged is source for arch_filenames_ids\n");
      tree->untagged_is_source = 0;
    }

  for (i = 0; i != file_size; ++ i)
    {
      t_uchar * id = arch_inventory_id (tree, (* file_list) [i] [0], 0);

      assoc_set (&id_list, id, (* file_list) [i] [0]);
      lim_free (0, id);
    }

  arch_free_inventory_naming_conventions (&options);

  return id_list;
}


/**
 * \brief retrieve a file id for an arbitrary file on disk
 * \param tree the tree to get the id for.
 * \param path the filename to inventory
 * \param known_lstat prior stat result for this file (may be 0)
 * \return a newly allocated copy of the id, or NULL when the tree
 *         yields no inventory entry for PATH.
 */
t_uchar *
arch_inventory_id (arch_project_tree_t * tree, t_uchar const * const path, struct stat * known_lstat)
{
  t_uchar * answer;
  inventory_entry_t *entry = arch_project_tree_path_inventory (tree, path, known_lstat);

  if (entry)
    answer = str_save (0, entry->id);
  else
    answer = NULL;

  /* the tree gives us a reference ... for transition purposes.
   * FIXME RBC 20050331 the tree should just give us a pointer, not a reference locked
   * object, and cache ids sanely. not possible for hackerlab trees yet.
   */
  talloc_free (entry);
  return answer;
}


/**
 * \brief return a newly allocated string naming tagging method M
 *
 * Panics for a method that has no name (anything other than names,
 * implicit, tagline or explicit).
 */
t_uchar *
arch_id_tagging_method_name (enum arch_id_tagging_method m)
{
  switch (m)
    {
    case arch_names_id_tagging:
      return str_save (0, "names");

    case arch_implicit_id_tagging:
      return str_save (0, "implicit");

    case arch_tagline_id_tagging:
      return str_save (0, "tagline");

    case arch_explicit_id_tagging:
      return str_save (0, "explicit");

    default:
      panic ("unknown id tagging method (arch_id_tagging_method_name)");
      return 0;                 /* not reached */
    }
}


/**
 * \brief parse a tagging method name (case-insensitive)
 *
 * An unknown name is fatal: a message is printed on fd 2 and the
 * process exits with status 2.
 */
enum arch_id_tagging_method
arch_id_tagging_method_from_name (t_uchar * name)
{
  if (!str_casecmp (name, "explicit"))
    return arch_explicit_id_tagging;
  else if (!str_casecmp (name, "implicit"))
    return arch_implicit_id_tagging;
  else if (!str_casecmp (name, "tagline"))
    return arch_tagline_id_tagging;
  else if (!str_casecmp (name, "names"))
    return arch_names_id_tagging;
  else
    {
      safe_printfmt (2, "no such id tagging method (%s)\n", name);
      exit (2);
      return arch_names_id_tagging; /* notreached */
    }
}


/**
 * \brief build the default text of a tree's tagging-method file
 *
 * The text consists of fixed explanatory comments, the name of METHOD
 * (arch_unspecified_id_tagging is treated as explicit), an
 * `untagged-source precious' directive, and one directive per naming
 * convention category filled in from
 * arch_default_naming_conventions_regexp().
 *
 * \return a newly allocated string; the caller frees it with lim_free.
 */
t_uchar *
arch_default_id_tagging_method_contents (enum arch_id_tagging_method method)
{
  t_uchar * method_name = 0;
  t_uchar * excludes_regexp = 0;
  t_uchar * junk_regexp = 0;
  t_uchar * backup_regexp = 0;
  t_uchar * precious_regexp = 0;
  t_uchar * unrecognized_regexp = 0;
  t_uchar * source_regexp = 0;
  t_uchar * answer = 0;

  if (method == arch_unspecified_id_tagging)
    method = arch_explicit_id_tagging;

  method_name = arch_id_tagging_method_name (method);
  excludes_regexp = arch_default_naming_conventions_regexp (arch_inventory_excludes);
  backup_regexp = arch_default_naming_conventions_regexp (arch_inventory_backup);
  junk_regexp = arch_default_naming_conventions_regexp (arch_inventory_junk);
  precious_regexp = arch_default_naming_conventions_regexp (arch_inventory_precious);
  unrecognized_regexp = arch_default_naming_conventions_regexp (arch_inventory_unrecognized);
  source_regexp = arch_default_naming_conventions_regexp (arch_inventory_source);

  answer = str_alloc_cat_many (0,
                               ("# id tagging method\n"
                                "#\n"
                                "# This determines how \"inventory ids\", strings conveying\n"
                                "# logical file identity, are computed for each file, directory\n"
                                "# and symbolic link.\n"
                                "#\n"
                                "# The choices are:\n"
                                "#\n"
                                "# tagline: inventory ids may be set using add-id, or omitted\n"
                                "# (though tree-lint warns about omitted ids), or in\n"
                                "# text files, set in a comment line near the top or\n"
                                "# bottom of the file of a form like \" arch-tag: \".\n"
                                "# Renames of files with no id are treated as a combined\n"
                                "# add and delete (e.g., local changes can be lost).\n"
                                "#\n"
                                "# explicit: ids must be set using add-id. Files passing the naming\n"
                                "# conventions for source, but lacking add-id ids, are treated\n"
                                "# as unrecognized files (see below).\n"
                                "#\n"
                                "# names: ids are not used. All renames are treated as add+delete\n"
                                "#\n"
                                "# implicit: similar to tagline, but in addition, the id comment\n"
                                "# may be of the form \" - \", where\n"
                                "# is the basename of the file. This method\n"
                                "# is not recommended, but is retained for backwards\n"
                                "# compatibility.\n"
                                "#\n"
                                "\n"),
                               method_name, "\n",
                               "\n",
                               ("# disposition of untagged source files\n"
                                "#\n"
                                "# (NOTE: this option must follow the tagline/explicit/names/implicit\n"
                                "# directive.)\n"
                                "#\n"
                                "# By default, the explicit method treats untagged files matching the naming\n"
                                "# conventions for source files as unrecognized and the implicit and tagline\n"
                                "# methods treat such untagged files as source.\n"
                                "#\n"
                                "# You can override those default treatments of untagged files by specifying\n"
                                "# which inventory category (see below) should be used for files whose names\n"
                                "# suggest they are source but which lack ids.\n"
                                "#\n"
                                "# This feature may be especially convenient when importing sources that do\n"
                                "# not use file naming conventions that can be conveniently described with\n"
                                "# the regexps below.\n"
                                "#\n"
                                "# Uncomment one of these lines as appropriate to override the default:\n"
                                "#\n"
                                "# untagged-source source\n"
                                "untagged-source precious\n"
                                "# untagged-source backup\n"
                                "# untagged-source junk\n"
                                "# untagged-source unrecognized\n"
                                "#\n"
                                "\n"),
                               ("# naming convention regexps\n"
                                "#\n"
                                "# For various commands, arch traverses your project trees, categorizing\n"
                                "# the files found there. For example, when importing a project for\n"
                                "# the first time, this traversal determines which files are included\n"
                                "# in the import.\n"
                                "#\n"
                                "# The categories of greatest importance are defined in terms of three\n"
                                "# questions:\n"
                                "#\n"
                                "# 1) If arch makes a local copy of this tree, should this file be included\n"
                                "# in the copy?\n"
                                "#\n"
                                "# 2) Is it generally safe to remove this file based only on how it is named?\n"
                                "# For example, can it be safely clobbered by a new file of the same name?\n"
                                "#\n"
                                "# 3) Should this file be archived along with the project? For example,\n"
                                "# should it be included when importing the project for the first time?\n"
                                "#\n"
                                "# The primary categories are:\n"
                                "#\n"
                                "# category: copy locally? safe to clobber? archive?\n"
                                "#\n"
                                "# junk no yes no\n"
                                "# backup no no no\n"
                                "# precious yes no no\n"
                                "# source yes no yes\n"
                                "#\n"
                                "# There are two additional categories, unrelated to those questions:\n"
                                "#\n"
                                "# excluded -- during a traversal by inventory, this file (and,\n"
                                "# if a directory, its contents) are simply ignored unless the\n"
                                "# --all flag is specified. This category is usually used to\n"
                                "# omit arch's own control files from a listing.\n"
                                "#\n"
                                "# unrecognized -- a category for files whose name fits no other pattern.\n"
                                "# Usually, the presence of unrecognized files is treated as an\n"
                                "# error. You can use the naming conventions to define certain\n"
                                "# names as \"deliberately unrecognized\" -- i.e., filenames whose\n"
                                "# presence in a source tree you _want_ to be treated as an error\n"
                                "#\n"
                                "# The traveral algorithm is described here, along with lines you can edit to\n"
                                "# customize the naming conventions.\n"
                                "#\n"
                                "# Starting at \".\" within a project tree (usually at the root of the\n"
                                "# project tree) consider each filename in that directory.\n"
                                "#\n"
                                "# The files \".\" and \"..\" are simply ignored.\n"
                                "#\n"
                                "# Files containing \"illegal characters\" are characterized as unrecognized.\n"
                                "# If they are directories, traversal does _not_ descend into those directories.\n"
                                "# Currently, the illegal characters are *, ?, [, ], \\, space, and tab.\n"
                                "# (The set of illegal characters may shrink in future releases.)\n"
                                "#\n"
                                "# In an interactive call to inventory _without_ the --all flag,\n"
                                "# names are next compared to the exclude regexp defined here. Those that\n"
                                "# are ignored and not descended below. (Most arch operations performing\n"
                                "# traversals internally, e.g. import, do not use this pattern\n"
                                "# and skip this step of the algorithm.\n"
                                "#\n"),
                               "\n",
                               "exclude ", excludes_regexp, "\n",
                               "\n",
                               ("# If the file has a name that begins with \"++\", it is categorized as\n"
                                "# _precious_. Names of this form are hard-wired and reserved for use by arch\n"
                                "# itself. Traversal does not descend into precious directories, but when a\n"
                                "# precious directory is copied, its contents are recursively copied.\n"
                                "#\n"
                                "# Files and directories that reach this stage and which arch recognizes as its\n"
                                "# own control files are classified at this step as source. Traversal _does_\n"
                                "# descend into source directories.\n"
                                "#\n"
                                "# If the file has a name that begins with \",,\", it is categorized as _junk_.\n"
                                "# Names of this form are hard-wired and reserved for use by arch and other tools,\n"
                                "# and arch may clobber such files without warning. In a project tree, when no \n"
                                "# arch commands are running, it is safe for users to delete any \",,\" files. \n"
                                "# Although the general rule for junk files is that arch is free to clobber them,\n"
                                "# in fact, arch will only ever clobber files starting with \",,\".\n"
                                "#\n"
                                "# Traversal does not descend into junk directories.\n"
                                "#\n"
                                "# For your convenience, at this step of the traversal, you can classify\n"
                                "# additional files as junk or precious:\n"
                                "#\n"),
                               "\n",
                               "junk ", junk_regexp, "\n",
                               "\n",
                               "precious ", precious_regexp, "\n",
                               "\n",
                               ("# Files matching the following regexp are classified as backup files, and\n"
                                "# traversal does not descend into backup directories:\n"
                                "#\n"),
                               "\n",
                               "backup ", backup_regexp, "\n",
                               "\n",
                               ("# If you want to force certain filenames to be treated as errors when present,\n"
                                "# you can add them to the regexp for deliberately unrecognized files. Traversal\n"
                                "# does not descend into unrecognized directories.\n"),
                               "\n",
                               "unrecognized ", unrecognized_regexp, "\n",
                               "\n",
                               ("# Files which match the following pattern are treated as source files.\n"
                                "# Traversal _does_ descend into source directories:\n"),
                               "\n",
                               "source ", source_regexp, "\n",
                               "\n",
                               ("# Any files not classified by the above rules are classified as unrecognized.\n"
                                "# Traversal does not descend into unrecognized directories.\n"
                                "\n"),
                               str_end);

  lim_free (0, method_name);
  lim_free (0, excludes_regexp);
  lim_free (0, junk_regexp);
  lim_free (0, backup_regexp);
  lim_free (0, precious_regexp);
  lim_free (0, unrecognized_regexp);
  lim_free (0, source_regexp);

  return answer;
}


/**
 * \brief path of the "=tagging-method" file in TREE's control directory
 * \return a newly allocated string; the caller frees it with lim_free.
 */
t_uchar *
arch_tree_id_tagging_method_file (arch_project_tree_t * tree)
{
  t_uchar * ctl_dir;
  t_uchar * answer;

  ctl_dir = arch_tree_ctl_dir (tree->root);
  answer = file_name_in_vicinity (0, ctl_dir, "=tagging-method");

  lim_free (0, ctl_dir);
  return answer;
}


/**
 * \brief determine the id tagging method for a tree
 *
 * this is the only function allowed to call vtable->id_tagging_method directly,
 * all other functions should call this wrapper
 */
enum arch_id_tagging_method
arch_tree_id_tagging_method (arch_project_tree_t * tree, enum arch_inventory_category * cat_var, int strict)
{
  return tree->vtable->id_tagging_method (tree, cat_var, strict);
}


/**
 * \brief record METHOD in TREE's tagging-method file
 *
 * The new contents are written to ",,tagging-method" beside the real
 * file and then renamed over it.
 *
 * If the file does not exist yet it will contain just the method name.
 * Otherwise every line is copied, except that a line consisting only of
 * a method keyword (names / explicit / implicit / tagline, compared
 * case-insensitively, surrounded by optional whitespace) is replaced by
 * the new method name.  If no such line was found the method name is
 * appended.
 */
void
arch_set_tree_id_tagging_method (arch_project_tree_t * tree, enum arch_id_tagging_method method)
{
  /* Keywords that may stand alone on a line to select a method. */
  static const struct
  {
    char * keyword;
    long length;
  } method_keywords[] =
    {
      { "names", sizeof ("names") - 1 },
      { "explicit", sizeof ("explicit") - 1 },
      { "implicit", sizeof ("implicit") - 1 },
      { "tagline", sizeof ("tagline") - 1 },
    };
  int n_method_keywords = (int)(sizeof (method_keywords) / sizeof (method_keywords[0]));

  int errn;
  t_uchar * method_name;
  t_uchar * method_file;
  t_uchar * method_dir;
  t_uchar * method_tmp;
  int out_fd;

  method_name = arch_id_tagging_method_name (method);
  method_file = arch_tree_id_tagging_method_file (tree);
  method_dir = file_name_directory_file (0, method_file);
  method_tmp = file_name_in_vicinity (0, method_dir, ",,tagging-method");

  /* A stale temp file would make the O_EXCL open below fail; the
   * result of the unlink itself is deliberately not examined.
   */
  vu_unlink (&errn, method_tmp);
  out_fd = safe_open (method_tmp, O_WRONLY | O_CREAT | O_EXCL, 0666);

  if (safe_access (method_file, F_OK))
    {
      safe_printfmt (out_fd, "%s\n", method_name);
    }
  else
    {
      int in_fd;
      t_uchar * line;
      long len;
      int emitted_method;

      in_fd = safe_open (method_file, O_RDONLY, 0);
      emitted_method = 0;

      while (1)
        {
          t_uchar * pos;
          t_uchar * lim;
          int line_maybe_specifies_method;
          int replace_with_method_name;
          int k;

          safe_next_line (&line, &len, in_fd);
          if (!line)
            break;

          lim = line + len;
          pos = line;
          line_maybe_specifies_method = 0;
          replace_with_method_name = 0;

          while ((pos < lim) && char_is_blank (*pos))
            ++pos;

          for (k = 0; k < n_method_keywords; ++k)
            {
              long kw_len = method_keywords[k].length;

              if (((lim - pos) >= kw_len)
                  && !str_casecmp_n (method_keywords[k].keyword, kw_len, pos, kw_len))
                {
                  line_maybe_specifies_method = 1;
                  pos += kw_len;
                  break;
                }
            }

          if (line_maybe_specifies_method)
            {
              /* Only whitespace may follow the keyword. */
              while ((pos < lim) && char_is_space (*pos))
                ++pos;
              if (pos == lim)
                replace_with_method_name = 1;
            }

          if (replace_with_method_name)
            {
              safe_printfmt (out_fd, "%s\n", method_name);
              emitted_method = 1;
            }
          else
            {
              safe_printfmt (out_fd, "%.*s", (int)len, line);
              if (len && (line[len - 1] != '\n'))
                safe_printfmt (out_fd, "\n");
            }
        }

      if (!emitted_method)
        safe_printfmt (out_fd, "%s\n", method_name);

      safe_close (in_fd);
    }

  safe_close (out_fd);
  safe_rename (method_tmp, method_file);

  lim_free (0, method_name);
  lim_free (0, method_file);
  lim_free (0, method_dir);
  lim_free (0, method_tmp);
}


/**
 * \brief name of the explicit id file that belongs to PATH
 *
 * For a directory the answer is "PATH/.arch-ids/=id"; for anything else
 * (including a path that does not exist) it is
 * "<dir of PATH>/.arch-ids/<tail of PATH>.id".
 *
 * An lstat failure other than ENOENT is fatal (exit status 2).
 *
 * \return a newly allocated string; the caller frees it with lim_free.
 */
t_uchar *
arch_explicit_id_file_for (t_uchar * path)
{
  int errn;
  struct stat stat_buf;
  int is_file;
  t_uchar * parent_dir;
  t_uchar * dot_arch_dir;
  t_uchar * id_file_basename;
  t_uchar * id_file_path;

  if (vu_lstat (&errn, path, &stat_buf))
    {
      if (errn != ENOENT)
        {
          safe_printfmt (2, "i/o error (%d: %s) for vu_lstat of %s\n", errn, errno_to_string (errn), path);
          exit (2);
        }
      is_file = 1;
    }
  else
    {
      is_file = !S_ISDIR (stat_buf.st_mode);
    }

  if (is_file)
    parent_dir = file_name_directory_file (0, path);
  else
    parent_dir = str_save (0, path);

  dot_arch_dir = file_name_in_vicinity (0, parent_dir, ".arch-ids");

  if (is_file)
    {
      id_file_basename = file_name_tail (0, path);
      id_file_basename = str_realloc_cat (0, id_file_basename, ".id");
    }
  else
    id_file_basename = str_save (0, "=id");

  id_file_path = file_name_in_vicinity (0, dot_arch_dir, id_file_basename);

  lim_free (0, parent_dir);
  lim_free (0, dot_arch_dir);
  lim_free (0, id_file_basename);

  return id_file_path;
}


/**
 * \brief generate a fresh id string
 *
 * The id has the form "<my-id> <ctime text> <pid>.<seq>" where <seq> is
 * a counter local to this process that is incremented on every call.
 *
 * \return a newly allocated string; the caller frees it with lim_free.
 */
t_uchar *
arch_generate_id (void)
{
  static unsigned long seq = 0;

  time_t now;
  char * now_str;
  t_uchar * nl;
  t_uchar * my_id;
  pid_t my_pid;
  t_uchar my_pid_str[128];
  t_uchar seq_str[128];
  t_uchar * id;

  /* time() reports failure as (time_t)-1; a `< 0' test is wrong when
   * time_t is unsigned.
   */
  if ((time_t) -1 == time (&now))
    panic ("unable to get time of day in arch_generate_id");

  now_str = ctime (&now);
  if (!now_str)
    panic ("unable to format time of day in arch_generate_id");

  /* ctime text ends in a newline; cut it off. */
  nl = str_chr_index (now_str, '\n');
  if (nl)
    *nl = 0;

  my_id = arch_my_id ();
  my_pid = getpid ();

  cvt_ulong_to_decimal (my_pid_str, (unsigned long)my_pid);
  cvt_ulong_to_decimal (seq_str, (unsigned long)seq);
  ++seq;

  id = str_alloc_cat_many (0, my_id, " ", now_str, " ", my_pid_str, ".", seq_str, str_end);

  lim_free (0, my_id);
  return id;
}


/**
 * \brief choose the explicit id to give PATH
 *
 * The id PATH currently gets under tagline tagging (with
 * untagged_is_source forced off) is looked up first.  If that id
 * indicates a changelog, the result is that id with its first two
 * characters dropped.  Otherwise the result is a copy of ID, or a
 * freshly generated id when ID is 0.
 *
 * TREE's untagged_is_source and tag_method are modified temporarily and
 * restored before returning.
 *
 * precondition: path must be accessible
 *
 * \return a newly allocated string; the caller frees it with lim_free.
 */
t_uchar *
arch_choose_explicit_id (arch_project_tree_t * tree, t_uchar * path, t_uchar * id)
{
  t_uchar * tagline_id;
  t_uchar * new_explicit_id;
  int old_untagged_source;
  enum arch_id_tagging_method old_tagging;

  /* this sets untagged_is_source 0 so that we don't get names ids
   * however, this should not be needed - we should ask for the implicit
   * id directly
   * RBC 20050321
   */
  old_untagged_source = tree->untagged_is_source;
  tree->untagged_is_source = 0;

  /* FIXME RBC 20050321 overriding the tagging method is not necesarily possible
   * fix this leaky abstraction (perhaps 'has_real_id ?')
   */
  old_tagging = tree->tag_method;
  tree->tag_method = arch_tagline_id_tagging;

  tagline_id = arch_inventory_id (tree, path, 0);

  tree->untagged_is_source = old_untagged_source;
  tree->tag_method = old_tagging;

  if (!tagline_id || !arch_id_indicates_changelog (tagline_id))
    {
      if (!id)
        new_explicit_id = arch_generate_id ();
      else
        new_explicit_id = str_save (0, id);
    }
  else
    {
      /* skip the two-character prefix of the inventory id */
      new_explicit_id = str_save (0, 2 + tagline_id);
    }

  lim_free (0, tagline_id);
  return new_explicit_id;
}


/**
 * \brief compare the tails of two strings
 *
 * Compares the last N characters of A_STRING and B_STRING with str_cmp,
 * where N is the smallest of LENGTH and the two string lengths.
 */
int
str_cmp_suffix_n(t_uchar * a_string, t_uchar * b_string, int length)
{
  int len_a = str_length(a_string);
  int len_b = str_length(b_string);
  int cmp_length = (len_a < len_b) ? len_a : len_b;

  cmp_length = (cmp_length < length) ? cmp_length : length;

  return str_cmp(a_string + len_a - cmp_length, b_string + len_b - cmp_length);
}


/* Return non-zero iff lstat of PATH succeeds. */
static int
path_exists (char const * path)
{
  struct stat stat_buf;
  int errn;
  int answer;

  answer = vu_lstat (&errn, (char *)path, &stat_buf);
  return answer == 0;
}


/**
 * \brief give PATH an explicit id
 *
 * Nothing is done (and NULL is returned) when PATH does not exist, or
 * when it is a control file whose name does not end in
 * ".arch-inventory".  Otherwise an id is chosen with
 * arch_choose_explicit_id() and written with arch_add_explicit_id().
 *
 * \param id the id to use, or 0 to have one chosen/generated
 * \return the id that was assigned (newly allocated), or NULL.
 */
t_uchar *
arch_add_id (t_uchar * path, t_uchar * id)
{
  t_uchar * new_explicit_id;
  arch_project_tree_t * tree;
  t_uchar * rel_path;
  t_uchar * full_path;

  /* don't add ids for non existing paths */
  if (!path_exists (path))
    {
      safe_printfmt (2, "attempt to add missing path: %s\n", path);
      return NULL;
    }

  /* don't add ids for control paths */
  tree = arch_project_tree_new (talloc_context, path);
  full_path = arch_abs_path (path);
  rel_path = arch_project_tree_rel_path_from_abs (tree, full_path);

  if (arch_is_control_file (rel_path, NULL) && str_cmp_suffix_n(path, ".arch-inventory", 15))
    {
      lim_free (0, full_path);
      lim_free (0, rel_path);
      arch_project_tree_delete (tree);
      return NULL;
    }
  lim_free (0, full_path);

  debug (dbg_invent, 3, _("getting id for (%s)\n"), rel_path);
  new_explicit_id = arch_choose_explicit_id (tree, rel_path, id);

  /* FIXME path here is full not rel */
  arch_add_explicit_id (path, new_explicit_id);

  arch_project_tree_delete (tree);
  lim_free (0, rel_path);

  return new_explicit_id;
}


/**
 * \brief write ID into the explicit id file of PATH
 *
 * Fatal (exit status 2) if the id file already exists.  The ".arch-ids"
 * directory is created when needed.
 */
void
arch_add_explicit_id (t_uchar * path, t_uchar * id)
{
  t_uchar * id_file;
  t_uchar * id_dir;
  int out_fd;

  id_file = arch_explicit_id_file_for (path);

  if (!safe_access (id_file, F_OK))
    {
      safe_printfmt (2, "attempt to id already tagged file: %s\n", path);
      exit (2);
    }

  id_dir = file_name_directory_file (0, id_file);
  ensure_directory_exists (id_dir);

  out_fd = safe_open (id_file, O_WRONLY | O_CREAT | O_EXCL, 0666);
  safe_printfmt (out_fd, "%s\n", id);
  safe_close (out_fd);

  lim_free (0, id_file);
  lim_free (0, id_dir);
}


/**
 * \brief remove the explicit id file of PATH
 *
 * Fatal (exit status 2) if there is no such id file.
 */
void
arch_delete_explicit_id (t_uchar * path)
{
  t_uchar * id_file;

  id_file = arch_explicit_id_file_for (path);

  if (!safe_access (id_file, F_OK))
    safe_unlink (id_file);
  else
    {
      safe_printfmt (2, "attempt to remove non-existent id for %s\n", path);
      exit (2);
    }

  lim_free (0, id_file);
}


/**
 * \brief rename the explicit id file of FROM to be the id file of TO
 *
 * The destination ".arch-ids" directory is created when needed.
 */
void
arch_move_explicit_id (t_uchar * from, t_uchar * to)
{
  t_uchar * old_id_file;
  t_uchar * new_id_file;
  t_uchar * new_id_dir;

  old_id_file = arch_explicit_id_file_for (from);
  new_id_file = arch_explicit_id_file_for (to);
  new_id_dir = file_name_directory_file (0, new_id_file);

  ensure_directory_exists (new_id_dir);
  safe_rename (old_id_file, new_id_file);

  lim_free (0, old_id_file);
  lim_free (0, new_id_file);
  lim_free (0, new_id_dir);
}


/** \brief newly allocated path "DIR/.arch-ids/=all" */
t_uchar *
arch_strong_explicit_dflt_file (t_uchar * dir)
{
  t_uchar * dot_arch_dir;
  t_uchar * answer;

  dot_arch_dir = file_name_in_vicinity (0, dir, ".arch-ids");
  answer = file_name_in_vicinity (0, dot_arch_dir, "=all");

  lim_free (0, dot_arch_dir);
  return answer;
}


/** \brief newly allocated path "DIR/.arch-ids/=default" */
t_uchar *
arch_weak_explicit_dflt_file (t_uchar * dir)
{
  t_uchar * dot_arch_dir;
  t_uchar * answer;

  dot_arch_dir = file_name_in_vicinity (0, dir, ".arch-ids");
  answer = file_name_in_vicinity (0, dot_arch_dir, "=default");

  lim_free (0, dot_arch_dir);
  return answer;
}


/** \brief newly allocated path "DIR/.arch-ids/=dont-care" */
t_uchar *
arch_dont_care_explicit_dflt_file (t_uchar * dir)
{
  t_uchar * dot_arch_dir;
  t_uchar * answer;

  dot_arch_dir = file_name_in_vicinity (0, dir, ".arch-ids");
  answer = file_name_in_vicinity (0, dot_arch_dir, "=dont-care");

  lim_free (0, dot_arch_dir);
  return answer;
}


/** \brief non-zero iff DIR has a ".arch-ids/=dont-care" file */
int
arch_is_dont_care_explicit_dflt_dir (t_uchar * dir)
{
  t_uchar * file = 0;
  int answer;

  file = arch_dont_care_explicit_dflt_file (dir);
  answer = !safe_access (file, F_OK);

  lim_free (0, file);
  return answer;
}


/**
 * \brief remove DIR's ".arch-ids/=all" file
 *
 * Fatal (exit status 2) if the file does not exist.
 */
void
arch_delete_strong_explicit_default (t_uchar * dir)
{
  t_uchar * file;

  file = arch_strong_explicit_dflt_file (dir);

  if (safe_access (file, F_OK))
    {
      safe_printfmt (2, "attempt to delete non-existing strong explicit default in %s\n", dir);
      exit (2);
    }

  /* unlink the default file itself, not the directory it describes */
  safe_unlink (file);

  lim_free (0, file);
}


/**
 * \brief remove DIR's ".arch-ids/=default" file
 *
 * Fatal (exit status 2) if the file does not exist.
 */
void
arch_delete_weak_explicit_default (t_uchar * dir)
{
  t_uchar * file;

  file = arch_weak_explicit_dflt_file (dir);

  if (safe_access (file, F_OK))
    {
      safe_printfmt (2, "attempt to delete non-existing weak explicit default in %s\n", dir);
      exit (2);
    }

  /* unlink the default file itself, not the directory it describes */
  safe_unlink (file);

  lim_free (0, file);
}


/**
 * \brief remove DIR's ".arch-ids/=dont-care" file
 *
 * Fatal (exit status 2) if the file does not exist.
 */
void
arch_delete_dont_care_explicit_default (t_uchar * dir)
{
  t_uchar * file;

  file = arch_dont_care_explicit_dflt_file (dir);

  if (safe_access (file, F_OK))
    {
      safe_printfmt (2, "attempt to delete non-existing dont-care explicit default in %s\n", dir);
      exit (2);
    }

  /* unlink the default file itself, not the directory it describes */
  safe_unlink (file);

  lim_free (0, file);
}


/**
 * \brief create DIR's ".arch-ids/=all" file containing ID
 *
 * Fatal (exit status 2) if the file already exists.
 */
void
arch_set_strong_explicit_default (t_uchar * dir, t_uchar * id)
{
  t_uchar * file;
  t_uchar * file_dir;
  int out_fd;

  file = arch_strong_explicit_dflt_file (dir);
  file_dir = file_name_directory_file (0, file);

  if (!safe_access (file, F_OK))
    {
      safe_printfmt (2, "attempt to overwrite strong explicit default in %s\n", dir);
      exit (2);
    }

  ensure_directory_exists (file_dir);
  out_fd = safe_open (file, O_WRONLY | O_CREAT | O_EXCL, 0666);
  safe_printfmt (out_fd, "%s\n", id);
  safe_close (out_fd);

  lim_free (0, file);
  lim_free (0, file_dir);
}


/**
 * \brief create DIR's ".arch-ids/=default" file containing ID
 *
 * Fatal (exit status 2) if the file already exists.
 */
void
arch_set_weak_explicit_default (t_uchar * dir, t_uchar * id)
{
  t_uchar * file;
  t_uchar * file_dir;
  int out_fd;

  file = arch_weak_explicit_dflt_file (dir);
  file_dir = file_name_directory_file (0, file);

  if (!safe_access (file, F_OK))
    {
      safe_printfmt (2, "attempt to overwrite weak explicit default in %s\n", dir);
      exit (2);
    }

  ensure_directory_exists (file_dir);
  out_fd = safe_open (file, O_WRONLY | O_CREAT | O_EXCL, 0666);
  safe_printfmt (out_fd, "%s\n", id);
  safe_close (out_fd);

  lim_free (0, file);
  lim_free (0, file_dir);
}


/**
 * \brief create an empty ".arch-ids/=dont-care" file in DIR
 *
 * Does nothing when the file already exists.
 */
void
arch_set_dont_care_explicit_default (t_uchar * dir)
{
  t_uchar * file;
  t_uchar * file_dir;
  int out_fd;

  file = arch_dont_care_explicit_dflt_file (dir);
  file_dir = file_name_directory_file (0, file);

  if (safe_access (file, F_OK))
    {
      ensure_directory_exists (file_dir);
      out_fd = safe_open (file, O_WRONLY | O_CREAT | O_EXCL, 0666);
      safe_close (out_fd);
    }

  lim_free (0, file);
  lim_free (0, file_dir);
}


/**
 * \brief return a new inventory entry carrying the inventory id of the file / directory.
 *
 * WARNING. DO NOT CALL THIS FUNCTION. It is private to tla-project-tree.c, but not
 * relocated until more of the object model clears up.
 *
 * Return 0 and set `*errn' if the id can not be computed.
 * If no id is found and the tree does not treat untagged files as
 * source, 0 is returned with `*errn' set to 0.
 *
 * The id is chosen by the first rule that applies:
 *   - tree uses names tagging:            "?" + path
 *   - file inside a ".arch-ids" dir:      "E_" + first line of the file
 *   - a ".arch-ids" directory itself:     "D_" + path
 *   - at or underneath "{arch}":          "A_" + path
 *   - explicit id file:                   "x_" + its first line
 *   - ".arch-ids/=all":                   "a_" + its first line + name
 *   - implicit / tagline id in the file:  via implicit_id(), prefix "i_"
 *   - ".arch-ids/=default":               "w_" + its first line + name
 *   - untagged_is_source, "=dont-care":   "k_" + path
 *   - untagged_is_source otherwise:       "?_" + path
 * (For the path-derived ids, non-graphical characters are smashed.)
 *
 * \param tree the tree to get the id for
 * \param errn put error details here
 * \param path the path of the file; it is joined onto tree->root for file access
 * \param known_lstat cached stat result, or 0 to have the file lstat'ed here
 *
 * NOTE(review): answer_id is not freed after being handed to
 * inventory_*_new() -- confirm whether those constructors copy the id.
 */
inventory_entry_t *
tla_path_id (arch_project_tree_t * tree, int * errn, t_uchar const * const path, struct stat * known_lstat)
{
  t_uchar * answer_id = 0;
  t_uchar * as_file = 0;
  t_uchar * basename = 0;
  t_uchar * dir = 0;
  t_uchar * dir_as_file = 0;
  t_uchar * dir_basename = 0;
  t_uchar * id_file = 0;
  t_uchar * fq_path = NULL;
  struct stat local_stat_buf;
  struct stat *stat_buf = NULL;
  int is_dir;
  int is_symlink;

  if (!path)
    {
      *errn = EINVAL;

      /* Common exit: every path through the function ends here. */
    return_answer_id:
      lim_free (0, as_file);
      lim_free (0, dir);
      lim_free (0, dir_as_file);
      lim_free (0, dir_basename);
      lim_free (0, id_file);
      {
        /* FIXME RBC 20050331 typed returns - dir link etc */
        inventory_entry_t *answer = NULL;

        /* FIXME RBC 20050331 make this cleaner and only-called once */
        if (answer_id)
          {
            if (!stat_buf)
              {
                if (known_lstat)
                  stat_buf = known_lstat;
                else
                  {
                    stat_buf = &local_stat_buf;
                    if (vu_lstat (errn, fq_path, stat_buf))
                      {
                        /* the id is of no use without the file type */
                        lim_free (0, answer_id);
                        lim_free (0, basename);
                        lim_free (0, fq_path);
                        return NULL;
                      }
                  }
              }
            if (S_ISDIR (stat_buf->st_mode))
              answer = inventory_dir_new (NULL, basename, answer_id);
            else if (S_ISLNK (stat_buf->st_mode))
              answer = inventory_link_new (NULL, basename, answer_id);
            else
              answer = inventory_file_new (NULL, basename, answer_id);
          }
        lim_free (0, basename);
        lim_free (0, fq_path);
        return answer;
      }
    }

  as_file = file_name_from_directory (0, (t_uchar *)path);
  if (!as_file)
    {
    enomem_error:
      *errn = ENOMEM;
      goto return_answer_id;
    }

  fq_path = file_name_in_vicinity (0, tree->root, as_file);
  basename = file_name_tail (0, as_file);

  if (tree->tag_method == arch_names_id_tagging)
    {
      answer_id = str_alloc_cat (0, "?", as_file);
      if (!answer_id)
        goto enomem_error;
      else
        goto return_answer_id;
    }

  dir = file_name_directory (0, as_file);
  if (!dir)
    dir = str_save (0, ".");

  if (!(basename && dir))
    goto enomem_error;

  dir_as_file = file_name_from_directory (0, dir);
  if (!dir_as_file)
    goto enomem_error;

  dir_basename = file_name_tail (0, dir_as_file);
  if (!dir_basename)
    goto enomem_error;

  /* Explicit id files use their contents as id, with the
   * prefix 'E'.
   */
  if (!str_cmp (dir_basename, ".arch-ids"))
    {
      answer_id = explicit_id (errn, 0, 0, fq_path, fq_path, "E_", 0, known_lstat, tree->id_tagging_shortcut);
      goto return_answer_id;
    }

  /* Explicit id file directories:
   */
  if (!str_cmp (basename, ".arch-ids"))
    {
      long amt;

      answer_id = str_alloc_cat (0, "D_", as_file);
      if (!answer_id)
        goto enomem_error;
      amt = smash_non_graphical (answer_id, str_length (answer_id));
      answer_id[amt] = 0;
      goto return_answer_id;
    }

  /* Paths beginning with "./{arch}" are tagged with their own
   * path name, with the prefix "A_". The presumptions are that these
   * files never move, and that if a file is present, its contents are
   * invariant.
   */
  if (is_at_or_underneath_archdir (as_file))
    {
      long amt;

      answer_id = str_alloc_cat (0, "A_", as_file);
      if (!answer_id)
        goto enomem_error;
      amt = smash_non_graphical (answer_id, str_length (answer_id));
      answer_id[amt] = 0;
      goto return_answer_id;
    }

  /* Try for an explicit id:
   */
  if (known_lstat)
    stat_buf = known_lstat;
  else if (0 > vu_lstat (errn, fq_path, &local_stat_buf))
    goto return_answer_id;
  else
    stat_buf = &local_stat_buf;

  if (S_ISDIR (stat_buf->st_mode))
    {
      is_dir = 1;
      is_symlink = 0;
      id_file = file_name_in_vicinity (0, fq_path, ".arch-ids/=id");
      if (!id_file)
        goto enomem_error;
    }
  else
    {
      is_dir = 0;
      is_symlink = S_ISLNK (stat_buf->st_mode);
      id_file = file_name_in_vicinity (0, dir, ".arch-ids/");
      if (!id_file)
        goto enomem_error;
      id_file = str_realloc_cat (0, id_file, basename);
      if (!id_file)
        goto enomem_error;
      id_file = str_realloc_cat (0, id_file, ".id");
      if (!id_file)
        goto enomem_error;
      id_file = str_replace (id_file, file_name_in_vicinity (0, tree->root, id_file));
      if (!id_file)
        goto enomem_error;
    }

  *errn = 0;
  answer_id = explicit_id (errn, 0, 0, path, id_file, "x_", 0, stat_buf, tree->id_tagging_shortcut);

  if (answer_id || (*errn != ENOENT))
    goto return_answer_id;
  else
    {
      /* Is there a .arch-ids/=all file here?
       */
      lim_free (0, id_file);
      if (is_dir)
        id_file = file_name_in_vicinity (0, fq_path, ".arch-ids/=all");
      else
        id_file = file_name_in_vicinity (0, dir, ".arch-ids/=all");

      if (!id_file)
        goto enomem_error;

      *errn = 0;
      answer_id = explicit_id (errn, &tree->explicit_skips, 0, fq_path, id_file, "a_", (is_dir ? (t_uchar *)"./." : basename), stat_buf, NULL);
      if (answer_id || (*errn != ENOENT))
        goto return_answer_id;

      if ((tree->tag_method == arch_implicit_id_tagging) && !is_dir && !is_symlink)
        {
          *errn = 0;
          answer_id = implicit_id (errn, 0, fq_path, basename, "i_", stat_buf, tree->id_tagging_shortcut);
          if (answer_id || *errn)
            goto return_answer_id;
        }

      if ((tree->tag_method == arch_tagline_id_tagging) && !is_dir && !is_symlink)
        {
          *errn = 0;
          answer_id = implicit_id (errn, 0, fq_path, 0, "i_", stat_buf, tree->id_tagging_shortcut);
          if (answer_id || *errn)
            goto return_answer_id;
        }

      /* is there an "=default" id?
       */
      lim_free (0, id_file);
      if (is_dir)
        id_file = file_name_in_vicinity (0, fq_path, ".arch-ids/=default");
      else
        id_file = file_name_in_vicinity (0, dir, ".arch-ids/=default");

      if (!id_file)
        goto enomem_error;

      *errn = 0;
      answer_id = explicit_id (errn, &tree->explicit_skips, 0, fq_path, id_file, "w_", (is_dir ? (t_uchar *)"./." : basename), stat_buf, NULL);
      if (answer_id || (*errn != ENOENT))
        goto return_answer_id;

      /* no explicit, =all, implicit, tagline, or =default id.
       */
      if (tree->untagged_is_source)
        {
          int skipped;
          struct stat dont_care_stat_buf;

          lim_free (0, id_file);
          id_file = file_name_in_vicinity (0, dir, ".arch-ids/=dont-care");
          if (!id_file)
            goto enomem_error;

          skipped = (!!assoc_ref (tree->explicit_skips, id_file));

          if (!skipped && (0 <= vu_lstat (errn, id_file, &dont_care_stat_buf)))
            {
              long amt;

              answer_id = str_alloc_cat (0, "k_", as_file);
              if (!answer_id)
                goto enomem_error;
              amt = smash_non_graphical (answer_id, str_length (answer_id));
              answer_id[amt] = 0;
              goto return_answer_id;
            }

          /* remember that this =dont-care file is absent */
          if (!skipped)
            assoc_set (&tree->explicit_skips, id_file, "yes");

          if (*errn == ENOENT)
            {
              long amt;

              answer_id = str_alloc_cat (0, "?_", as_file);
              if (!answer_id)
                goto enomem_error;
              amt = smash_non_graphical (answer_id, str_length (answer_id));
              answer_id[amt] = 0;
              goto return_answer_id;
            }
          else
            goto return_answer_id;
        }
      else
        {
          *errn = 0;
          goto return_answer_id;
        }
    }
}


/**
 * \brief read the id stored in explicit id file PATH
 * \return "x_" followed by the first line of the file (newly
 *         allocated), or 0 with `*errn' set.
 */
t_uchar *
arch_id_from_explicit_file (int *errn, t_uchar * path)
{
  return explicit_id (errn, 0, 0, 0, path, "x_", 0, NULL, NULL);
}




/* Return non-zero iff REL_FILE matches the "{arch}" control area
 * pattern below.  The regexp is compiled once, on first use.
 */
static int
is_at_or_underneath_archdir (char * rel_file)
{
  static int compiled = 0;
  static regex_t pattern = {0,};

  if (!compiled)
    {
      int re_error;

      re_error = regcomp (&pattern, "^(.*/)?(\\{arch\\}(/[a-zA-Z=][^/]*)*|\\{arch\\}/\\.arch-project-tree)$", REG_EXTENDED);
      invariant (!re_error);
      compiled = 1;
    }

  return filename_matches (&pattern, rel_file);
}


/* Return 1 if FILENAME matches PATTERN, 0 if it does not; any other
 * regexec result is a panic.
 */
static int
filename_matches (regex_t * pattern, char * filename)
{
  int answer;

  answer = regexec (pattern, filename, 0, 0, 0);

  if (answer == REG_NOMATCH)
    return 0;

  if (answer == REG_NOERROR)
    return 1;

  panic ("unexpected regexec error in arch_inventory_traversal");
  return -1;
}


/* Compute an id from the contents of ID_FILE.
 *
 * The answer is PREFIX, followed by the text of ID_FILE up to (not
 * including) its first newline with non-graphical characters smashed,
 * followed by POSTFIX when POSTFIX is not 0.  It is allocated against
 * LIMITS.
 *
 * SKIPS, when not 0, is a table of id files already known to be
 * unopenable: a listed ID_FILE fails at once with ENOENT, and an
 * ID_FILE that can not be opened is added to it.
 *
 * ID_TAGGING_SHORTCUT, when not 0, maps inode signatures to cached ids.
 * The signature is taken from STATB, or from an lstat of ARG_FILE when
 * STATB is 0.  A cached "E_" id is used when ARG_FILE is ID_FILE
 * itself; a cached "x_" id is used when the id file's own (cached) id
 * agrees with it.
 *
 * Returns 0 with `*errn' set on failure.
 */
static t_uchar *
explicit_id (int * errn,
             assoc_table * skips,
             struct alloc_limits * limits,
             t_uchar const * const arg_file,
             t_uchar * id_file,
             t_uchar * prefix,
             t_uchar * postfix,
             struct stat * statb,
             assoc_table id_tagging_shortcut)
{
  int id_fd;
  t_uchar * answer;
  char buf[1024];
  long amt;
  int ign;

  if (skips && assoc_ref (*skips, id_file))
    {
      *errn = ENOENT;
      return 0;
    }

  if (id_tagging_shortcut)
    {
      struct stat stat_buf;
      /* prefix determines action..
       * E_ - it's a .arch-ids file we are examinig
       * x_ - it's a normal file that /may/ have an id file
       * w_ a_ unsupported
       */
      /* id_file is always the (prospective) id file */
      /* TODO?: Cache the inode sig of the id file */
      t_uchar * signature;
      t_uchar * cached_id;

      if (statb)
        stat_buf = *statb;
      else if (0 > vu_lstat (errn, (t_uchar *)arg_file, &stat_buf))
        return 0;

      signature = arch_statb_inode_sig (&stat_buf);
      cached_id = assoc_ref (id_tagging_shortcut, signature);
      if (!cached_id)
        debug (dbg_invent, 8, "no cached id for signature '%s'\n", signature);
      lim_free (0, signature);

      if (cached_id) /* cache hit */
        {
          if (!str_cmp (arg_file, id_file)) /* id of the .id file itself */
            {
              if ((cached_id[0] == 'E') && (cached_id[1] == '_'))
                {
                  debug (dbg_invent, 8, "Cached signature : '%s' on '%s'\n", cached_id, id_file);
                  return str_save (0, cached_id);
                }
              /* unexpected cached result, ignore */
            }
          else if ((cached_id[0] == 'x') && (cached_id[1] == '_'))
            {
              /* TODO: we could save 1 stat per .id file if we cached the results of the lookups somewhere -
               * .arch-ids is read before the files in the dir.
               */
              /* this call here should get a cache-hit on the id shortcut file */
              t_uchar *id_tag = explicit_id (errn, skips, limits, id_file, id_file, "E_", 0, 0, id_tagging_shortcut);

              if (id_tag && (!str_cmp (id_tag + 1, cached_id + 1)))
                {
                  lim_free (0, id_tag);
                  debug (dbg_invent, 8, "Cached signature on : '%s'\n", cached_id);
                  return str_save (0, cached_id);
                }
              else
                lim_free (0, id_tag);
            }
          debug (dbg_invent, 8, "Cached signature '%s' not usable\n", cached_id);
        }
    }

  debug (dbg_invent, 8, "Reading '%s' for explicit id on '%s'\n", id_file, arg_file);

  id_fd = vu_open (errn, id_file, O_RDONLY, 0);
  if (id_fd < 0)
    {
      if (skips)
        assoc_set (skips, id_file, "yes");
      return 0;
    }

  answer = str_save (limits, prefix);
  if (!answer)
    goto enomem_error;

  while (1)
    {
      t_uchar * eol;

      amt = vu_read_retry (errn, id_fd, buf, sizeof (buf));

      if (amt < 0)
        {
          lim_free (limits, answer);
          vu_close (&ign, id_fd);
          return 0;
        }

      if (!amt)
        break;

      /* Only the first line counts: stop appending at the newline. */
      eol = str_chr_index_n (buf, amt, '\n');
      if (eol)
        amt = eol - (t_uchar *)buf;

      amt = smash_non_graphical (buf, amt);
      answer = str_realloc_cat_n (limits, answer, buf, amt);
      if (!answer)
        goto enomem_error;

      if (eol)
        break;
    }

  if (postfix)
    answer = str_realloc_cat (limits, answer, postfix);
  if (!answer)
    goto enomem_error;

  vu_close (&ign, id_fd);
  debug (dbg_invent, 8, "Created signature : '%s' on '%s' (from '%s')\n", answer, arg_file, id_file);
  return answer;

  /* Reached only with id_fd open and answer == 0. */
 enomem_error:
  *errn = ENOMEM;
  vu_close (&ign, id_fd);
  return 0;
}


static t_uchar *
implicit_id (int * errn,
             struct alloc_limits * limits,
             t_uchar const * const file,
             t_uchar * basename,
             t_uchar * prefix,
             struct stat * statb,
             assoc_table id_tagging_shortcut)
{
  int file_fd;
  struct stat file_stat_buf;
  char buf[1026]; /* one byte slack for \0 termination */
  int amt;
  int line;
  int bottom;

  if (id_tagging_shortcut)
    {
t_uchar * signature = arch_statb_inode_sig (statb); t_uchar * cached_id = assoc_ref (id_tagging_shortcut, signature); lim_free (0, signature); if (cached_id && (cached_id[0] == 'i') && (cached_id[1] == '_')) return str_save (0, cached_id); else if (cached_id) return 0; } /* This is a slightly screwy, historic interface. * * Passing `base != 0' means the old, larch-style tag syntax. * * Passing `base == 0' means tagline syntax. */ /* Search the file itself (last, then first 1K) for a line beginning: * * tla-style tagline id tagging (basename == 0) * ----------------------------------------- * * arch-tag: * * * larch-style implicit id tagging (basename != 0) * -------------------------------------------- * * basename- * * or * * tag: * * after the dash, skip any blanks -- the rest is the id. */ file_fd = vu_open (errn, (t_uchar *)file, O_RDONLY, 0); if (file_fd < 0) return 0; if (0 > vu_fstat (errn, file_fd, &file_stat_buf)) goto error_return; for (bottom = 1; bottom >= 0; --bottom) { if (!bottom) { if (0 > vu_lseek (errn, file_fd, 0, SEEK_SET)) { int ign; error_return: vu_close (&ign, file_fd); return 0; } amt = vu_read_retry (errn, file_fd, buf, sizeof (buf) - 2); if (amt < 0) goto error_return; } else { char * x; if (file_stat_buf.st_size > sizeof (buf) - 1) amt = sizeof (buf) - 1; else continue; /* Yes, this is a off by one error. However changing it * breaks existing file-ids */ if (0 > vu_lseek (errn, file_fd, -1026, SEEK_END)) goto error_return; amt = vu_read_retry (errn, file_fd, buf, sizeof (buf) - 1); if (amt < 0) goto error_return; x = str_chr_index_n (buf, amt, '\n'); if (!x) continue; amt = amt - (1 + x - buf); mem_move (buf, x + 1, amt); } buf[amt] = 0; line = 0; while (1) { int is_inventory_id; /* skip punctuation and blanks at the start of the line */ while ((line < amt) && (char_is_punct (buf[line]) || char_is_blank (buf[line]))) ++line; if (line == amt) break; if (buf[line] == '\n') { ++line; continue; } is_inventory_id = (basename ? 
!str_cmp_prefix ("tag:", buf + line) : !str_cmp_prefix ("arch-tag:", buf + line)); if ( !is_inventory_id && (!basename || str_cmp_prefix (basename, buf + line))) { t_uchar * eol; not_this_line: eol = str_chr_index_n (buf + line, amt - line, '\n'); if (!eol) break; line = eol - (t_uchar *)buf; } else { t_uchar * eol; if (is_inventory_id) line += (basename ? str_length ("tag:") : str_length ("arch-tag:")); else line += str_length (basename); if (!is_inventory_id) { while ((line < amt) && char_is_blank (buf[line])) ++line; if (line == amt) break; if (buf[line] != '-') goto not_this_line; ++line; } if (line == amt) break; /* This is the tag line. */ while ((line < amt) && char_is_blank (buf[line])) ++line; eol = str_chr_index_n (buf + line, amt - line, '\n'); if (!eol) /* end of buffer no eol */ { eol = buf + amt; if (!bottom) if (file_stat_buf.st_size > sizeof (buf) - 1) safe_printfmt (2, "Warning: top-of-file truncated tag in: %s\n", file); } if (0 == (eol - (t_uchar *)(buf + line))) { /* an empty id */ break; } { long size; t_uchar * answer; size = smash_non_graphical (buf + line, eol - (t_uchar *)(buf + line)); answer = str_alloc_cat_n (limits, prefix, buf + line, size); if (0 > vu_close (errn, file_fd)) goto error_return; if (!answer) *errn = ENOMEM; return answer; } } } } if (0 > vu_close (errn, file_fd)) goto error_return; *errn = 0; return 0; } static long smash_non_graphical (t_uchar * buf, long amt) { long x; while (amt > 0) { if (!char_is_graph (buf[amt - 1])) --amt; else break; } for (x = 0; x < amt; ++x) { if (!char_is_graph (buf[x])) buf[x] = '_'; } return amt; } /** * \brief convenience function for arbitrary id retrieval. * * please us arch_inventory_id if you have a tree object * \param some_path the filepath relative or absolute * \param method set to other than arch_unspecified_id_tagging to override for trees that support overriding. 
*/
t_uchar *
arch_id_for_path(t_uchar const * const some_path, enum arch_id_tagging_method method)
{
  t_uchar * full_path = arch_abs_path (some_path);
  t_uchar * tree_dir;
  t_uchar * tree_relative;
  t_uchar * id = NULL;
  arch_project_tree_t * project;

  /* a directory is its own starting point; for anything else start
   * from the directory part of the absolute path
   */
  tree_dir = (safe_file_is_directory (some_path)
              ? str_save (0, some_path)
              : file_name_directory_file (0, full_path));

  project = arch_project_tree_new_ext (talloc_context, tree_dir, 1, 0);

  /* load the inventory rules */
  arch_tree_id_tagging_method (project, NULL, 0);

  /* override them if needed */
  if (arch_unspecified_id_tagging != method)
    project->tag_method = method;

  tree_relative = arch_project_tree_rel_path_from_abs (project, full_path);
  id = arch_inventory_id (project, tree_relative, 0);

  /* the id is a separate string; everything else made here is released */
  arch_project_tree_delete (project);
  lim_free (0, tree_dir);
  lim_free (0, full_path);
  lim_free (0, tree_relative);

  return id;
}



/* tag: Tom Lord Wed May 14 07:20:26 2003 (inv-tags.c)
 */