/* inv-ids.c:
*
****************************************************************
* Copyright (C) 2002, 2003 Tom Lord
*
* See the file "COPYING" for further information about
* the copyright and warranty status of this work.
*/
#include "hackerlab/bugs/panic.h"
#include "hackerlab/bugs/exception.h"
#include "hackerlab/os/errno.h"
#include "hackerlab/os/errno-to-string.h"
#include "hackerlab/os/time.h"
#include "hackerlab/os/sys/types.h"
#include "hackerlab/os/unistd.h"
#include "hackerlab/mem/mem.h"
#include "hackerlab/mem/talloc.h"
#include "hackerlab/char/char-class.h"
#include "hackerlab/char/str.h"
#include "hackerlab/fmt/cvt.h"
#include "hackerlab/fs/file-names.h"
#include "hackerlab/vu/safe.h"
#include "hackerlab/arrays/ar.h"
#include "libfsutils/ensure-dir.h"
#include "libarch/my.h"
#include "libarch/project-tree.h"
#include "libarch/patch-logs.h"
#include "libarch/invent.h"
#include "libarch/inode-sig.h"
#include "libarch/inv-ids.h"
#include "libarch/pfs.h"
#include "hackerlab/fs/cwd.h"
#include "po/gettext.h"
#include "libawk/relational.h"
#include "libarch/changelogs.h"
#include "libarch/debug.h"
/* __STDC__ prototypes for static functions
 *
 * Forward declarations of helpers private to this file, so that the
 * public functions below can call them before their definitions appear.
 */
static int is_at_or_underneath_archdir (char * rel_file);
static int filename_matches (regex_t * pattern, char * filename);
/* Reads an id from ID_FILE on behalf of ARG_FILE; the result is given PREFIX. */
static t_uchar * explicit_id (int * errn,
assoc_table * skips,
struct alloc_limits * limits,
t_uchar const * const arg_file,
t_uchar * id_file,
t_uchar * prefix,
t_uchar * postfix,
struct stat * statb,
assoc_table id_tagging_shortcut);
/* NOTE(review): defined outside the visible part of this file; presumably
 * extracts an embedded (tagline / implicit) id from FILE -- confirm. */
static t_uchar * implicit_id (int * errn,
struct alloc_limits * limits,
t_uchar const * const file,
t_uchar * basename,
t_uchar * prefix,
struct stat * statb,
assoc_table id_tagging_shortcut);
/* Callers in this file use the returned value as the new length of BUF. */
static long smash_non_graphical (t_uchar * buf, long amt);
/* Compute the inventory id of the patch-log file for ARCHIVE/REVISION.
 *
 * The id is the string "A_" followed by the path that arch_log_file
 * yields for the tree ".".  The result comes from str_alloc_cat; the
 * intermediate path is released before returning.
 */
t_uchar *
arch_log_file_id (t_uchar * archive, t_uchar * revision)
{
  t_uchar * const log_path = arch_log_file (".", archive, revision);
  t_uchar * const log_id = str_alloc_cat (0, "A_", log_path);

  lim_free (0, log_path);
  return log_id;
}
/* Build an association table mapping inventory id -> file name for the
 * first column of every record in FILE_LIST.
 *
 * Side effect: if TREE has untagged_is_source set, a notice is printed
 * on fd 2 and the flag is cleared on the tree (it is not restored).
 *
 * Each id is obtained with arch_inventory_id and released once it has
 * been stored with assoc_set.
 */
assoc_table
arch_filenames_ids (rel_table * file_list, arch_project_tree_t * tree)
{
  struct arch_inventory_options inv_opts = {0, };
  assoc_table ids_to_names = 0;
  int const n_files = rel_n_records (* file_list);
  int row;

  inv_opts.categories = arch_inventory_source;
  inv_opts.want_ids = 1;
  inv_opts.include_excluded = 1;
  arch_get_inventory_naming_conventions (&inv_opts, tree);

  if (tree->untagged_is_source)
    {
      /* FIXME if this is annoying remove it. but its probably a bug to do this at all
       */
      safe_printfmt (2, "overriding untagged is source for arch_filenames_ids\n");
      tree->untagged_is_source = 0;
    }

  row = 0;
  while (row != n_files)
    {
      t_uchar * file_id = arch_inventory_id (tree, (* file_list) [row] [0], 0);

      assoc_set (&ids_to_names, file_id, (* file_list) [row] [0]);
      lim_free (0, file_id);
      ++row;
    }

  arch_free_inventory_naming_conventions (&inv_opts);
  return ids_to_names;
}
/**
 * \brief retrieve a file id for an arbitrary file on disk
 * \param tree the tree whose inventory is consulted for the id.
 * \param path the filename to inventory
 * \param known_lstat prior stat result for this file; callers in this
 *        file pass 0 when they have none
 * \return a str_save copy of the entry's id, or NULL when the tree
 *         yields no inventory entry for path
 */
t_uchar *
arch_inventory_id (arch_project_tree_t * tree,
                   t_uchar const * const path,
                   struct stat * known_lstat)
{
  inventory_entry_t * found = arch_project_tree_path_inventory (tree, path, known_lstat);
  t_uchar * id_copy = found ? str_save (0, found->id) : NULL;

  /* the tree gives us a reference ... for transition purposes.
   * FIXME RBC 20050331 the tree should just give us a pointer, not a reference locked
   * object, and cache ids sanely. not possible for hackerlab trees yet.
   */
  talloc_free (found);
  return id_copy;
}
/* Return a str_save copy of the textual name of tagging method M
 * ("names", "implicit", "tagline" or "explicit").
 *
 * Any other value is a programming error and ends in panic.
 */
t_uchar *
arch_id_tagging_method_name (enum arch_id_tagging_method m)
{
  char * method_text = 0;

  if (m == arch_names_id_tagging)
    method_text = "names";
  else if (m == arch_implicit_id_tagging)
    method_text = "implicit";
  else if (m == arch_tagline_id_tagging)
    method_text = "tagline";
  else if (m == arch_explicit_id_tagging)
    method_text = "explicit";
  else
    {
      panic ("unknown id tagging method (arch_id_tagging_method_name)");
      return 0;                 /* not reached */
    }

  return str_save (0, method_text);
}
/* Translate NAME (compared case-insensitively) into the matching
 * tagging method.
 *
 * An unrecognised name is reported on fd 2 and the process exits with
 * status 2.
 */
enum arch_id_tagging_method
arch_id_tagging_method_from_name (t_uchar * name)
{
  if (0 == str_casecmp (name, "explicit"))
    return arch_explicit_id_tagging;

  if (0 == str_casecmp (name, "implicit"))
    return arch_implicit_id_tagging;

  if (0 == str_casecmp (name, "tagline"))
    return arch_tagline_id_tagging;

  if (0 == str_casecmp (name, "names"))
    return arch_names_id_tagging;

  safe_printfmt (2, "no such id tagging method (%s)\n", name);
  exit (2);
  return arch_names_id_tagging; /* notreached */
}
/* Produce the text of a default `=tagging-method' file for METHOD.
 *
 * The result is one string built with str_alloc_cat_many: fixed
 * explanatory comment blocks interleaved with the method name and the
 * default naming-convention regexps for each inventory category.
 * arch_unspecified_id_tagging is treated as arch_explicit_id_tagging.
 *
 * The comment text below is runtime output (it ends up in the generated
 * file), so it must not be edited casually.
 *
 * The intermediate strings are released before returning; the returned
 * string is handed to the caller.
 */
t_uchar *
arch_default_id_tagging_method_contents (enum arch_id_tagging_method method)
{
t_uchar * method_name = 0;
t_uchar * excludes_regexp = 0;
t_uchar * junk_regexp = 0;
t_uchar * backup_regexp = 0;
t_uchar * precious_regexp = 0;
t_uchar * unrecognized_regexp = 0;
t_uchar * source_regexp = 0;
t_uchar * answer = 0;
/* An unspecified method falls back to the explicit method. */
if (method == arch_unspecified_id_tagging)
method = arch_explicit_id_tagging;
method_name = arch_id_tagging_method_name (method);
/* Fetch the default regexp for every inventory category. */
excludes_regexp = arch_default_naming_conventions_regexp (arch_inventory_excludes);
backup_regexp = arch_default_naming_conventions_regexp (arch_inventory_backup);
junk_regexp = arch_default_naming_conventions_regexp (arch_inventory_junk);
precious_regexp = arch_default_naming_conventions_regexp (arch_inventory_precious);
unrecognized_regexp = arch_default_naming_conventions_regexp (arch_inventory_unrecognized);
source_regexp = arch_default_naming_conventions_regexp (arch_inventory_source);
/* Assemble the file: the argument list is terminated by str_end. */
answer = str_alloc_cat_many (0,
("# id tagging method\n"
"#\n"
"# This determines how \"inventory ids\", strings conveying\n"
"# logical file identity, are computed for each file, directory\n"
"# and symbolic link.\n"
"#\n"
"# The choices are:\n"
"#\n"
"# tagline: inventory ids may be set using add-id, or omitted\n"
"# (though tree-lint warns about omitted ids), or in\n"
"# text files, set in a comment line near the top or\n"
"# bottom of the file of a form like \"<PUNCT> arch-tag: <STRING>\".\n"
"# Renames of files with no id are treated as a combined\n"
"# add and delete (e.g., local changes can be lost).\n"
"#\n"
"# explicit: ids must be set using add-id. Files passing the naming\n"
"# conventions for source, but lacking add-id ids, are treated\n"
"# as unrecognized files (see below).\n"
"#\n"
"# names: ids are not used. All renames are treated as add+delete\n"
"#\n"
"# implicit: similar to tagline, but in addition, the id comment\n"
"# may be of the form \"<PUNCT> <BASENAME> - <STRING>\", where\n"
"# <BASENAME> is the basename of the file. This method\n"
"# is not recommended, but is retained for backwards\n"
"# compatibility.\n"
"#\n"
"\n"),
method_name, "\n",
"\n",
("# disposition of untagged source files\n"
"#\n"
"# (NOTE: this option must follow the tagline/explicit/names/implicit\n"
"# directive.)\n"
"#\n"
"# By default, the explicit method treats untagged files matching the naming\n"
"# conventions for source files as unrecognized and the implicit and tagline\n"
"# methods treat such untagged files as source.\n"
"#\n"
"# You can override those default treatments of untagged files by specifying\n"
"# which inventory category (see below) should be used for files whose names\n"
"# suggest they are source but which lack ids.\n"
"#\n"
"# This feature may be especially convenient when importing sources that do\n"
"# not use file naming conventions that can be conveniently described with\n"
"# the regexps below.\n"
"#\n"
"# Uncomment one of these lines as appropriate to override the default:\n"
"#\n"
"# untagged-source source\n"
"untagged-source precious\n"
"# untagged-source backup\n"
"# untagged-source junk\n"
"# untagged-source unrecognized\n"
"#\n"
"\n"),
("# naming convention regexps\n"
"#\n"
"# For various commands, arch traverses your project trees, categorizing\n"
"# the files found there. For example, when importing a project for\n"
"# the first time, this traversal determines which files are included\n"
"# in the import.\n"
"#\n"
"# The categories of greatest importance are defined in terms of three\n"
"# questions:\n"
"#\n"
"# 1) If arch makes a local copy of this tree, should this file be included\n"
"# in the copy?\n"
"#\n"
"# 2) Is it generally safe to remove this file based only on how it is named?\n"
"# For example, can it be safely clobbered by a new file of the same name?\n"
"#\n"
"# 3) Should this file be archived along with the project? For example,\n"
"# should it be included when importing the project for the first time?\n"
"#\n"
"# The primary categories are:\n"
"#\n"
"# category: copy locally? safe to clobber? archive?\n"
"#\n"
"# junk no yes no\n"
"# backup no no no\n"
"# precious yes no no\n"
"# source yes no yes\n"
"#\n"
"# There are two additional categories, unrelated to those questions:\n"
"#\n"
"# excluded -- during a traversal by inventory, this file (and,\n"
"# if a directory, its contents) are simply ignored unless the\n"
"# --all flag is specified. This category is usually used to\n"
"# omit arch's own control files from a listing.\n"
"#\n"
"# unrecognized -- a category for files whose name fits no other pattern.\n"
"# Usually, the presence of unrecognized files is treated as an\n"
"# error. You can use the naming conventions to define certain\n"
"# names as \"deliberately unrecognized\" -- i.e., filenames whose\n"
"# presence in a source tree you _want_ to be treated as an error\n"
"#\n"
"# The traveral algorithm is described here, along with lines you can edit to\n"
"# customize the naming conventions.\n"
"#\n"
"# Starting at \".\" within a project tree (usually at the root of the\n"
"# project tree) consider each filename in that directory.\n"
"#\n"
"# The files \".\" and \"..\" are simply ignored.\n"
"#\n"
"# Files containing \"illegal characters\" are characterized as unrecognized.\n"
"# If they are directories, traversal does _not_ descend into those directories.\n"
"# Currently, the illegal characters are *, ?, [, ], \\, space, and tab.\n"
"# (The set of illegal characters may shrink in future releases.)\n"
"#\n"
"# In an interactive call to inventory _without_ the --all flag,\n"
"# names are next compared to the exclude regexp defined here. Those that\n"
"# are ignored and not descended below. (Most arch operations performing\n"
"# traversals internally, e.g. import, do not use this pattern\n"
"# and skip this step of the algorithm.\n"
"#\n"),
"\n",
"exclude ", excludes_regexp, "\n",
"\n",
("# If the file has a name that begins with \"++\", it is categorized as\n"
"# _precious_. Names of this form are hard-wired and reserved for use by arch\n"
"# itself. Traversal does not descend into precious directories, but when a\n"
"# precious directory is copied, its contents are recursively copied.\n"
"#\n"
"# Files and directories that reach this stage and which arch recognizes as its\n"
"# own control files are classified at this step as source. Traversal _does_\n"
"# descend into source directories.\n"
"#\n"
"# If the file has a name that begins with \",,\", it is categorized as _junk_.\n"
"# Names of this form are hard-wired and reserved for use by arch and other tools,\n"
"# and arch may clobber such files without warning. In a project tree, when no \n"
"# arch commands are running, it is safe for users to delete any \",,\" files. \n"
"# Although the general rule for junk files is that arch is free to clobber them,\n"
"# in fact, arch will only ever clobber files starting with \",,\".\n"
"#\n"
"# Traversal does not descend into junk directories.\n"
"#\n"
"# For your convenience, at this step of the traversal, you can classify\n"
"# additional files as junk or precious:\n"
"#\n"),
"\n",
"junk ", junk_regexp, "\n",
"\n",
"precious ", precious_regexp, "\n",
"\n",
("# Files matching the following regexp are classified as backup files, and\n"
"# traversal does not descend into backup directories:\n"
"#\n"),
"\n",
"backup ", backup_regexp, "\n",
"\n",
("# If you want to force certain filenames to be treated as errors when present,\n"
"# you can add them to the regexp for deliberately unrecognized files. Traversal\n"
"# does not descend into unrecognized directories.\n"),
"\n",
"unrecognized ", unrecognized_regexp, "\n",
"\n",
("# Files which match the following pattern are treated as source files.\n"
"# Traversal _does_ descend into source directories:\n"),
"\n",
"source ", source_regexp, "\n",
"\n",
("# Any files not classified by the above rules are classified as unrecognized.\n"
"# Traversal does not descend into unrecognized directories.\n"
"\n"),
str_end);
/* Everything that was concatenated has been copied into `answer'. */
lim_free (0, method_name);
lim_free (0, excludes_regexp);
lim_free (0, junk_regexp);
lim_free (0, backup_regexp);
lim_free (0, precious_regexp);
lim_free (0, unrecognized_regexp);
lim_free (0, source_regexp);
return answer;
}
/* Return the path of the "=tagging-method" file inside the control
 * directory of TREE (as given by arch_tree_ctl_dir on tree->root).
 * The result comes from file_name_in_vicinity.
 */
t_uchar *
arch_tree_id_tagging_method_file (arch_project_tree_t * tree)
{
  t_uchar * const control_dir = arch_tree_ctl_dir (tree->root);
  t_uchar * const method_path = file_name_in_vicinity (0, control_dir, "=tagging-method");

  lim_free (0, control_dir);
  return method_path;
}
/**
 * \brief determine the id tagging method for a tree
 *
 * This is the only function allowed to call vtable->id_tagging_method
 * directly; every other function should go through this wrapper.
 * All three arguments are forwarded unchanged to the tree's vtable.
 */
enum arch_id_tagging_method
arch_tree_id_tagging_method (arch_project_tree_t * tree, enum arch_inventory_category * cat_var, int strict)
{
  enum arch_id_tagging_method const tree_method = tree->vtable->id_tagging_method (tree, cat_var, strict);

  return tree_method;
}
/* Record METHOD as the id tagging method of TREE.
 *
 * A scratch file ",,tagging-method" is written next to the tree's
 * tagging-method file and then renamed over it.
 *
 *  - If the tagging-method file does not exist yet, the new file holds
 *    just the method name.
 *  - Otherwise the old file is copied line by line.  Every line that
 *    consists only of optional blanks, one of the method keywords
 *    (matched case-insensitively) and optional trailing whitespace is
 *    replaced by the new method name; all other lines are copied, with
 *    a newline added when the line lacks one.  If no line was replaced,
 *    the method name is appended at the end.
 */
void
arch_set_tree_id_tagging_method (arch_project_tree_t * tree,
                                 enum arch_id_tagging_method method)
{
  /* Method keywords, in the order in which they are tried. */
  static struct
  {
    char * word;
    size_t len;
  } const keywords[] =
    {
      { "names", sizeof ("names") - 1 },
      { "explicit", sizeof ("explicit") - 1 },
      { "implicit", sizeof ("implicit") - 1 },
      { "tagline", sizeof ("tagline") - 1 },
    };
  int const n_keywords = (int)(sizeof (keywords) / sizeof (keywords[0]));

  int unlink_errn;
  t_uchar * new_name = arch_id_tagging_method_name (method);
  t_uchar * target = arch_tree_id_tagging_method_file (tree);
  t_uchar * target_dir = file_name_directory_file (0, target);
  t_uchar * scratch = file_name_in_vicinity (0, target_dir, ",,tagging-method");
  int dest_fd;

  /* A stale scratch file may or may not exist; the result is ignored. */
  vu_unlink (&unlink_errn, scratch);
  dest_fd = safe_open (scratch, O_WRONLY | O_CREAT | O_EXCL, 0666);

  if (safe_access (target, F_OK))
    {
      safe_printfmt (dest_fd, "%s\n", new_name);
    }
  else
    {
      int const src_fd = safe_open (target, O_RDONLY, 0);
      int wrote_method = 0;

      for (;;)
        {
          t_uchar * text;
          long text_len;
          t_uchar * cursor;
          t_uchar * end;
          int keyword_seen = 0;
          int k;

          safe_next_line (&text, &text_len, src_fd);
          if (!text)
            break;

          end = text + text_len;
          cursor = text;
          while ((cursor < end) && char_is_blank (*cursor))
            ++cursor;

          for (k = 0; !keyword_seen && (k < n_keywords); ++k)
            {
              if (((end - cursor) >= keywords[k].len)
                  && !str_casecmp_n (keywords[k].word, keywords[k].len, cursor, keywords[k].len))
                {
                  keyword_seen = 1;
                  cursor += keywords[k].len;
                }
            }

          if (keyword_seen)
            {
              while ((cursor < end) && char_is_space (*cursor))
                ++cursor;
            }

          if (keyword_seen && (cursor == end))
            {
              /* The keyword stands alone on its line: substitute it. */
              safe_printfmt (dest_fd, "%s\n", new_name);
              wrote_method = 1;
            }
          else
            {
              safe_printfmt (dest_fd, "%.*s", (int)text_len, text);
              if (text_len && (text[text_len - 1] != '\n'))
                safe_printfmt (dest_fd, "\n");
            }
        }

      if (!wrote_method)
        safe_printfmt (dest_fd, "%s\n", new_name);

      safe_close (src_fd);
    }

  safe_close (dest_fd);
  safe_rename (scratch, target);

  lim_free (0, new_name);
  lim_free (0, target);
  lim_free (0, target_dir);
  lim_free (0, scratch);
}
/* Return the path of the explicit id file that belongs to PATH.
 *
 *   directory D  ->  D/.arch-ids/=id
 *   anything else (including a PATH that does not exist)
 *                ->  <dir of PATH>/.arch-ids/<tail of PATH>.id
 *
 * An lstat failure other than ENOENT is reported on fd 2 and the
 * process exits with status 2.
 */
t_uchar *
arch_explicit_id_file_for (t_uchar * path)
{
  int lstat_errn;
  struct stat sb;
  int treat_as_dir = 0;
  t_uchar * container;
  t_uchar * leaf;
  t_uchar * ids_dir;
  t_uchar * result;

  if (0 == vu_lstat (&lstat_errn, path, &sb))
    {
      treat_as_dir = S_ISDIR (sb.st_mode) ? 1 : 0;
    }
  else if (lstat_errn != ENOENT)
    {
      safe_printfmt (2, "i/o error (%d: %s) for vu_lstat of %s\n", lstat_errn, errno_to_string (lstat_errn), path);
      exit (2);
    }

  if (treat_as_dir)
    {
      container = str_save (0, path);
      leaf = str_save (0, "=id");
    }
  else
    {
      container = file_name_directory_file (0, path);
      leaf = str_realloc_cat (0, file_name_tail (0, path), ".id");
    }

  ids_dir = file_name_in_vicinity (0, container, ".arch-ids");
  result = file_name_in_vicinity (0, ids_dir, leaf);

  lim_free (0, container);
  lim_free (0, ids_dir);
  lim_free (0, leaf);
  return result;
}
/* Generate a fresh id string of the form
 *
 *   <arch_my_id ()> <ctime text without newline> <pid>.<sequence>
 *
 * The sequence number is a per-process counter that starts at 0 and is
 * advanced on every call.  Panics if the time cannot be obtained.
 */
t_uchar *
arch_generate_id (void)
{
  static unsigned long next_seq = 0;
  time_t stamp;
  char * stamp_text;
  t_uchar * eol;
  t_uchar * user_id;
  t_uchar pid_digits[128];
  t_uchar seq_digits[128];
  t_uchar * result;

  if (time (&stamp) < 0)
    panic ("unable to get time of day in arch_generate_id");

  /* ctime's text ends in a newline; cut the string there. */
  stamp_text = ctime (&stamp);
  eol = str_chr_index (stamp_text, '\n');
  if (eol)
    *eol = 0;

  user_id = arch_my_id ();

  cvt_ulong_to_decimal (pid_digits, (unsigned long) getpid ());
  cvt_ulong_to_decimal (seq_digits, (unsigned long) next_seq);
  ++next_seq;

  result = str_alloc_cat_many (0, user_id, " ", stamp_text, " ", pid_digits, ".", seq_digits, str_end);

  lim_free (0, user_id);
  return result;
}
/* Decide which explicit id PATH should receive.
 *
 * precondition: path must be accessible
 *
 * The tree is temporarily switched to tagline tagging with
 * untagged_is_source off, the current id of PATH is looked up, and both
 * tree settings are restored.  Then:
 *
 *   - the lookup id indicates a changelog  -> that id minus its first
 *                                             two characters
 *   - otherwise, ID is non-null            -> a copy of ID
 *   - otherwise                            -> a freshly generated id
 */
t_uchar *
arch_choose_explicit_id (arch_project_tree_t * tree, t_uchar * path, t_uchar * id)
{
  /* this sets untagged_is_source 0 so that we don't get names ids
   * however, this should not be needed - we should ask for the implicit
   * id directly
   * RBC 20050321
   */
  int const saved_untagged = tree->untagged_is_source;
  /* FIXME RBC 20050321 overriding the tagging method is not necesarily possible
   * fix this leaky abstraction (perhaps 'has_real_id ?') */
  enum arch_id_tagging_method const saved_method = tree->tag_method;
  t_uchar * probed_id;
  t_uchar * chosen;

  tree->untagged_is_source = 0;
  tree->tag_method = arch_tagline_id_tagging;
  probed_id = arch_inventory_id (tree, path, 0);
  tree->untagged_is_source = saved_untagged;
  tree->tag_method = saved_method;

  if (probed_id && arch_id_indicates_changelog (probed_id))
    {
      chosen = str_save (0, 2 + probed_id);
    }
  else if (id)
    {
      chosen = str_save (0, id);
    }
  else
    {
      chosen = arch_generate_id ();
    }

  lim_free (0, probed_id);
  return chosen;
}
/* Compare the tails of A_STRING and B_STRING.
 *
 * The number of trailing characters compared is the smallest of
 * LENGTH, the length of A_STRING and the length of B_STRING.  The
 * result is that of str_cmp on the two tails (0 when they are equal).
 *
 * A negative LENGTH is treated as 0, so two empty tails are compared.
 * Without the clamp the tail pointers would be moved past the
 * terminating NUL of each string.
 */
int
str_cmp_suffix_n(t_uchar * a_string, t_uchar * b_string, int length)
{
  int len_a = str_length(a_string);
  int len_b = str_length(b_string);
  int cmp_length = (len_a < len_b) ? len_a : len_b;

  cmp_length = (cmp_length < length) ? cmp_length : length;
  if (cmp_length < 0)
    cmp_length = 0;

  return str_cmp(a_string + len_a - cmp_length, b_string + len_b - cmp_length);
}
/* Return 1 when vu_lstat succeeds on PATH, 0 otherwise.  The error code
 * and the stat data are discarded.
 */
static int
path_exists (char const * path)
{
  struct stat ignored_stat;
  int ignored_errn;

  return 0 == vu_lstat (&ignored_errn, (char *)path, &ignored_stat);
}
/* Give PATH an explicit id and return that id.
 *
 * ID may be null, in which case arch_choose_explicit_id picks one.
 * Returns NULL without adding anything when PATH does not exist (a
 * message is printed on fd 2) or when the path is a control file for
 * which the ".arch-inventory" suffix comparison is non-zero.
 */
t_uchar *
arch_add_id (t_uchar * path, t_uchar * id)
{
  t_uchar * chosen_id = NULL;
  arch_project_tree_t * tree;
  t_uchar * abs_path;
  t_uchar * tree_rel;

  /* don't add ids for non existing paths */
  if (!path_exists (path))
    {
      safe_printfmt (2, "attempt to add missing path: %s\n", path);
      return NULL;
    }

  tree = arch_project_tree_new (talloc_context, path);
  abs_path = arch_abs_path (path);
  tree_rel = arch_project_tree_rel_path_from_abs (tree, abs_path);
  lim_free (0, abs_path);

  /* don't add ids for control paths */
  if (!(arch_is_control_file (tree_rel, NULL) && str_cmp_suffix_n(path, ".arch-inventory", 15)))
    {
      debug (dbg_invent, 3, _("getting id for (%s)\n"), tree_rel);
      chosen_id = arch_choose_explicit_id (tree, tree_rel, id);
      arch_add_explicit_id (path, chosen_id);
    }

  arch_project_tree_delete (tree);
  lim_free (0, tree_rel);
  return chosen_id;
}
/* Write ID (followed by a newline) into the explicit id file of PATH,
 * creating the containing directory if needed.
 *
 * If the id file already exists, a message is printed on fd 2 and the
 * process exits with status 2.
 */
void
arch_add_explicit_id (t_uchar * path, t_uchar * id)
{
  t_uchar * const id_path = arch_explicit_id_file_for (path);
  t_uchar * ids_dir;
  int fd;

  if (0 == safe_access (id_path, F_OK))
    {
      safe_printfmt (2, "attempt to id already tagged file: %s\n", path);
      exit (2);
    }

  ids_dir = file_name_directory_file (0, id_path);
  ensure_directory_exists (ids_dir);

  fd = safe_open (id_path, O_WRONLY | O_CREAT | O_EXCL, 0666);
  safe_printfmt (fd, "%s\n", id);
  safe_close (fd);

  lim_free (0, id_path);
  lim_free (0, ids_dir);
}
/* Remove the explicit id file of PATH.
 *
 * If there is no such file, a message is printed on fd 2 and the
 * process exits with status 2.
 */
void
arch_delete_explicit_id (t_uchar * path)
{
  t_uchar * const id_path = arch_explicit_id_file_for (path);

  if (safe_access (id_path, F_OK))
    {
      safe_printfmt (2, "attempt to remove non-existent id for %s\n", path);
      exit (2);
    }

  safe_unlink (id_path);
  lim_free (0, id_path);
}
/* Rename the explicit id file of FROM to the location of the explicit
 * id file of TO, creating the destination's directory first.
 */
void
arch_move_explicit_id (t_uchar * from, t_uchar * to)
{
  t_uchar * const src_id = arch_explicit_id_file_for (from);
  t_uchar * const dst_id = arch_explicit_id_file_for (to);
  t_uchar * const dst_dir = file_name_directory_file (0, dst_id);

  ensure_directory_exists (dst_dir);
  safe_rename (src_id, dst_id);

  lim_free (0, src_id);
  lim_free (0, dst_id);
  lim_free (0, dst_dir);
}
/* Return the path DIR/.arch-ids/=all (the strong explicit default
 * file of DIR).  The result comes from file_name_in_vicinity.
 */
t_uchar *
arch_strong_explicit_dflt_file (t_uchar * dir)
{
  t_uchar * const ids_dir = file_name_in_vicinity (0, dir, ".arch-ids");
  t_uchar * const dflt_path = file_name_in_vicinity (0, ids_dir, "=all");

  lim_free (0, ids_dir);
  return dflt_path;
}
/* Return the path DIR/.arch-ids/=default (the weak explicit default
 * file of DIR).  The result comes from file_name_in_vicinity.
 */
t_uchar *
arch_weak_explicit_dflt_file (t_uchar * dir)
{
  t_uchar * const ids_dir = file_name_in_vicinity (0, dir, ".arch-ids");
  t_uchar * const dflt_path = file_name_in_vicinity (0, ids_dir, "=default");

  lim_free (0, ids_dir);
  return dflt_path;
}
/* Return the path DIR/.arch-ids/=dont-care (the dont-care marker file
 * of DIR).  The result comes from file_name_in_vicinity.
 */
t_uchar *
arch_dont_care_explicit_dflt_file (t_uchar * dir)
{
  t_uchar * const ids_dir = file_name_in_vicinity (0, dir, ".arch-ids");
  t_uchar * const marker_path = file_name_in_vicinity (0, ids_dir, "=dont-care");

  lim_free (0, ids_dir);
  return marker_path;
}
/* Return non-zero when DIR has a .arch-ids/=dont-care marker file
 * (i.e. safe_access with F_OK returns 0 for it), and 0 otherwise.
 */
int
arch_is_dont_care_explicit_dflt_dir (t_uchar * dir)
{
  t_uchar * const marker_path = arch_dont_care_explicit_dflt_file (dir);
  int const present = !safe_access (marker_path, F_OK);

  lim_free (0, marker_path);
  return present;
}
/* Remove the strong explicit default file (.arch-ids/=all) of DIR.
 *
 * If the file does not exist, a message is printed on fd 2 and the
 * process exits with status 2.
 *
 * The file itself is unlinked; DIR is left in place.  (Earlier code
 * passed DIR to safe_unlink, which targets the directory instead of
 * the default file that was just checked.)
 */
void
arch_delete_strong_explicit_default (t_uchar * dir)
{
  t_uchar * file;

  file = arch_strong_explicit_dflt_file (dir);

  if (safe_access (file, F_OK))
    {
      safe_printfmt (2, "attempt to delete non-existing strong explicit default in %s\n", dir);
      exit (2);
    }

  safe_unlink (file);
  lim_free (0, file);
}
/* Remove the weak explicit default file (.arch-ids/=default) of DIR.
 *
 * If the file does not exist, a message is printed on fd 2 and the
 * process exits with status 2.
 *
 * The file itself is unlinked; DIR is left in place.  (Earlier code
 * passed DIR to safe_unlink, which targets the directory instead of
 * the default file that was just checked.)
 */
void
arch_delete_weak_explicit_default (t_uchar * dir)
{
  t_uchar * file;

  file = arch_weak_explicit_dflt_file (dir);

  if (safe_access (file, F_OK))
    {
      safe_printfmt (2, "attempt to delete non-existing weak explicit default in %s\n", dir);
      exit (2);
    }

  safe_unlink (file);
  lim_free (0, file);
}
/* Remove the dont-care marker file (.arch-ids/=dont-care) of DIR.
 *
 * If the file does not exist, a message is printed on fd 2 and the
 * process exits with status 2.
 *
 * The file itself is unlinked; DIR is left in place.  (Earlier code
 * passed DIR to safe_unlink, which targets the directory instead of
 * the marker file that was just checked.)
 */
void
arch_delete_dont_care_explicit_default (t_uchar * dir)
{
  t_uchar * file;

  file = arch_dont_care_explicit_dflt_file (dir);

  if (safe_access (file, F_OK))
    {
      safe_printfmt (2, "attempt to delete non-existing dont-care explicit default in %s\n", dir);
      exit (2);
    }

  safe_unlink (file);
  lim_free (0, file);
}
/* Create the strong explicit default file (.arch-ids/=all) of DIR with
 * ID followed by a newline as its content.
 *
 * If the file already exists, a message is printed on fd 2 and the
 * process exits with status 2.
 */
void
arch_set_strong_explicit_default (t_uchar * dir, t_uchar * id)
{
  t_uchar * const dflt_path = arch_strong_explicit_dflt_file (dir);
  t_uchar * const dflt_dir = file_name_directory_file (0, dflt_path);
  int fd;

  if (0 == safe_access (dflt_path, F_OK))
    {
      safe_printfmt (2, "attempt to overwrite strong explicit default in %s\n", dir);
      exit (2);
    }

  ensure_directory_exists (dflt_dir);
  fd = safe_open (dflt_path, O_WRONLY | O_CREAT | O_EXCL, 0666);
  safe_printfmt (fd, "%s\n", id);
  safe_close (fd);

  lim_free (0, dflt_path);
  lim_free (0, dflt_dir);
}
/* Create the weak explicit default file (.arch-ids/=default) of DIR
 * with ID followed by a newline as its content.
 *
 * If the file already exists, a message is printed on fd 2 and the
 * process exits with status 2.
 */
void
arch_set_weak_explicit_default (t_uchar * dir, t_uchar * id)
{
  t_uchar * const dflt_path = arch_weak_explicit_dflt_file (dir);
  t_uchar * const dflt_dir = file_name_directory_file (0, dflt_path);
  int fd;

  if (0 == safe_access (dflt_path, F_OK))
    {
      safe_printfmt (2, "attempt to overwrite weak explicit default in %s\n", dir);
      exit (2);
    }

  ensure_directory_exists (dflt_dir);
  fd = safe_open (dflt_path, O_WRONLY | O_CREAT | O_EXCL, 0666);
  safe_printfmt (fd, "%s\n", id);
  safe_close (fd);

  lim_free (0, dflt_path);
  lim_free (0, dflt_dir);
}
/* Make sure DIR has an (empty) .arch-ids/=dont-care marker file.
 *
 * When the marker is already present nothing is written; otherwise the
 * containing directory is ensured and the file is created.
 */
void
arch_set_dont_care_explicit_default (t_uchar * dir)
{
  t_uchar * const marker_path = arch_dont_care_explicit_dflt_file (dir);
  t_uchar * const marker_dir = file_name_directory_file (0, marker_path);

  if (safe_access (marker_path, F_OK))
    {
      int fd;

      ensure_directory_exists (marker_dir);
      fd = safe_open (marker_path, O_WRONLY | O_CREAT | O_EXCL, 0666);
      safe_close (fd);
    }

  lim_free (0, marker_path);
  lim_free (0, marker_dir);
}
/**
 * \brief return an inventory entry carrying the inventory id of the file / directory.
 *
 * WARNING. DO NOT CALL THIS FUNCTION. It is private to tla-project-tree.c, but not
 * relocated until more of the object model clears up.
 *
 * Return 0 and set `*errn' if the id can not be computed.
 * If no I/O error occurs and no id of any kind is found, while the tree
 * does not treat untagged files as source, 0 is returned with `*errn'
 * set to 0.
 *
 * The id is looked for in this order; the prefix shows where it came from:
 *   "?"   names tagging: "?" followed by the path
 *   "E_"  the path is a file inside a .arch-ids directory
 *   "D_"  the path is a .arch-ids directory itself
 *   "A_"  the path is at or underneath {arch}
 *   "x_"  explicit id file (.arch-ids/NAME.id or DIR/.arch-ids/=id)
 *   "a_"  .arch-ids/=all
 *   "i_"  implicit / tagline id read from the file itself
 *   "w_"  .arch-ids/=default
 *   "k_"  untagged, but a .arch-ids/=dont-care file is present
 *   "?_"  untagged source
 *
 * \param tree the tree to get the id for; its root, tag_method,
 *        untagged_is_source, id_tagging_shortcut and explicit_skips are used
 * \param errn put error details here (EINVAL for a null path, ENOMEM when
 *        a string could not be built)
 * \param path the path of the file; it is combined with tree->root to
 *        form the path that is actually examined
 * \param known_lstat cached stat result, or 0 to have it looked up here
 */
inventory_entry_t *
tla_path_id (arch_project_tree_t * tree,
int * errn,
t_uchar const * const path,
struct stat * known_lstat)
{
t_uchar * answer_id = 0;
t_uchar * as_file = 0;
t_uchar * basename = 0;
t_uchar * dir = 0;
t_uchar * dir_as_file = 0;
t_uchar * dir_basename = 0;
t_uchar * id_file = 0;
t_uchar * fq_path = NULL;
struct stat local_stat_buf;
struct stat *stat_buf = NULL;
int is_dir;
int is_symlink;
if (!path)
{
*errn = EINVAL;
/* Common exit: every path through the function ends up here via goto.
 * Temporaries are released and, if an id was found, it is wrapped in
 * an inventory entry whose kind follows the file's lstat mode.
 */
return_answer_id:
lim_free (0, as_file);
lim_free (0, dir);
lim_free (0, dir_as_file);
lim_free (0, dir_basename);
lim_free (0, id_file);
{
/* FIXME RBC 20050331 typed returns - dir link etc */
inventory_entry_t *answer = NULL;
/* FIXME RBC 20050331 make this cleaner and only-called once */
if (answer_id)
{
/* The early id kinds are decided before any stat; obtain one now. */
if (!stat_buf)
{
if (known_lstat)
stat_buf = known_lstat;
else
{
stat_buf = &local_stat_buf;
if (vu_lstat (errn, fq_path, stat_buf))
{
/* NOTE(review): answer_id is not released on this path -- confirm. */
lim_free (0, basename);
lim_free (0, fq_path);
return NULL;
}
}
}
/* NOTE(review): answer_id is not freed after the entry is built;
 * confirm whether inventory_*_new takes ownership of it. */
if (S_ISDIR (stat_buf->st_mode))
answer = inventory_dir_new (NULL, basename, answer_id);
else if (S_ISLNK (stat_buf->st_mode))
answer = inventory_link_new (NULL, basename, answer_id);
else
answer = inventory_file_new (NULL, basename, answer_id);
}
lim_free (0, basename);
lim_free (0, fq_path);
return answer;
}
}
as_file = file_name_from_directory (0, (t_uchar *)path);
if (!as_file)
{
/* Shared failure exit for every string that could not be built. */
enomem_error:
*errn = ENOMEM;
goto return_answer_id;
}
/* fq_path is the path joined onto the tree root. */
fq_path = file_name_in_vicinity (0, tree->root, as_file);
basename = file_name_tail (0, as_file);
/* names tagging: the id is just "?" followed by the path. */
if (tree->tag_method == arch_names_id_tagging)
{
answer_id = str_alloc_cat (0, "?", as_file);
if (!answer_id)
goto enomem_error;
else
goto return_answer_id;
}
dir = file_name_directory (0, as_file);
if (!dir)
dir = str_save (0, ".");
if (!(basename && dir))
goto enomem_error;
dir_as_file = file_name_from_directory (0, dir);
if (!dir_as_file)
goto enomem_error;
dir_basename = file_name_tail (0, dir_as_file);
if (!dir_basename)
goto enomem_error;
/* Explicit id files use their contents as id, with the
* prefix 'E'.
*/
if (!str_cmp (dir_basename, ".arch-ids"))
{
answer_id = explicit_id (errn, 0, 0, fq_path, fq_path, "E_", 0, known_lstat, tree->id_tagging_shortcut);
goto return_answer_id;
}
/* Explicit id file directories:
*/
if (!str_cmp (basename, ".arch-ids"))
{
long amt;
answer_id = str_alloc_cat (0, "D_", as_file);
if (!answer_id)
goto enomem_error;
/* smash_non_graphical returns the length to keep; terminate there. */
amt = smash_non_graphical (answer_id, str_length (answer_id));
answer_id[amt] = 0;
goto return_answer_id;
}
/* Paths beginning with "./{arch}" are tagged with their own
* path name, with the prefix "A_". The presumptions are that these
* files never move, and that if a file is present, its contents are
* invariant.
*/
if (is_at_or_underneath_archdir (as_file))
{
long amt;
answer_id = str_alloc_cat (0, "A_", as_file);
if (!answer_id)
goto enomem_error;
amt = smash_non_graphical (answer_id, str_length (answer_id));
answer_id[amt] = 0;
goto return_answer_id;
}
/* Try for an explicit id:
*/
if (known_lstat)
stat_buf = known_lstat;
else if (0 > vu_lstat (errn, fq_path, &local_stat_buf))
goto return_answer_id;
else
stat_buf = &local_stat_buf;
if (S_ISDIR (stat_buf->st_mode))
{
/* A directory keeps its id in its own .arch-ids/=id. */
is_dir = 1;
is_symlink = 0;
id_file = file_name_in_vicinity (0, fq_path, ".arch-ids/=id");
if (!id_file)
goto enomem_error;
}
else
{
/* Anything else keeps its id in <dir>/.arch-ids/<basename>.id,
 * which is then joined onto the tree root. */
is_dir = 0;
is_symlink = S_ISLNK (stat_buf->st_mode);
id_file = file_name_in_vicinity (0, dir, ".arch-ids/");
if (!id_file)
goto enomem_error;
id_file = str_realloc_cat (0, id_file, basename);
if (!id_file)
goto enomem_error;
id_file = str_realloc_cat (0, id_file, ".id");
if (!id_file)
goto enomem_error;
id_file = str_replace (id_file, file_name_in_vicinity (0, tree->root, id_file));
if (!id_file)
goto enomem_error;
}
*errn = 0;
/* NOTE(review): `path' is passed here where the other explicit_id calls
 * below pass `fq_path' -- confirm this is intended. */
answer_id = explicit_id (errn, 0, 0, path, id_file, "x_", 0, stat_buf, tree->id_tagging_shortcut);
/* Only a missing id file (ENOENT) lets the search continue. */
if (answer_id || (*errn != ENOENT))
goto return_answer_id;
else
{
/* Is there a .arch-ids/=all file here?
*/
lim_free (0, id_file);
/* NOTE(review): for non-directories this path (and the =default and
 * =dont-care paths below) is built from `dir' without tree->root,
 * unlike the .id path above -- confirm. */
if (is_dir)
id_file = file_name_in_vicinity (0, fq_path, ".arch-ids/=all");
else
id_file = file_name_in_vicinity (0, dir, ".arch-ids/=all");
if (!id_file)
goto enomem_error;
*errn = 0;
answer_id = explicit_id (errn, &tree->explicit_skips, 0, fq_path, id_file, "a_", (is_dir ? (t_uchar *)"./." : basename), stat_buf, NULL);
if (answer_id || (*errn != ENOENT))
goto return_answer_id;
/* Ids embedded in the file itself apply to regular files only. */
if ((tree->tag_method == arch_implicit_id_tagging) && !is_dir && !is_symlink)
{
*errn = 0;
answer_id = implicit_id (errn, 0, fq_path, basename, "i_", stat_buf, tree->id_tagging_shortcut);
if (answer_id || *errn)
goto return_answer_id;
}
if ((tree->tag_method == arch_tagline_id_tagging) && !is_dir && !is_symlink)
{
*errn = 0;
/* tagline differs from implicit only in passing no basename. */
answer_id = implicit_id (errn, 0, fq_path, 0, "i_", stat_buf, tree->id_tagging_shortcut);
if (answer_id || *errn)
goto return_answer_id;
}
/* is there an "=default" id?
*/
lim_free (0, id_file);
if (is_dir)
id_file = file_name_in_vicinity (0, fq_path, ".arch-ids/=default");
else
id_file = file_name_in_vicinity (0, dir, ".arch-ids/=default");
if (!id_file)
goto enomem_error;
*errn = 0;
answer_id = explicit_id (errn, &tree->explicit_skips, 0, fq_path, id_file, "w_", (is_dir ? (t_uchar *)"./." : basename), stat_buf, NULL);
if (answer_id || (*errn != ENOENT))
goto return_answer_id;
/* no explicit, =all, implicit, tagline, or =default id.
*/
if (tree->untagged_is_source)
{
int skipped;
struct stat dont_care_stat_buf;
lim_free (0, id_file);
id_file = file_name_in_vicinity (0, dir, ".arch-ids/=dont-care");
if (!id_file)
goto enomem_error;
/* explicit_skips remembers =dont-care files already found missing. */
skipped = (!!assoc_ref (tree->explicit_skips, id_file));
if (!skipped && (0 <= vu_lstat (errn, id_file, &dont_care_stat_buf)))
{
long amt;
answer_id = str_alloc_cat (0, "k_", as_file);
if (!answer_id)
goto enomem_error;
amt = smash_non_graphical (answer_id, str_length (answer_id));
answer_id[amt] = 0;
goto return_answer_id;
}
if (!skipped)
assoc_set (&tree->explicit_skips, id_file, "yes");
/* *errn is still ENOENT here when the lstat was skipped or found nothing. */
if (*errn == ENOENT)
{
long amt;
answer_id = str_alloc_cat (0, "?_", as_file);
if (!answer_id)
goto enomem_error;
amt = smash_non_graphical (answer_id, str_length (answer_id));
answer_id[amt] = 0;
goto return_answer_id;
}
else
goto return_answer_id;
}
else
{
/* Untagged and not treated as source: no id, and no error. */
*errn = 0;
goto return_answer_id;
}
}
}
/* Read the id stored in the explicit id file PATH.
 *
 * Thin wrapper around explicit_id with the "x_" prefix and no skips
 * table, argument file, postfix, stat data or id cache.  Error details
 * are reported through ERRN by explicit_id.
 */
t_uchar *
arch_id_from_explicit_file (int *errn, t_uchar * path)
{
  t_uchar * const file_id = explicit_id (errn, 0, 0, 0, path, "x_", 0, NULL, NULL);

  return file_id;
}
/* Does REL_FILE name the `{arch}' control directory or something in it?
 *
 * The pattern accepts a path that, after an optional leading
 * "<anything>/", is either `{arch}' followed by zero or more
 * components that each start with a letter or `=', or exactly
 * `{arch}/.arch-project-tree'.
 *
 * The regexp is compiled on the first call and kept in function-local
 * static storage for all later calls.  Returns whatever
 * filename_matches returns for that pattern.
 */
static int
is_at_or_underneath_archdir (char * rel_file)
{
  static regex_t archdir_re = {0,};
  static int archdir_re_ready = 0;

  if (archdir_re_ready)
    return filename_matches (&archdir_re, rel_file);

  {
    int status;

    status = regcomp (&archdir_re, "^(.*/)?(\\{arch\\}(/[a-zA-Z=][^/]*)*|\\{arch\\}/\\.arch-project-tree)$", REG_EXTENDED);
    invariant (!status);
    archdir_re_ready = 1;
  }

  return filename_matches (&archdir_re, rel_file);
}
/* Run the compiled regexp PATTERN against FILENAME.
 *
 * Returns 1 when regexec reports a match and 0 when it reports
 * REG_NOMATCH.  Any other regexec status is treated as fatal: panic
 * is called, and -1 is returned should panic ever come back.
 */
static int
filename_matches (regex_t * pattern, char * filename)
{
  int status;

  status = regexec (pattern, filename, 0, 0, 0);

  if ((status != REG_NOMATCH) && (status != REG_NOERROR))
    {
      panic ("unexpected regexec error in arch_inventory_traversal");
      return -1;
    }

  return (status == REG_NOERROR) ? 1 : 0;
}
/* Build an id from the contents of the id file ID_FILE.
 *
 * The id is PREFIX, followed by the text of ID_FILE up to (not
 * including) its first newline, followed by POSTFIX when POSTFIX is
 * non-0.  The file text is passed through smash_non_graphical one
 * read-chunk at a time before being appended.
 *
 *   errn                 receives an errno value on failure.
 *   skips                optional table of id file names already known
 *                        to be unusable.  A name found in *SKIPS is
 *                        reported as ENOENT without being opened, and
 *                        a name whose open fails (for any reason) is
 *                        added to it.
 *   limits               allocation limits used for the id built from
 *                        the file.
 *   arg_file             the file whose id is wanted.  It is lstat'ed
 *                        when STATB is 0 and a shortcut table is given,
 *                        and is printed in debug output.
 *   id_file              the (prospective) id file to read.
 *   prefix, postfix      see above.
 *   statb                optional stat data for ARG_FILE.
 *   id_tagging_shortcut  optional table mapping inode signatures to
 *                        previously computed ids.
 *
 * Returns the id, or 0 with *ERRN set:
 *   ENOENT  - ID_FILE is listed in *SKIPS;
 *   ENOMEM  - a string allocation failed;
 *   other   - as reported by vu_lstat, vu_open or vu_read_retry.
 *
 * Ids returned from the shortcut table are copies made with
 * str_save (0, ...), i.e. without LIMITS.
 */
t_uchar *
explicit_id (int * errn,
             assoc_table * skips,
             struct alloc_limits * limits,
             t_uchar const * const arg_file,
             t_uchar * id_file,
             t_uchar * prefix,
             t_uchar * postfix,
             struct stat * statb,
             assoc_table id_tagging_shortcut)
{
  int id_fd;
  t_uchar * answer;
  char buf[1024];
  long amt;
  int ign;

  if (skips && assoc_ref (*skips, id_file))
    {
      *errn = ENOENT;
      return 0;
    }

  if (id_tagging_shortcut)
    {
      struct stat stat_buf;
      /* prefix determines action..
       * E_ - it's a .arch-ids file we are examining
       * x_ - it's a normal file that /may/ have an id file
       * w_ a_ unsupported
       */
      /* id_file is always the (prospective) id file */
      /* TODO?: Cache the inode sig of the id file */
      t_uchar * signature;
      t_uchar * cached_id;

      if (statb)
        stat_buf = *statb;
      else if (0 > vu_lstat (errn, (t_uchar *)arg_file, &stat_buf))
        return 0;

      signature = arch_statb_inode_sig (&stat_buf);
      cached_id = assoc_ref (id_tagging_shortcut, signature);
      if (!cached_id)
        debug (dbg_invent, 8, "no cached id for signature '%s'\n", signature);
      lim_free (0, signature);

      if (cached_id)
        /* cache hit */
        {
          if (!str_cmp (arg_file, id_file))
            /* id of the .id file itself */
            {
              if ((cached_id[0] == 'E') && (cached_id[1] == '_'))
                {
                  debug (dbg_invent, 8, "Cached signature : '%s' on '%s'\n", cached_id, id_file);
                  return str_save (0, cached_id);
                }
              /* unexpected cached result, ignore */
            }
          else if ((cached_id[0] == 'x') && (cached_id[1] == '_'))
            {
              /* TODO: we could save 1 stat per .id file if we cached the results of the lookups somewhere -
               * .arch-ids is read before the files in the dir.
               */
              /* this call here should get a cache-hit on the id shortcut file */
              t_uchar *id_tag = explicit_id (errn, skips, limits, id_file, id_file, "E_", 0, 0, id_tagging_shortcut);

              /* The cached "x_..." id is only trusted when the id file
               * still yields the same text after the first character.
               */
              if (id_tag && (!str_cmp (id_tag + 1, cached_id + 1)))
                {
                  lim_free (0, id_tag);
                  debug (dbg_invent, 8, "Cached signature on : '%s'\n", cached_id);
                  return str_save (0, cached_id);
                }
              else
                lim_free (0, id_tag);
            }
          debug (dbg_invent, 8, "Cached signature '%s' not usable\n", cached_id);
        }
    }

  debug (dbg_invent, 8, "Reading '%s' for explicit id on '%s'\n", id_file, arg_file);

  id_fd = vu_open (errn, id_file, O_RDONLY, 0);
  if (id_fd < 0)
    {
      /* remember the failure so later lookups skip the open */
      if (skips)
        assoc_set (skips, id_file, "yes");
      return 0;
    }

  answer = str_save (limits, prefix);
  if (!answer)
    goto enomem_error;

  while (1)
    {
      t_uchar * eol;

      amt = vu_read_retry (errn, id_fd, buf, sizeof (buf));
      if (amt < 0)
        {
          lim_free (limits, answer);
          vu_close (&ign, id_fd);
          return 0;
        }
      if (!amt)
        break;

      /* Only the text before the first newline belongs to the id. */
      eol = str_chr_index_n (buf, amt, '\n');
      if (eol)
        amt = eol - (t_uchar *)buf;

      amt = smash_non_graphical (buf, amt);
      answer = str_realloc_cat_n (limits, answer, buf, amt);
      if (!answer)
        goto enomem_error;

      if (eol)
        break;
    }

  if (postfix)
    answer = str_realloc_cat (limits, answer, postfix);
  if (!answer)
    goto enomem_error;

  vu_close (&ign, id_fd);
  debug (dbg_invent, 8, "Created signature : '%s' on '%s' (from '%s')\n", answer, arg_file, id_file);
  return answer;

 enomem_error:
  /* Every jump here happens after ID_FILE was opened, so the
   * descriptor must be released before reporting the failure.
   *
   * `answer' is always 0 at this point.
   * NOTE(review): if str_realloc_cat_n / str_realloc_cat leave the
   * original buffer allocated when they fail, that buffer is leaked
   * here -- confirm their contract before adding a free.
   */
  vu_close (&ign, id_fd);
  *errn = ENOMEM;
  return 0;
}
/* Look for an id embedded in the text of FILE.
 *
 *   errn                 receives an errno value on failure, and is
 *                        set to 0 when the file was scanned and no id
 *                        was found.
 *   limits               allocation limits for the returned id.
 *   file                 the file to scan.
 *   basename             non-0 selects the old larch-style syntax,
 *                        0 selects the tagline syntax (see below).
 *   prefix               prepended to the id text.
 *   statb                stat data for FILE; only read when
 *                        ID_TAGGING_SHORTCUT is non-0.
 *   id_tagging_shortcut  optional table mapping inode signatures to
 *                        previously computed ids.
 *
 * Returns a newly allocated PREFIX + <id text>, or 0.  When the
 * shortcut table holds an entry for the file that does not begin
 * with "i_", 0 is returned and *ERRN is left untouched.
 *
 * If closing FILE fails, 0 is returned with *ERRN as set by vu_close.
 * The descriptor is not closed a second time, and an id that had
 * already been built is freed rather than leaked.
 */
static t_uchar *
implicit_id (int * errn,
             struct alloc_limits * limits,
             t_uchar const * const file,
             t_uchar * basename,
             t_uchar * prefix,
             struct stat * statb,
             assoc_table id_tagging_shortcut)
{
  int file_fd;
  struct stat file_stat_buf;
  char buf[1026]; /* one byte slack for \0 termination */
  int amt;
  int line;
  int bottom;
  int ign;

  if (id_tagging_shortcut)
    {
      t_uchar * signature = arch_statb_inode_sig (statb);
      t_uchar * cached_id = assoc_ref (id_tagging_shortcut, signature);

      lim_free (0, signature);
      if (cached_id && (cached_id[0] == 'i') && (cached_id[1] == '_'))
        return str_save (0, cached_id);
      else if (cached_id)
        return 0;
    }

  /* This is a slightly screwy, historic interface.
   *
   * Passing `basename != 0' means the old, larch-style tag syntax.
   *
   * Passing `basename == 0' means tagline syntax.
   */

  /* Search the file itself (last, then first 1K) for a line beginning:
   *
   * tla-style tagline id tagging (basename == 0)
   * -----------------------------------------
   *
   *    <punct>arch-tag:<blanks>
   *
   *
   * larch-style implicit id tagging (basename != 0)
   * --------------------------------------------
   *
   *    <punct>basename<blanks>-
   *
   * or
   *
   *    <punct>tag:<blanks>
   *
   * after the dash, skip any blanks -- the rest is the id.
   */

  file_fd = vu_open (errn, (t_uchar *)file, O_RDONLY, 0);
  if (file_fd < 0)
    return 0;

  if (0 > vu_fstat (errn, file_fd, &file_stat_buf))
    goto error_return;

  /* bottom == 1: scan the tail of the file; bottom == 0: scan its head. */
  for (bottom = 1; bottom >= 0; --bottom)
    {
      if (!bottom)
        {
          if (0 > vu_lseek (errn, file_fd, 0, SEEK_SET))
            goto error_return;

          amt = vu_read_retry (errn, file_fd, buf, sizeof (buf) - 2);
          if (amt < 0)
            goto error_return;
        }
      else
        {
          char * x;

          /* A file that fits in the buffer is covered entirely by
           * the head pass, so the tail pass is skipped for it.
           */
          if (file_stat_buf.st_size > sizeof (buf) - 1)
            amt = sizeof (buf) - 1;
          else
            continue;

          /* Yes, this is a off by one error. However changing it
           * breaks existing file-ids
           */
          if (0 > vu_lseek (errn, file_fd, -1026, SEEK_END))
            goto error_return;

          amt = vu_read_retry (errn, file_fd, buf, sizeof (buf) - 1);
          if (amt < 0)
            goto error_return;

          /* Drop the (probably partial) first line of the tail. */
          x = str_chr_index_n (buf, amt, '\n');
          if (!x)
            continue;
          amt = amt - (1 + x - buf);
          mem_move (buf, x + 1, amt);
        }

      buf[amt] = 0;
      line = 0;

      while (1)
        {
          int is_inventory_id;

          /* skip punctuation and blanks at the start of the line
           */
          while ((line < amt) && (char_is_punct (buf[line]) || char_is_blank (buf[line])))
            ++line;

          if (line == amt)
            break;

          if (buf[line] == '\n')
            {
              ++line;
              continue;
            }

          is_inventory_id = (basename ? !str_cmp_prefix ("tag:", buf + line) : !str_cmp_prefix ("arch-tag:", buf + line));

          if (   !is_inventory_id
              && (!basename || str_cmp_prefix (basename, buf + line)))
            {
              t_uchar * eol;

            not_this_line:
              /* advance to the newline ending this line; the next
               * iteration steps over it
               */
              eol = str_chr_index_n (buf + line, amt - line, '\n');
              if (!eol)
                break;
              line = eol - (t_uchar *)buf;
            }
          else
            {
              t_uchar * eol;

              if (is_inventory_id)
                line += (basename ? str_length ("tag:") : str_length ("arch-tag:"));
              else
                line += str_length (basename);

              if (!is_inventory_id)
                {
                  /* larch-style: basename, blanks, then a dash */
                  while ((line < amt) && char_is_blank (buf[line]))
                    ++line;

                  if (line == amt)
                    break;

                  if (buf[line] != '-')
                    goto not_this_line;

                  ++line;
                }

              if (line == amt)
                break;

              /* This is the tag line.
               */
              while ((line < amt) && char_is_blank (buf[line]))
                ++line;

              eol = str_chr_index_n (buf + line, amt - line, '\n');
              if (!eol)
                /* end of buffer no eol */
                {
                  eol = buf + amt;
                  if (!bottom)
                    if (file_stat_buf.st_size > sizeof (buf) - 1)
                      safe_printfmt (2, "Warning: top-of-file truncated tag in: %s\n", file);
                }

              if (0 == (eol - (t_uchar *)(buf + line)))
                {
                  /* an empty id
                   */
                  break;
                }

              {
                long size;
                t_uchar * answer;

                size = smash_non_graphical (buf + line, eol - (t_uchar *)(buf + line));
                answer = str_alloc_cat_n (limits, prefix, buf + line, size);

                if (0 > vu_close (errn, file_fd))
                  {
                    /* The close was already attempted: do not close
                     * the descriptor again, and do not leak the id.
                     */
                    if (answer)
                      lim_free (limits, answer);
                    return 0;
                  }

                if (!answer)
                  *errn = ENOMEM;
                return answer;
              }
            }
        }
    }

  /* No id found in either pass.  A failed close is reported as is;
   * the descriptor is not closed a second time.
   */
  if (0 > vu_close (errn, file_fd))
    return 0;

  *errn = 0;
  return 0;

 error_return:
  /* *errn already describes the failure; ignore any close error */
  vu_close (&ign, file_fd);
  return 0;
}
/* Sanitize the AMT bytes at BUF in place.
 *
 * Trailing bytes for which char_is_graph is false are dropped from
 * the count; every remaining byte for which char_is_graph is false
 * is overwritten with '_'.  Returns the new count.  No terminating
 * 0 byte is written.
 */
static long
smash_non_graphical (t_uchar * buf, long amt)
{
  long kept;
  long i;

  /* trim the tail */
  kept = amt;
  while ((kept > 0) && !char_is_graph (buf[kept - 1]))
    --kept;

  /* patch up what is left */
  for (i = kept - 1; i >= 0; --i)
    {
      if (char_is_graph (buf[i]))
        continue;
      buf[i] = '_';
    }

  return kept;
}
/**
 * \brief Convenience function for arbitrary id retrieval.
 *
 * Please use arch_inventory_id directly if you already have a tree object.
 *
 * A temporary project tree is created for the directory holding
 * \a some_path (or for \a some_path itself when it is a directory), the
 * tree's id tagging method is loaded, and the id is looked up with
 * arch_inventory_id before the temporary tree is deleted again.
 *
 * \param some_path the file path, relative or absolute
 * \param method set to other than arch_unspecified_id_tagging to override for trees that support overriding.
 * \return the result of arch_inventory_id for the path
 */
t_uchar *
arch_id_for_path(t_uchar const * const some_path, enum arch_id_tagging_method method)
{
  t_uchar * abs_path = arch_abs_path (some_path);
  t_uchar * dir = 0;
  t_uchar * rel_path = 0;
  t_uchar * answer = NULL;
  arch_project_tree_t * tree = 0;

  /* pick the directory the temporary tree is opened on */
  if (!safe_file_is_directory (some_path))
    dir = file_name_directory_file (0, abs_path);
  else
    dir = str_save (0, some_path);

  tree = arch_project_tree_new_ext (talloc_context, dir, 1, 0);

  /* load the inventory rules */
  arch_tree_id_tagging_method (tree, NULL, 0);

  /* override them if needed */
  if (arch_unspecified_id_tagging != method)
    tree->tag_method = method;

  rel_path = arch_project_tree_rel_path_from_abs (tree, abs_path);
  answer = arch_inventory_id (tree, rel_path, 0);

  /* release everything but the answer */
  arch_project_tree_delete (tree);
  lim_free (0, dir);
  lim_free (0, abs_path);
  lim_free (0, rel_path);

  return answer;
}
/* tag: Tom Lord Wed May 14 07:20:26 2003 (inv-tags.c)
*/
/* syntax highlighted by Code2HTML, v. 0.9.1 */