/* invent.c: project tree inventory library routines
 *
 * vim:smartindent ts=8:sts=2:sta:et:ai:shiftwidth=2
 ****************************************************************
 * Copyright (C) 2002, 2003 Tom Lord
 *
 * See the file "COPYING" for further information about
 * the copyright and warranty status of this work.
 */


#include "hackerlab/bugs/exception.h"
#include "hackerlab/bugs/panic.h"
#include "hackerlab/os/errno.h"
#include "hackerlab/os/errno-to-string.h"
#include "hackerlab/mem/mem.h"
#include "hackerlab/mem/talloc.h"
#include "hackerlab/char/char-class.h"
#include "hackerlab/char/str.h"
#include "hackerlab/arrays/ar.h"
#include "hackerlab/fs/file-names.h"
#include "hackerlab/vu/safe.h"
#include "libarch/inode-sig.h"
#include "libarch/inv-ids.h"
#include "libarch/invent.h"
#include "libarch/pfs.h"
#include "libarch/debug.h"



/* Per-directory inventory regexps.  An instance is allocated by
 * arch_inventory_traversal_internal () when a directory contains a
 * `.arch-inventory' entry, filled in by read_directory_regexps () and
 * released with free_directory_regexps ().
 *
 * `regexps' holds one pointer slot per regex_t-sized unit of
 * `struct arch_inventory_regexps'.  DIR_REGEXP (dir_re, name) selects the
 * slot whose index is the offset of the `name_pattern' member divided by
 * sizeof (regex_t).  Users in this file test the selected slot for null
 * before matching against it.
 *
 * NOTE(review): presumably a non-null slot points at the corresponding
 * member of `storage' -- confirm against read_directory_regexps ().
 */
struct directory_regexps
{
  regex_t * regexps[sizeof (struct arch_inventory_regexps) / sizeof (regex_t)];
  struct arch_inventory_regexps storage;
#define DIR_REGEXP(dir_re, name) \
  (dir_re)->regexps[offsetof (struct arch_inventory_regexps, name##_pattern) / sizeof (regex_t)]
};

/* Caller-supplied reporting hooks, passed by pointer through every level
 * of an inventory traversal.
 */
typedef struct 
{
    /* invoked as callback (closure, &data) for every entry that is reported */
    inv_callback callback;
    /* opaque pointer handed back as the first argument of `callback' */
    void * closure;
    /* NOTE(review): arch_inventory_traversal_path () never assigns this
     * member and the traversal code passes its skip table as a separate
     * argument instead -- confirm whether the field is still needed.
     */
    assoc_table * explicit_skips;
} inventory_state_t;



/* __STDC__ prototypes for static functions
 *
 * Some of these functions are defined below without repeating the `static'
 * keyword; the declarations here are what give them internal linkage.
 *
 * Note that the second parameter of incompatible_options () is called
 * `src' here but `right' in the definition.
 */
static void copy_options_but_regexps (struct arch_inventory_options * dest,
                                      const struct arch_inventory_options * src);
static int incompatible_options (struct arch_inventory_options const * left,
                               struct arch_inventory_options const * src);
static void cache_inventory_callback (void * closure, invent_callback_data_t const * const data);
static int cached_changeset_destructor (void * data);
static invent_callback_data_t * invent_cb_cache (invent_callback_data_t const * const data);
static void source_inventory_callback (void * closure, invent_callback_data_t const * const data);
static void source_inventory_files_callback (void * closure, invent_callback_data_t const * const data);
static void arch_inventory_traversal_internal (struct arch_inventory_options * options,
                                               t_uchar const * user_dir,
                                               t_uchar const * tree_dir,
                                               inventory_state_t * state,
                                               assoc_table * explicit_skips,
                                               arch_project_tree_t * current_tree);
static int cmp_files (const void * va, const void * vb);
static int right_order_for_recursion (t_uchar * a, t_uchar * b);
static int contains_illegal_character (char * filename);
static int filename_matches (regex_t * pattern, char * filename);
static int is_nested_tree (char * path);
static int is_comment_line (t_uchar * line, long len);
static int sets_re (char * kw, char ** re, t_uchar * line, long len);
static int sets_id_tagging_method (char * kw,
                                   enum arch_id_tagging_method * method_var,
                                   enum arch_inventory_category * untagged_category_var,
                                   enum arch_id_tagging_method method,
                                   enum arch_inventory_category untagged_category,
                                   t_uchar * line, long len);
static int sets_untagged_source_disposition (enum arch_inventory_category * untagged_category_var,
                                             t_uchar * saved_line, long saved_len);
static void read_directory_regexps (struct directory_regexps * regexps, char const * dir_name);
static void free_directory_regexps (struct directory_regexps * regexps);



/* Build a table of (path, id) records for the source entries of TREE.
 *
 * include_ctl      -- non-zero: do not apply the excludes patterns
 * include_precious -- non-zero: report precious entries as well
 * include_nested   -- stored as the `nested' option of the traversal
 *
 * The process changes directory to the tree root for the duration of the
 * traversal and returns to the directory it started in afterwards.
 */
rel_table
arch_source_inventory (arch_project_tree_t * tree, int include_ctl, int include_precious, int include_nested)
{
  struct arch_inventory_options opts;
  rel_table inventory = 0;
  int start_dir_fd;

  /* keep a handle on the starting directory so we can come back to it */
  start_dir_fd = safe_open (".", O_RDONLY, 0);
  safe_chdir (tree->root);

  mem_set0 ((t_uchar *)&opts, sizeof (opts));
  opts.categories = arch_inventory_source;
  if (include_precious)
    opts.categories |= arch_inventory_precious;
  opts.want_ids = 1;
  opts.method = arch_names_id_tagging; /* default only */
  opts.nested = include_nested;
  opts.include_excluded = (include_ctl != 0);

  arch_get_inventory_naming_conventions (&opts, tree);
  arch_inventory_traversal (&opts, tree, source_inventory_callback, (void *)&inventory);
  arch_free_inventory_naming_conventions (&opts);

  safe_fchdir (start_dir_fd);
  safe_close (start_dir_fd);

  return inventory;
}

/* Like a non-nested source inventory of TREE, except that directories are
 * left out of the answer: only non-directory entries get a (path, id)
 * record.
 *
 * include_ctl      -- non-zero: do not apply the excludes patterns
 * include_precious -- non-zero: report precious entries as well
 *
 * The process changes directory to the tree root for the duration of the
 * traversal and returns to the directory it started in afterwards.
 */
rel_table
arch_source_files_inventory (arch_project_tree_t * tree, int include_ctl, int include_precious)
{
  struct arch_inventory_options opts;
  rel_table inventory = 0;
  int start_dir_fd;

  /* keep a handle on the starting directory so we can come back to it */
  start_dir_fd = safe_open (".", O_RDONLY, 0);
  safe_chdir (tree->root);

  mem_set0 ((t_uchar *)&opts, sizeof (opts));
  opts.categories = arch_inventory_source;
  if (include_precious)
    opts.categories |= arch_inventory_precious;
  opts.want_ids = 1;
  opts.method = arch_names_id_tagging; /* default only */
  opts.nested = 0;
  opts.include_excluded = (include_ctl != 0);

  /* this replaces the default method with the one the tree really uses */
  arch_get_inventory_naming_conventions (&opts, tree);
  arch_inventory_traversal (&opts, tree, source_inventory_files_callback, (void *)&inventory);
  arch_free_inventory_naming_conventions (&opts);

  safe_fchdir (start_dir_fd);
  safe_close (start_dir_fd);

  return inventory;
}

/**
 * \brief would two sets of inventory options produce different results?
 *
 * Compares every scalar option that is tested here; the compiled regexps
 * are not compared.
 *
 * \return 0 when all compared fields are equal, 1 otherwise
 */
int
incompatible_options (struct arch_inventory_options const * left,
                      struct arch_inventory_options const * right)
{
  int const all_equal =
       (left->categories == right->categories)
    && (left->method == right->method)
    && (left->want_ids == right->want_ids)
    && (left->treat_unrecognized_source_as_source == right->treat_unrecognized_source_as_source)
    && (left->nested == right->nested)
    && (left->include_excluded == right->include_excluded)
    && (left->override_method == right->override_method);

  return !all_equal;
}

/* Copy the scalar traversal options from SRC to DEST, leaving DEST's
 * compiled regexps untouched.
 *
 * NOTE(review): `untagged_source_category' and `no_recursion', both of
 * which the traversal reads from its options, are not copied here --
 * confirm that this is intended.
 */
static void
copy_options_but_regexps (struct arch_inventory_options * dest,
                          const struct arch_inventory_options * src)
{
  /* what to report */
  dest->categories = src->categories;
  dest->want_ids = src->want_ids;
  dest->treat_unrecognized_source_as_source = src->treat_unrecognized_source_as_source;
  dest->include_excluded = src->include_excluded;

  /* how ids are determined */
  dest->method = src->method;
  dest->override_method = src->override_method;

  /* nested tree handling */
  dest->nested = src->nested;
}


/* Inventory callback: append a (path, id) record for DATA to the rel_table
 * that CLOSURE points at.
 */
void
source_inventory_callback (void * closure, invent_callback_data_t const * const data)
{
  rel_table * const table = closure;

  rel_add_records (table, rel_make_record (data->path, data->id, 0), 0);
}

/* Inventory callback: append a (path, id) record for DATA to the rel_table
 * that CLOSURE points at, unless DATA describes a directory.
 */
void
source_inventory_files_callback (void * closure, invent_callback_data_t const * const data)
{
  rel_table * const table = closure;

  if (S_ISDIR (data->stat_buf.st_mode))
    return;

  rel_add_records (table, rel_make_record (data->path, data->id, 0), 0);
}




/* Return a freshly str_save'd copy of the default regexp for inventory
 * category CAT.  Panics on a category that has no default here.
 */
t_uchar *
arch_default_naming_conventions_regexp (enum arch_inventory_category cat)
{
  char const * pattern = 0;

  switch (cat)
    {
    case arch_inventory_source:
      pattern = ".";
      break;

    case arch_inventory_precious:
      pattern = "^(\\+.*|\\.gdbinit|\\.#ckpts-lock|=build\\.*|=install\\.*|CVS|CVS\\.adm|RCS|RCSLOG|SCCS|TAGS|tags|cscope.*\\.out|\\.svn)$";
      break;

    case arch_inventory_backup:
      pattern = "^.*(~|\\.~[0-9]+~|\\.bak|\\.sw(o|p)|\\.orig|\\.rej|\\.original|\\.modified|\\.reject|\\.(o|a|so|core|so(\\.[[:digit:]]+)*))$|^core$";
      break;

    case arch_inventory_junk:
      pattern = "^(,.*)$";
      break;

    case arch_inventory_unrecognized:
      pattern = "^$";
      break;

    case arch_inventory_excludes:
      pattern = "^(.arch-ids|\\{arch\\}|\\.arch-inventory)$";
      break;

    default:
      panic ("unrecognized inventory category (arch_default_naming_conventions_regexp)");
      return 0;                 /* not reached */
    }

  return str_save (0, pattern);
}


/* DO NOT under ANY circumstances change these. They predate pre-populating
 * tagging-method and will break old archives if altered.
 */
t_uchar *
arch_ancient_default_naming_conventions_regexp (enum arch_inventory_category cat)
{
  /* Returns a freshly str_save'd copy of the historical default regexp for
   * category CAT; panics on a category that has no entry here.
   */
  char const * pattern = 0;

  switch (cat)
    {
    case arch_inventory_source:
      pattern = "^([_=a-zA-Z0-9].*|\\.arch-ids|\\{arch\\}|\\.arch-project-tree)$";
      break;

    case arch_inventory_precious:
      pattern = "^(\\+.*|\\.gdbinit|\\.#ckpts-lock|=build\\.*|=install\\.*|CVS|CVS\\.adm|RCS|RCSLOG|SCCS|TAGS)$";
      break;

    case arch_inventory_backup:
      pattern = "^.*(~|\\.~[0-9]+~|\\.bak|\\.orig|\\.rej|\\.original|\\.modified|\\.reject)$";
      break;

    case arch_inventory_junk:
      pattern = "^(,.*)$";
      break;

    case arch_inventory_unrecognized:
      pattern = "^(.*\\.(o|a|so|core)|core)$";
      break;

    case arch_inventory_excludes:
      pattern = "^(.arch-ids|\\{arch\\}|\\.arch-inventory)$";
      break;

    default:
      panic ("unrecognized inventory category (arch_ancient_default_naming_conventions_regexp)");
      return 0;                 /* not reached */
    }

  return str_save (0, pattern);
}


/* Return a freshly str_save'd copy of the regexp used for category CAT
 * when there is no project tree to take conventions from: everything is
 * source, nothing is precious, backup, junk or unrecognized, and only the
 * arch control directories are excluded.  Panics on a category that has no
 * entry here.
 */
t_uchar *
arch_null_default_naming_conventions_regexp (enum arch_inventory_category cat)
{
  char const * pattern = 0;

  switch (cat)
    {
    case arch_inventory_source:
      pattern = ".*";
      break;

    case arch_inventory_precious:
    case arch_inventory_backup:
    case arch_inventory_junk:
    case arch_inventory_unrecognized:
      /* matches only the empty string */
      pattern = "^$";
      break;

    case arch_inventory_excludes:
      pattern = "^(.arch-ids|\\{arch\\})$";
      break;

    default:
      panic ("unrecognized inventory category (arch_null_default_naming_conventions_regexp)");
      return 0;                 /* not reached */
    }

  return str_save (0, pattern);
}


/* Compile the six naming-convention regexps of OPTIONS.
 *
 * With no tree (or a tree without a root) the "null" defaults are used.
 * Otherwise the tree's id-tagging-method file, if it exists, is read line
 * by line: each non-comment line is offered in turn to the tagging-method
 * keywords, the untagged-source directive and the per-category regexp
 * keywords, stopping at the first one that accepts it.  Categories the
 * file did not set fall back to the "ancient" defaults.
 *
 * Unless options->override_method is set, reading the file also stores the
 * tree's tagging method and untagged-source category in OPTIONS.
 *
 * Panics if any regexp fails to compile.  Release the compiled regexps
 * with arch_free_inventory_naming_conventions ().
 */
void
arch_get_inventory_naming_conventions (struct arch_inventory_options * options,
                                       arch_project_tree_t * tree)
{
  char * excludes = 0;
  char * junk = 0;
  char * backup = 0;
  char * precious = 0;
  char * unrecognized = 0;
  char * source = 0;

  if (!tree || !tree->root)
    {
      excludes = arch_null_default_naming_conventions_regexp (arch_inventory_excludes);
      junk = arch_null_default_naming_conventions_regexp (arch_inventory_junk);
      backup = arch_null_default_naming_conventions_regexp (arch_inventory_backup);
      precious = arch_null_default_naming_conventions_regexp (arch_inventory_precious);
      unrecognized = arch_null_default_naming_conventions_regexp (arch_inventory_unrecognized);
      source = arch_null_default_naming_conventions_regexp (arch_inventory_source);
    }
  else
    {
      t_uchar * method_file = arch_tree_id_tagging_method_file (tree);

      if (!safe_access (method_file, F_OK))
        {
          enum arch_id_tagging_method tree_method = arch_names_id_tagging;
          enum arch_inventory_category untagged_source_category = arch_inventory_precious;
          t_uchar * line;
          long len;
          int in_fd;

          in_fd = safe_open (method_file, O_RDONLY, 0);

          for (;;)
            {
              safe_next_line (&line, &len, in_fd);
              if (!len)
                break;

              /* the first handler that recognizes the line consumes it */
              if (is_comment_line (line, len))
                continue;

              if (sets_id_tagging_method ("implicit", &tree_method, &untagged_source_category, arch_implicit_id_tagging, arch_inventory_source, line, len))
                continue;
              if (sets_id_tagging_method ("tagline", &tree_method, &untagged_source_category, arch_tagline_id_tagging, arch_inventory_source, line, len))
                continue;
              if (sets_id_tagging_method ("explicit", &tree_method, &untagged_source_category, arch_explicit_id_tagging, arch_inventory_precious, line, len))
                continue;
              if (sets_id_tagging_method ("names", &tree_method, &untagged_source_category, arch_names_id_tagging, arch_inventory_source, line, len))
                continue;

              if (sets_untagged_source_disposition (&untagged_source_category, line, len))
                continue;

              if (sets_re ("exclude", &excludes, line, len))
                continue;
              if (sets_re ("junk", &junk, line, len))
                continue;
              if (sets_re ("backup", &backup, line, len))
                continue;
              if (sets_re ("precious", &precious, line, len))
                continue;
              if (sets_re ("unrecognized", &unrecognized, line, len))
                continue;
              (void) sets_re ("source", &source, line, len);
            }

          safe_close (in_fd);

          if (!options->override_method)
            {
              options->method = tree_method;
              options->untagged_source_category = untagged_source_category;
            }
        }

      /* anything the file left unset gets the ancient default */
      if (!excludes)
        excludes = arch_ancient_default_naming_conventions_regexp (arch_inventory_excludes);
      if (!junk)
        junk = arch_ancient_default_naming_conventions_regexp (arch_inventory_junk);
      if (!backup)
        backup = arch_ancient_default_naming_conventions_regexp (arch_inventory_backup);
      if (!precious)
        precious = arch_ancient_default_naming_conventions_regexp (arch_inventory_precious);
      if (!unrecognized)
        unrecognized = arch_ancient_default_naming_conventions_regexp (arch_inventory_unrecognized);
      if (!source)
        source = arch_ancient_default_naming_conventions_regexp (arch_inventory_source);

      lim_free (0, method_file);
    }

  /* compile the conventions; a pattern that does not compile is fatal */
  if (regcomp (&options->regexps.excludes_pattern, excludes, REG_EXTENDED))
    panic ("bogus id-tagging-method regexp for `excludes'");

  if (regcomp (&options->regexps.junk_pattern, junk, REG_EXTENDED))
    panic ("bogus id-tagging-method regexp for `junk'");

  if (regcomp (&options->regexps.backup_pattern, backup, REG_EXTENDED))
    panic ("bogus id-tagging-method regexp for `backup'");

  if (regcomp (&options->regexps.precious_pattern, precious, REG_EXTENDED))
    panic ("bogus id-tagging-method regexp for `precious'");

  if (regcomp (&options->regexps.unrecognized_pattern, unrecognized, REG_EXTENDED))
    panic ("bogus id-tagging-method regexp for `unrecognized'");

  if (regcomp (&options->regexps.source_pattern, source, REG_EXTENDED))
    panic ("bogus id-tagging-method regexp for `source'");

  /* the pattern text is no longer needed once compiled */
  lim_free (0, excludes);
  lim_free (0, junk);
  lim_free (0, backup);
  lim_free (0, precious);
  lim_free (0, unrecognized);
  lim_free (0, source);
}


/* Release the six regexps compiled into OPTIONS by
 * arch_get_inventory_naming_conventions ().
 */
void
arch_free_inventory_naming_conventions (struct arch_inventory_options * options)
{
  regex_t * compiled[6];
  int i;

  compiled[0] = &options->regexps.excludes_pattern;
  compiled[1] = &options->regexps.junk_pattern;
  compiled[2] = &options->regexps.backup_pattern;
  compiled[3] = &options->regexps.precious_pattern;
  compiled[4] = &options->regexps.unrecognized_pattern;
  compiled[5] = &options->regexps.source_pattern;

  for (i = 0; i < 6; ++i)
    regfree (compiled[i]);
}


/* Inventory TREE starting from the current directory ("."), reporting
 * entries through CALLBACK (CLOSURE, data).  This is
 * arch_inventory_traversal_path () with a fixed starting path.
 */
void
arch_inventory_traversal (struct arch_inventory_options * options,
                          arch_project_tree_t * tree,
                          inv_callback callback,
                          void * closure)
{
  arch_inventory_traversal_path (options,
                                 tree,
                                 ".",
                                 callback,
                                 closure);
}

/* Inventory the directory PATH inside TREE, reporting entries through
 * CALLBACK (CLOSURE, data).
 *
 * For the implicit, tagline and explicit tagging methods the id shortcut
 * table is read into tree->id_tagging_shortcut first.  When the traversal
 * is done that table is freed and the pointer the tree held on entry is
 * put back.
 *
 * NOTE(review): for any other tagging method nothing is read, so the table
 * freed below is the one saved on entry, and the pointer restored
 * afterwards would then refer to a freed table if it was non-null.
 * Whether that can happen depends on arch_read_id_shortcut () and on what
 * callers keep in the tree -- confirm.
 */
void
arch_inventory_traversal_path (struct arch_inventory_options * options,
                          arch_project_tree_t * tree,
                          t_uchar const * const path,
                          inv_callback callback,
                          void * closure)
{
  inventory_state_t state;
  assoc_table explicit_skips = 0;
  assoc_table shortcut_on_entry = 0;
  t_uchar * tree_rel_path;

  shortcut_on_entry = tree->id_tagging_shortcut;

  switch (options->method)
    {
    case arch_implicit_id_tagging:
    case arch_tagline_id_tagging:
    case arch_explicit_id_tagging:
      arch_read_id_shortcut (&tree->id_tagging_shortcut, tree->root);
      break;

    default:
      break;
    }

  state.callback = callback;
  state.closure = closure;

  /* turn PATH into an absolute path, then into a path relative to the tree */
  tree_rel_path = arch_abs_path (path);
  tree_rel_path = str_replace (tree_rel_path, arch_project_tree_rel_path_from_abs (tree, tree_rel_path));

  arch_inventory_traversal_internal (options, path, tree_rel_path, &state, &explicit_skips, tree);

  free_assoc_table (tree->id_tagging_shortcut);
  tree->id_tagging_shortcut = shortcut_on_entry;
  free_assoc_table (explicit_skips);
  lim_free (0, tree_rel_path);
}

/* Array types used by the directory traversal below: `ar_charstr' holds
 * the entry names of one directory, `ar_legint' holds int values (indexes
 * into that name array and per-entry flags).
 */
AR_TYPEDEF(char *, charstr);
AR_TYPEDEF(int, legint);

/* One step of the per-directory loop in arch_inventory_traversal_internal ():
 * either handles the next deferred directory recursion or classifies the
 * next entry of `files'.  The head, tail and x counters are updated through
 * the pointers.
 */
static void do_file_or_deferred (int *deferred_recursions_head, int * deferred_recursions_tail, int * x, int const n_files,
                                 ar_charstr files, ar_legint deferred_recursions, 
                                 ar_legint is_deferred_nested,
                                 t_uchar const * const user_dir,
                                 t_uchar const * const tree_dir,
                                 arch_project_tree_t * current_tree,
                                 struct arch_inventory_options * options,
                                 inventory_state_t * state,
                                 struct directory_regexps * dir_regexps,
                                 assoc_table * explicit_skips);

/**
 * \brief inventory a single directory, recursing as directed by the options
 * \param options traversal options and compiled naming conventions
 * \param user_dir the directory to read, as a path that can be opened
 * \param tree_dir the same directory as a path relative to the tree root
 * \param state callback and closure used to report entries
 * \param explicit_skips skip table shared across the whole traversal
 * \param current_tree the project tree logic to use for this directory
 */
void
arch_inventory_traversal_internal (struct arch_inventory_options * options,
                                   t_uchar const * user_dir,
                                   t_uchar const * tree_dir,
                                   inventory_state_t * state,
                                   assoc_table * explicit_skips,
                                   arch_project_tree_t * current_tree)
{
  DIR * dir;
  ar_charstr files = 0;
  int n_files = 0;
  int deferred_recursions_head = 0;
  int deferred_recursions_tail = 0;
  ar_legint deferred_recursions = 0;
  ar_legint is_deferred_nested = 0;
  struct directory_regexps * dir_regexps = 0;
  int x;

  /* Slurp the whole directory listing, noticing along the way whether
   * the directory carries its own inventory regexps.
   */
  safe_opendir (&dir, user_dir);

  for (;;)
    {
      char * entry;

      safe_readdir (&entry, dir);
      if (!entry)
        break;

      ar_push_char_star (&files, entry);
      ++n_files;

      if (!dir_regexps && (entry[0] == '.') && !str_cmp(".arch-inventory", entry))
        {
          dir_regexps = lim_malloc (0, sizeof *dir_regexps);
          read_directory_regexps (dir_regexps, user_dir);
        }
    }

  safe_closedir (dir);

  qsort ((void *)files, n_files, sizeof (char *), cmp_files);

  /* Callbacks must see paths in lexical order.  If "foo" is a directory
   * and "foo-bar" exists next to it, the required order is
   *
   *                foo
   *                foo-bar
   *                foo/xyzzy
   *
   * so recursing into "foo" cannot always happen as soon as it is seen.
   * Directories that have to wait are queued in `deferred_recursions'
   * (as indexes into `files') and handled once their turn comes;
   * `is_deferred_nested' records, per queue slot, whether the directory
   * is the root of a nested tree.  Neither queue can need more than one
   * slot per directory entry.
   */
  ar_setsize_legint (&deferred_recursions, n_files);
  ar_setsize_legint (&is_deferred_nested, n_files);

  /* FIXME: parameterisation should occur before we get in here
   * RBC 20050321
   *
   * Bring the tree's idea of the untagged-source disposition and of the
   * tagging method in line with the options, writing only on a mismatch.
   */
  {
    int const untagged_wanted_as_source = (options->untagged_source_category == arch_inventory_source);

    if (untagged_wanted_as_source ? !current_tree->untagged_is_source : current_tree->untagged_is_source)
      current_tree->untagged_is_source = untagged_wanted_as_source;
  }

  if (options->method != current_tree->tag_method)
    current_tree->tag_method = options->method;

  /* keep going until every entry is handled and the queue has drained */
  for (x = 0; (x < n_files) || (deferred_recursions_head != deferred_recursions_tail); )
    {
      do_file_or_deferred(&deferred_recursions_head, &deferred_recursions_tail, &x, n_files, files, deferred_recursions,
                          is_deferred_nested, user_dir, tree_dir, current_tree, options, state, dir_regexps, explicit_skips);
    }

  ar_for_each (files, x)
    lim_free (0, files[x]);

  ar_free_charstr (&files);
  ar_free_legint (&deferred_recursions);
  ar_free_legint (&is_deferred_nested);
  free_directory_regexps (dir_regexps);
}

/* Store in self->path a newly built path naming FILE inside CURRENT_DIR.
 * do_file_or_deferred () releases that path with lim_free ().
 */
static void 
set_file (invent_callback_data_t *self, t_uchar const * const current_dir, char * file)
{
    t_uchar * const joined = file_name_in_vicinity (0, current_dir, file);

    self->path = joined;
}

/* Perform one step of the inventory of a single directory.
 *
 * A step is one of:
 *
 *  - handling the oldest queued recursion, when the queue is not empty and
 *    either all entries have been consumed or right_order_for_recursion ()
 *    says the queued directory's contents sort before files[*x]; or
 *
 *  - classifying files[*x] (and advancing *x), reporting it through
 *    state->callback when its category is selected in options->categories,
 *    and recursing into it, or queueing it for later, when it is a source
 *    directory or a nested tree root.
 *
 * Classification tries, in this order: the mandatory rules ("." and "..",
 * file type, illegal characters), the excludes patterns, arch control
 * files, the "++" and ",," prefixes, the per-directory regexps and finally
 * the tree-wide junk, backup, precious, unrecognized and source patterns.
 * A name that matches none of them is unrecognized.
 *
 * deferred_recursions_head/tail -- read and write positions of the queue
 * x, n_files, files             -- cursor into, size of, and the sorted
 *                                  entry names of the directory
 * deferred_recursions           -- queue of indexes into `files'
 * is_deferred_nested            -- per queue slot: non-zero for a nested tree
 * user_dir, tree_dir            -- the directory as given to the traversal
 *                                  and relative to the tree root
 * dir_regexps                   -- per-directory regexps, or 0
 * explicit_skips                -- skip table lent to current_tree while an
 *                                  id is computed
 *
 * Every path through the function ends at `next_file', which frees
 * cbparam.path and tree_rel_path.
 */
void
do_file_or_deferred (int * deferred_recursions_head, int * deferred_recursions_tail, int *x, int const n_files, ar_charstr files,
                     ar_legint deferred_recursions, ar_legint is_deferred_nested, t_uchar const * const user_dir,
                     t_uchar const * const tree_dir,
                     arch_project_tree_t * current_tree,
                     struct arch_inventory_options * options,
                     inventory_state_t * state,
                     struct directory_regexps * const dir_regexps,
                     assoc_table * explicit_skips)
{
    char * file;
    struct stat *stat_buf;
    int is_control = 0;
    t_uchar * tree_rel_path = NULL;
    invent_callback_data_t cbparam = {0,};

    /* Is it time to run a queued recursion?  Note that this branch jumps
     * into the middle of the classification code below with `file' and
     * `stat_buf' unset; the code at the two target labels uses only
     * cbparam.path and tree_rel_path, which are set up here.
     */
    if ((*deferred_recursions_head != *deferred_recursions_tail)
        && ((*x >= n_files)
            || right_order_for_recursion (files[deferred_recursions[*deferred_recursions_head]], files[*x])))
      {
        int deferred_nested = is_deferred_nested[*deferred_recursions_head];
        set_file (&cbparam, user_dir, files[deferred_recursions[*deferred_recursions_head]]);
        tree_rel_path = file_name_in_vicinity (0, tree_dir, files[deferred_recursions[*deferred_recursions_head]]);
        ++*deferred_recursions_head;
        if (deferred_nested)
            goto handle_deferred_nested;
        else
            goto handle_deferred;
      }

    /* otherwise consume the next directory entry */
    file = files[*x];
    ++*x;
    set_file (&cbparam, user_dir, file);
    stat_buf = &cbparam.stat_buf;
    cbparam.id = 0;
    cbparam.has_source_name = 0;
    
    debug (dbg_invent, 8, "do_file_or_deferred: file %s\n", file);

    /* . and .. are mandatory exclude files
    */
    if (!str_cmp (".", file) || !str_cmp ("..", file))
      {
        /* common exit: release the two paths built for this step */
next_file:
        lim_free (0, cbparam.path);
        lim_free (0, tree_rel_path);
        return;
      }

    /* lstat the entry through its absolute path (tree root + tree-relative path) */
    tree_rel_path = file_name_in_vicinity (0, tree_dir, file);
      {
        t_uchar * abs_path = file_name_in_vicinity (0, current_tree->root, tree_rel_path);
        safe_lstat (abs_path, &cbparam.stat_buf);
        lim_free (0, abs_path);
      }

    /* only symlinks, directories and plain files are
     * considered source, all others need to be precious
     */
    if (   !S_ISREG(stat_buf->st_mode)
           && !S_ISLNK(stat_buf->st_mode)
           && !S_ISDIR(stat_buf->st_mode))
      {
        goto precious_file;
      }

    /* non-printing characters, spaces, and glob characters are
     * mandatory unrecognized files
     */
    if (contains_illegal_character (file))
      {
unrecognized_file:
        if (options->categories & arch_inventory_unrecognized)
          {
            cbparam.category = arch_inventory_unrecognized;
            state->callback (state->closure, &cbparam);
          }
        goto next_file;
      }

    /* callers can specify a pattern for additional files to
     * exclude from consideration.
     */
    if (!options->include_excluded
        && ((dir_regexps && DIR_REGEXP(dir_regexps, excludes) && filename_matches (DIR_REGEXP(dir_regexps, excludes), file))
            || filename_matches (&options->regexps.excludes_pattern, file)))
        goto next_file;

    /* arch control files that get past the exclude pattern are
     * always source.
     */
    is_control = arch_is_control_file (cbparam.path, file);
    if (is_control)
      {
        goto handle_source_file;
      }

    /* file names beginning with "++" are always precious.
    */
    if ((file[0] == '+') && (file[1] == '+'))
      {
        goto precious_file;
      }

    /* file names beginning with ",," are always considered junk files.
    */
    if (file[0] == ',' && file[1] == ',')
        goto junk_file;

    /* test against optional per-directory regexps first
    */
    if (dir_regexps)
      {
        regex_t * re;

        /* junk */
        re = DIR_REGEXP(dir_regexps, junk);
        if (re && filename_matches (re, file))
            goto junk_file;

        /* backup */
        re = DIR_REGEXP(dir_regexps, backup);
        if (re && filename_matches (re, file))
            goto backup_file;

        /* precious */
        re = DIR_REGEXP(dir_regexps, precious);
        if (re && filename_matches (re, file))
            goto precious_file;

        /* unrecognized */
        re = DIR_REGEXP(dir_regexps, unrecognized);
        if (re && filename_matches (re, file))
            goto unrecognized_file;

        /* source */
        re = DIR_REGEXP(dir_regexps, source);
        if (re && filename_matches (re, file))
            goto handle_source_file;
      }

    /* callers can specify a pattern for "junk" files -- files
     * presumed safe-to-be-removed by automatic tools, barring
     * concurrent tools.
     */
    if (filename_matches (&options->regexps.junk_pattern, file))
      {
junk_file:
        if (options->categories & arch_inventory_junk)
          {
            cbparam.category = arch_inventory_junk;
            state->callback (state->closure, &cbparam);
          }
        goto next_file;
      }

    /* callers can specify a pattern for "backup" files -- files
     * that are created by editors and similar programs to save old
     * versions
     */
    if (filename_matches (&options->regexps.backup_pattern, file))
      {
backup_file:
        if (options->categories & arch_inventory_backup)
          {
            cbparam.category = arch_inventory_backup;
            state->callback (state->closure, &cbparam);
          }
        goto next_file;
      }

    /* callers can specify a pattern for "precious" files -- files
     * that are not part of the source, but which should never be
     * automatically removed.
     */
    if (filename_matches (&options->regexps.precious_pattern, file))
      {
precious_file:
        if (options->categories & arch_inventory_precious)
          {
            cbparam.category = arch_inventory_precious;
            state->callback (state->closure, &cbparam);
          }
        goto next_file;
      }

    /* callers can specify a pattern for explicitly "unrecognized" files --
     * files that should be flagged as errors in tree-lint reports.
     */
    if (filename_matches (&options->regexps.unrecognized_pattern, file))
      {
        goto unrecognized_file;
      }

    /* finally, a pattern for "source" files -- files which are expected
     * to be source files.  Note that the option untagged_source_category
     * determines the final disposition of files which match the source
     * pattern, but have no evident id.
     *
     * If a directory appears to be a source directory, but contains a rules
     * directory of its own, then it is in fact the root of a nested tree -- not
     * a regular source file.
     */
    if (filename_matches (&options->regexps.source_pattern, file))
      {
handle_source_file:
        if (S_ISDIR (stat_buf->st_mode) && is_nested_tree (cbparam.path))
          {
            if (options->categories & arch_inventory_tree)
              {
                cbparam.category = arch_inventory_tree;
                cbparam.has_source_name = 1;
                state->callback (state->closure, &cbparam);
              }

            if (options->nested)
              {
                /* queue the nested tree (by its index in `files') if a
                 * later entry has to be reported before its contents
                 */
                if ((*x < n_files) && !right_order_for_recursion (file, files[*x]))
                  {
                    deferred_recursions[*deferred_recursions_tail] = *x - 1;
                    is_deferred_nested[*deferred_recursions_tail] = 1;
                    ++*deferred_recursions_tail;
                    goto next_file;
                  }

                /* Inventory the nested tree with its own tree object and
                 * its own naming conventions; the scalar options are
                 * inherited through copy_options_but_regexps ().
                 */
handle_deferred_nested:
                  {
                    struct arch_inventory_options nest_opts;
                    arch_project_tree_t * nested_tree;
                    nested_tree = arch_project_tree_new_ext (talloc_context, cbparam.path, 1, 1);

                    mem_set0 ((t_uchar *)&nest_opts, sizeof nest_opts);
                    copy_options_but_regexps (&nest_opts, options);
                    arch_get_inventory_naming_conventions (&nest_opts, nested_tree);
                    arch_inventory_traversal_internal (&nest_opts, cbparam.path, ".", state, explicit_skips, nested_tree);
                    arch_free_inventory_naming_conventions (&nest_opts);
                    if (nested_tree != current_tree)
                        arch_project_tree_delete (nested_tree);
                  }
              }
            goto next_file;
          }
        else
          {
            t_uchar * id;
            enum arch_inventory_category this_files_category;

            /* Not a nested tree.   Matches the source pattern.
            */

            id = 0;


            /* Do we need to compute the inventory id of this file which has a source name?
             *
             * Certainly so if the caller wants ids.
             * 
             * Otherwise we need the id only if we need it to verify that this
             * is, indeed, source:
             * 
             * If untagged-source is source, then we don't need the id.
             * If untagged-source is something else, then we need to see if it has an id.
             */
            if (options->want_ids || !current_tree->untagged_is_source)
              {
                /* if the caller wants tags, or if we can only be certain that this is
                 * source by seeing if it has an id, get the id.
                 */
                assoc_table saved_explicit_skips;
                /* FIXME - callers should be setting this in the tree in the first place */
                saved_explicit_skips = current_tree->explicit_skips;
                current_tree->explicit_skips = *explicit_skips;
                id = arch_inventory_id (current_tree, tree_rel_path, stat_buf);
                /* restore, keeping whatever table the tree holds now as the traversal's skip table */
                *explicit_skips = current_tree->explicit_skips;
                current_tree->explicit_skips = saved_explicit_skips;
              }


            /* What is the category of the file?
             * 
             * If we've confirmed that it has an id, then it's certainly source.
             * Also if untagged-source is automatically source.
             * 
             * If untagged-source is not source there are two cases:
             * 
             * If it's a control file, then it is unrecognized, unconditionally.
             * 
             * Otherwise, the untagged-source directive tells us what it is.
             */
            if (id || (options->untagged_source_category == arch_inventory_source))
                this_files_category = arch_inventory_source;
            else if (is_control)
                this_files_category = arch_inventory_unrecognized;
            else
                this_files_category = options->untagged_source_category;


            /* Some callbacks want to see "matches source name but unrecognized 
             * for want of tag" as a source file.   They distinguish this from
             * "matches unrecognized name" or "doesn't match any pattern".
             * RBC 20050409 this is used by changeset-inventory in trees
             * where untagged-source = unrecognized, where explicit-default
             * tags are disabled, so that explicit-default tagged files
             * are returned to the changeset inventory.
             * so what happens is that in untagged-source unrecognized trees.
             * If the file is a 'dont-care' dir, then changeset-creation ignores
             * it silently, otherwise barfs.
             * to trigger this you need:
             * * ids requested
             * * file is source
             * * file has no id
             * * untagged-source unrecognized
             * * explicit-default --dont-care
             * which means that id will always be 0. 
             * to eliminate this option, we could just not callback in this circumstance.
             */
            if (options->treat_unrecognized_source_as_source && (this_files_category == arch_inventory_unrecognized))
              {
                this_files_category = arch_inventory_source;
              }

            if (this_files_category & options->categories)
              {
                cbparam.category = this_files_category;
                cbparam.id = id;
                cbparam.has_source_name = 1;
                state->callback (state->closure, &cbparam);
              }

            /* a non-null id always yields the source category above, so
             * leaving here does not skip the release of an id
             */
            if (this_files_category != arch_inventory_source)
                goto next_file;

            lim_free (0, id);
            id = 0;

            /* recurse into directories, or plan to later.
            */
            if (S_ISDIR (stat_buf->st_mode))
              {
                if ((*x < n_files) && !right_order_for_recursion (file, files[*x]))
                  {
                    deferred_recursions[*deferred_recursions_tail] = *x - 1;
                    is_deferred_nested[*deferred_recursions_tail] = 0;
                    ++*deferred_recursions_tail;
                    goto next_file;
                  }

handle_deferred:
                if (! options->no_recursion)
                  arch_inventory_traversal_internal (options, cbparam.path, tree_rel_path, state, explicit_skips, current_tree);
              }
            goto next_file;
          }
      }
    else
        goto unrecognized_file;
}



/* Comparator with the shape qsort expects, for arrays whose
 * elements are `char *' file names.  VA and VB each point at one
 * element; the strings they hold are ordered with str_cmp.
 */
static int
cmp_files (const void * va, const void * vb)
{
  char * const * left = (char * const *)va;
  char * const * right = (char * const *)vb;

  return str_cmp (*left, *right);
}

/* A and B are names already in lexical order (A < B).  Return
 * non-zero when that order still holds once a name is thought of
 * as a directory, i.e. with "/" appended.  The traversal defers
 * recursion into a directory when this returns 0 for it and the
 * next file.
 */
static int
right_order_for_recursion (t_uchar * a, t_uchar * b)
{
  /* Step over the prefix the two names have in common.
   */
  for (; *a && (*a == *b); ++a, ++b)
    ;

  if (*a && *b)
    {
      /* The names part ways at an ordinary character; appending
       * "/" to either cannot change how they compare.
       */
      invariant (*a < *b);
      return 1;
    }

  if (!*a)
    {
      /* A ran out first (A is a prefix of B): "A/" sorts before B
       * only when B continues with something greater than '/'.
       */
      return (*b > '/');
    }

  /* B ran out first: compare the character at which A continues
   * against '/'.
   */
  return (*a < '/');
}


/* Return 1 if FILENAME holds any character for which
 * char_is_non_ascii is true, 0 otherwise.  FILENAME must be a
 * single path element: a '/' anywhere in it trips an invariant.
 */
static int
contains_illegal_character (char * filename)
{
  int x = 0;

  while (filename[x])
    {
      invariant (filename[x] != '/');

      if (char_is_non_ascii (((t_uchar *)filename)[x]))
        return 1;

      ++x;
    }

  return 0;
}

/* Run PATTERN against FILENAME.  Returns 1 on a match and 0 on
 * REG_NOMATCH; any other regexec result is reported via panic.
 */
static int
filename_matches (regex_t * pattern, char * filename)
{
  int status = regexec (pattern, filename, 0, 0, 0);

  if (status == REG_NOERROR)
    return 1;

  if (status != REG_NOMATCH)
    {
      panic ("unexpected regexec error in arch_inventory_traversal");
      return -1;
    }

  return 0;
}

/* Return 1 if REL_FILE matches the control-file pattern (a
 * `.arch-ids' directory or an id file inside one, `{arch}' and
 * paths beneath it, `{arch}/.arch-project-tree', or
 * `.arch-inventory', optionally below leading directories), and
 * 0 otherwise.  FILENAME is not consulted.
 *
 * The pattern is compiled on the first call and kept in static
 * storage for later calls.
 */
int
arch_is_control_file (char * rel_file, char * filename)
{
  static regex_t control_pattern = {0,};
  static int compiled = 0;

  if (!compiled)
    {
      int re_error = regcomp (&control_pattern,
                              "^((.*/)?(\\.arch-ids(/(=id|[^/]*\\.id))?|\\{arch\\}((/[a-zA-Z=][^/~]*)(/[0-9a-zA-Z=][^/~]*)*)?|\\{arch\\}/\\.arch-project-tree|\\.arch-inventory))$",
                              REG_EXTENDED);

      invariant (!re_error);
      compiled = 1;
    }

  return filename_matches (&control_pattern, rel_file);
}


/* Report whether PATH is the root of a project tree, as judged
 * by arch_project_tree_dir_is_root.
 */
static int
is_nested_tree (char * path)
{
  int is_root = arch_project_tree_dir_is_root (NULL, path, NULL);

  return is_root;
}

/* Return 1 for lines that carry no directive: empty lines, lines
 * that start with whitespace, and lines that start with '#'.
 * Return 0 for everything else.
 */
static int
is_comment_line (t_uchar * line, long len)
{
  if (len == 0)
    return 1;

  if (line[0] == '#')
    return 1;

  return char_is_space (line[0]) ? 1 : 0;
}

/* Recognise a line of the form "KW <regexp>".
 *
 * LINE (LEN bytes) must begin with keyword KW followed by a
 * whitespace character; otherwise nothing is changed and 0 is
 * returned.  On a match the rest of the line, with whitespace
 * trimmed from both ends, is wrapped in parentheses and stored in
 * *RE: as the whole value when *RE was null, or appended as a
 * further "|(...)" alternative when it was not.  Returns 1.
 */
static int
sets_re (char * kw, char ** re, t_uchar * line, long len)
{
  int kw_len = str_length (kw);
  t_uchar * body;
  t_uchar * group;

  /* need the keyword plus at least the separator after it */
  if (len < (kw_len + 1))
    return 0;

  if (str_cmp_prefix (kw, line))
    return 0;

  if (!char_is_space (line[kw_len]))
    return 0;

  /* drop the keyword, then trim whitespace off both ends */
  line += kw_len;
  len -= kw_len;

  for (; len && char_is_space (line[0]); ++line, --len)
    ;

  while (len && char_is_space (line [len - 1]))
    --len;

  /* build "(body)" or "|(body)" depending on whether there is
   * already an expression to extend
   */
  body = str_save_n (0, line, len);
  group = str_alloc_cat (0, (*re ? "|(" : "("), body);
  group = str_realloc_cat (0, group, ")");
  lim_free (0, body);

  if (*re)
    {
      *re = str_realloc_cat (0, *re, group);
      lim_free (0, group);
    }
  else
    {
      *re = group;
    }

  return 1;
}

/* Recognise a line that begins with keyword KW followed by a
 * whitespace character.
 *
 * On a match, METHOD is stored in *METHOD_VAR and
 * UNTAGGED_CATEGORY in *UNTAGGED_CATEGORY_VAR, and 1 is returned.
 * Otherwise nothing is stored and 0 is returned.
 */
static int
sets_id_tagging_method (char * kw,
                     enum arch_id_tagging_method * method_var,
                     enum arch_inventory_category * untagged_category_var,
                     enum arch_id_tagging_method method,
                     enum arch_inventory_category untagged_category,
                     t_uchar * line, long len)
{
  int kw_len = str_length (kw);
  int matched;

  /* long enough for keyword + separator, starts with the keyword,
   * and the keyword is followed by whitespace
   */
  matched = ((len >= (kw_len + 1))
             && !str_cmp_prefix (kw, line)
             && char_is_space (line[kw_len]));

  if (matched)
    {
      *method_var = method;
      *untagged_category_var = untagged_category;
    }

  return matched;
}

/* Recognise an "untagged-source <category>" line.
 *
 * Returns 0, changing nothing, when SAVED_LINE (SAVED_LEN bytes)
 * does not begin with "untagged-source" followed by whitespace.
 * Otherwise the alphabetic word after the keyword must be one of
 * source, precious, backup, junk or unrecognized, followed only by
 * blanks up to the end of the line; the matching category is
 * stored in *UNTAGGED_CATEGORY_VAR and 1 is returned.
 *
 * Anything else after the keyword is a syntax error: the line is
 * printed to descriptor 2 and the process exits with status 2.
 */
static int
sets_untagged_source_disposition (enum arch_inventory_category * untagged_category_var,
                                  t_uchar * saved_line, long saved_len)
{
  static const struct
  {
    char * name;
    enum arch_inventory_category category;
  } dispositions[] =
    {
      { "source", arch_inventory_source },
      { "precious", arch_inventory_precious },
      { "backup", arch_inventory_backup },
      { "junk", arch_inventory_junk },
      { "unrecognized", arch_inventory_unrecognized }
    };
  t_uchar * kw = "untagged-source";
  int kw_len = str_length (kw);
  t_uchar * cursor = saved_line;
  long remaining = saved_len;
  t_uchar * word_start;
  t_uchar * word = 0;
  int found = -1;
  int i;

  if (remaining < (kw_len + 1))
    return 0;

  if (str_cmp_prefix (kw, cursor) || !char_is_space (cursor[kw_len]))
    return 0;

  cursor += kw_len;
  remaining -= kw_len;

  /* blanks between the keyword and the category word */
  for (; remaining && char_is_blank (cursor[0]); ++cursor, --remaining)
    ;

  /* the category word itself */
  word_start = cursor;
  for (; remaining && char_is_alpha (cursor[0]); ++cursor, --remaining)
    ;
  word = str_save_n (0, word_start, cursor - word_start);

  /* blanks after the category word */
  for (; remaining && char_is_blank (cursor[0]); ++cursor, --remaining)
    ;

  /* only look the word up when nothing but the line end follows it */
  if (!remaining || (cursor[0] == '\n'))
    {
      for (i = 0; (found < 0) && (i < (int)(sizeof dispositions / sizeof dispositions[0])); ++i)
        {
          if (!str_cmp (word, dispositions[i].name))
            found = i;
        }
    }

  if (found < 0)
    {
      safe_printfmt (2, "arch: syntax error in =tagging-method:\n  %.*s\n", (int)saved_len, saved_line);
      exit (2);
    }

  *untagged_category_var = dispositions[found].category;

  lim_free (0, word);
  return 1;
}

static void
read_directory_regexps (struct directory_regexps * regexps, char const * dir_name)
{
  char * excludes = 0;
  char * junk = 0;
  char * backup = 0;
  char * precious = 0;
  char * unrecognized = 0;
  char * source = 0;
  t_uchar * file_name = 0;
  int fd;

  file_name = file_name_in_vicinity (0, dir_name, ".arch-inventory");
  fd = safe_open (file_name, O_RDONLY, 0);
  while (1)
    {
      t_uchar * line;
      long len;

      safe_next_line (&line, &len, fd);
      if (!len)
        break;

      (void)(!is_comment_line (line, len)
             && !sets_re ("exclude", &excludes, line, len)
             && !sets_re ("junk", &junk, line, len)
             && !sets_re ("backup", &backup, line, len)
             && !sets_re ("precious", &precious, line, len)
             && !sets_re ("unrecognized", &unrecognized, line, len)
             && !sets_re ("source", &source, line, len));
    }
  safe_close (fd);

  mem_set0((t_uchar*)regexps->regexps, sizeof regexps->regexps);
  if (excludes)
    {
      if (regcomp (&regexps->storage.excludes_pattern, excludes, REG_EXTENDED))
        {
          panic_msg ("bogus tagging-method regexp for `excludes' in ");
          panic (dir_name);
        }
      DIR_REGEXP(regexps, excludes) = &regexps->storage.excludes_pattern;
    }

  if (junk)
    {
      if (regcomp (&regexps->storage.junk_pattern, junk, REG_EXTENDED))
        {
          panic_msg ("bogus tagging-method regexp for `junk' in ");
          panic (dir_name);
        }
      DIR_REGEXP(regexps, junk) = &regexps->storage.junk_pattern;
    }

  if (backup)
    {
      if (regcomp (&regexps->storage.backup_pattern, backup, REG_EXTENDED))
        {
          panic_msg ("bogus tagging-method regexp for `backup' in ");
          panic (dir_name);
        }
      DIR_REGEXP(regexps, backup) = &regexps->storage.backup_pattern;
    }

  if (precious)
    {
      if (regcomp (&regexps->storage.precious_pattern, precious, REG_EXTENDED))
        {
          panic_msg ("bogus tagging-method regexp for `precious' in ");
          panic (dir_name);
        }
      DIR_REGEXP(regexps, precious) = &regexps->storage.precious_pattern;
    }

  if (unrecognized)
    {
      if (regcomp (&regexps->storage.unrecognized_pattern, unrecognized, REG_EXTENDED))
        {
          panic_msg ("bogus tagging-method regexp for `unrecognized' in ");
          panic (dir_name);
        }
      DIR_REGEXP(regexps, unrecognized) = &regexps->storage.unrecognized_pattern;
    }

  if (source)
    {
      if (regcomp (&regexps->storage.source_pattern, source, REG_EXTENDED))
        {
          panic_msg ("bogus tagging-method regexp for `source' in ");
          panic (dir_name);
        }
      DIR_REGEXP(regexps, source) = &regexps->storage.source_pattern;
    }

  lim_free (0, file_name);
  lim_free (0, excludes);
  lim_free (0, junk);
  lim_free (0, backup);
  lim_free (0, precious);
  lim_free (0, unrecognized);
  lim_free (0, source);
}

/* Release REGEXPS: regfree every pattern whose pointer is set in
 * REGEXPS->regexps, then free the structure itself.  A null
 * REGEXPS is accepted and ignored.
 */
static void
free_directory_regexps (struct directory_regexps * regexps)
{
  regex_t ** slot;
  regex_t ** end;

  if (!regexps)
    return;

  end = regexps->regexps + (sizeof regexps->regexps / sizeof *regexps->regexps);

  for (slot = regexps->regexps; slot != end; ++slot)
    {
      if (*slot)
        regfree (*slot);
    }

  lim_free (0, regexps);
}

/* Return a new table holding copies of the records of TABLE whose
 * second field (the id) does not begin with "A_".  TABLE itself is
 * left untouched.
 */
rel_table
pick_non_control (rel_table table)
{
  rel_table kept = 0;
  int row;

  for (row = 0; row < rel_n_records (table); ++row)
    {
      /* str_cmp_prefix yields 0 when the id starts with "A_" */
      if (!str_cmp_prefix ("A_", table[row][1]))
        continue;

      rel_add_records (&kept, rel_copy_record (table[row]), 0);
    }

  return kept;
}

/**
 * \brief normalise a provided path, working on the string alone.
 *
 * One leading "/" or "./" and one trailing "/" are removed, and the
 * remainder is given a "./" prefix.
 *
 * \return a newly allocated normalised path, or NULL when the path
 *         is empty or nothing is left after stripping.
 */
static t_uchar *
normal_from_path (t_uchar const *path)
{
    t_uchar const *start = path;
    int remaining = str_length (path);
    t_uchar *bare;

    if (!remaining)
        return NULL;

    /* step past a leading "/" or "./" */
    if (path[0] == '/')
      {
        start += 1;
        remaining -= 1;
      }
    else if (!str_cmp_prefix ("./", path))
      {
        start += 2;
        remaining -= 2;
      }

    if (!remaining)
        return NULL;

    /* knock off a single trailing "/" */
    if (start[remaining - 1] == '/')
        --remaining;

    if (!remaining)
        return NULL;

    bare = str_save_n (0, start, remaining);
    return str_replace (bare, str_alloc_cat (0, "./", bare));
}

/**
 * \brief normalise a provided path into a prefix to filter against
 *
 * This is the result of normal_from_path with "/" appended.
 *
 * \return a newly allocated prefix, or NULL when normal_from_path
 *         yields NULL for the path.
 */
static t_uchar *
prefix_from_path (t_uchar const *path)
{
    t_uchar *normal = normal_from_path (path);

    if (normal)
        normal = str_replace (normal, str_alloc_cat (0, normal, "/"));

    return normal;
}

/**
 * \brief filter an_inventory so that only paths specified in filter_paths are present
 *
 * A record is kept when its path is equal to a normalised filter
 * path, or starts with that normalised filter path followed by "/".
 * Filter entries that normalise to nothing are ignored.
 *
 * \param an_inventory a rel_table with path, id pairs.
 * \param filter_paths a rel_table with paths. paths should start with ./
 * \return a new table holding copies of the kept records; when
 *         filter_paths is empty, a copy of the whole inventory.
 */
rel_table
arch_inventory_included (rel_table an_inventory, rel_table filter_paths)
{
    rel_table answer = NULL;
    int scan_position;

    if (!rel_n_records (filter_paths))
        return rel_copy_table (an_inventory);

    rel_for_each (an_inventory, scan_position)
      {
        int filter_position;
        int keep = 0;

        rel_for_each (filter_paths, filter_position)
          {
            t_uchar *prefix_filter = prefix_from_path (filter_paths[filter_position][0]);
            t_uchar *exact_filter = normal_from_path (filter_paths[filter_position][0]);

            if (prefix_filter && exact_filter
                && (!str_cmp_prefix (prefix_filter, an_inventory[scan_position][0])
                    || !str_cmp (exact_filter, an_inventory[scan_position][0])))
              {
                keep = 1;
              }

            /* both strings are freshly allocated for this iteration;
             * release them here so they are not leaked once per
             * (record, filter) pair
             */
            lim_free (0, prefix_filter);
            lim_free (0, exact_filter);

            /* one matching filter is enough to keep the record */
            if (keep)
                break;
          }

        if (keep)
            rel_add_records (&answer, rel_copy_record (an_inventory[scan_position]), 0);
      }

    return answer;
}

/* State handed to cache_inventory_callback through its closure
 * argument while an inventory is being recorded into a cache.
 */
struct caching_inventory_state
{
    cached_changeset_inventory_t * cache; /* cache whose inventory array receives a copy of each entry */
    inv_callback callback;                /* the client's callback, invoked for every entry */
    void * closure;                       /* closure passed back to `callback' */
};

int
cached_changeset_destructor (void *data)
{
  cached_changeset_inventory_t * cache = talloc_get_type (data, cached_changeset_inventory_t);
  int index;
  ar_for_each (cache->inventory, index)
    talloc_free (cache->inventory[index]);
  ar_free_invent_cb (&cache->inventory);
  return 0;
}

/**
 * \brief perform a changeset inventory of tree, caching the results in cache, or using cached results, if the options are compatible
 *
 * When *cache exists and its recorded options are compatible with
 * options, the cached entries are replayed to callback.  Otherwise
 * any existing cache is freed, a new one is allocated in its place,
 * and the tree is inventoried with every entry both delivered to
 * callback and recorded in the new cache.
 */
void
cached_changeset_inventory_traveral (struct arch_inventory_options * options, arch_project_tree_t * tree, inv_callback callback, void * closure, cached_changeset_inventory_t ** cache)
{
  struct caching_inventory_state state;
  int slot;

  if (*cache && !incompatible_options (options, &(*cache)->options))
    {
      /* usable cache: replay what was recorded earlier */
      debug (dbg_invent, 8, "cached_changeset_inventory_traveral: cached inventory %s\n", tree->root);
      ar_for_each ((*cache)->inventory, slot)
        callback (closure, (*cache)->inventory[slot]);
      return;
    }

  /* no cache, or one built with incompatible options: start afresh */
  talloc_free (*cache);
  *cache = talloc (NULL, cached_changeset_inventory_t);
  (*cache)->inventory = NULL;
  talloc_set_destructor (*cache, cached_changeset_destructor);
  copy_options_but_regexps (&(*cache)->options, options);

  state.closure = closure;
  state.callback = callback;
  state.cache = *cache;

  debug (dbg_invent, 8, "cached_changeset_inventory_traveral: inventorying %s\n", tree->root);
  arch_inventory_traversal (options, tree, cache_inventory_callback, &state);
}

/**
 * \brief cache inventory results, simultaneously calling the ultimate callee
 *
 * closure is a struct caching_inventory_state; the client callback
 * stored in it is invoked first, then a copy of data is appended
 * to the cache's inventory.
 */
void
cache_inventory_callback (void * closure, invent_callback_data_t const * const data)
{
  struct caching_inventory_state * state = closure;
  invent_callback_data_t * copy;

  /* the client sees the entry first */
  state->callback (state->closure, data);

  /* then a private copy is recorded for later replay */
  copy = invent_cb_cache (data);
  ar_push_invent_cb (&state->cache->inventory, copy);
}

/**
 * \brief create a cached invent_cb entry - talloc managed
 *
 * The entry is a copy of data whose path, and id when there is one,
 * are duplicated as talloc children of the new entry.
 */
invent_callback_data_t *
invent_cb_cache (invent_callback_data_t const * const data)
{
  invent_callback_data_t * copy;

  copy = talloc (NULL, invent_callback_data_t);
  *copy = *data;

  /* replace the borrowed strings with copies owned by the entry */
  copy->path = talloc_strdup (copy, data->path);
  if (data->id)
      copy->id = talloc_strdup (copy, data->id);

  return copy;
}



/* tag: Tom Lord Wed May 14 09:47:16 2003 (invent.c)
 */


/* syntax highlighted by Code2HTML, v. 0.9.1 */