# Part of the A-A-P recipe executive: Store signatures

# Copyright (C) 2002-2003 Stichting NLnet Labs
# Permission to copy and use this file is specified in the file COPYING.
# If this file is missing you can find it here: http://www.a-a-p.org/COPYING

#
# This module handles remembering signatures of targets and sources.
#

import os
import os.path
import string
import md5
import time

from Util import *
from Message import *
from Filetype import ft_detect
import Global

# Both "signatures" dictionaries are indexed by the name of the target Node
# (file or directory).
# For non-virtual nodes the absolute name is used.
# Each entry is a dictionary indexed by the source-name@check-name and has a
# string value.
# The "@buildcheck" entry is used for the build commands.
# The "signfile" entry is used to remember the sign file that stores the
# signatures for this target.
# The entry named by "timekey" (see below) holds the time of the last update
# of the entry, as a string.
# "old_signatures" is for the signatures when we started.
# "upd_signatures" is for the signatures of items for which the build commands
# were successfully executed and are to be stored for the next time.
# Example:
# {"/aa/bb/file.o" : {  "signfile" : "/aa/bb/AAPDIR/sign",
#                       "/aa/bb/file.c@md5" : "13a445e5",
#                       "@buildcheck" : "-O2"},
#  "/aa/bb/bar.o"  : {  "signfile" : "/aa/bb/mysign",
#                       "/aa/bb/bar-debug.c@time" : "143234",
#                       "/aa/bb/bar.h@time" : "423421"}}
old_signatures = {}
upd_signatures = {}

# "new_signatures" caches the signatures we computed this invocation.  It is a
# dictionary of dictionaries:
#   new_signatures["/path/file"]["md5"] = md5hex("/path/file")
# The key for the toplevel dictionary is the Node name.
# The key for the second level is the check name.  The target name isn't used
# here.
# A second level key "cleared" is added by sign_clear() to mark an item whose
# signatures were dropped after it was built.
new_signatures = {}

# "chd_signatures" remembers which files were marked as changed with --changed
# or ":changed".  The value is the "recursive" flag passed to
# sign_clear_file(), or 1 for a target that was built from a changed source.
chd_signatures = {}

# Key used for the timestamp on the signature entry.  Used to find the last
# updated entry for published files.
timekey = "lastupdate"

# Name for the sign file relative to the directory of the target or the recipe.
sign_normal_fname = in_aap_dir("sign")
sign_normal_fname_len = len(sign_normal_fname)

# Remember which sign files have been read.
# Also when the file couldn't actually be read, so that we remember to write
# this file when signs have been updated.
# An entry exists when the file has been read.  Its value is non-zero when the
# file should be written back.
sign_files = {}

def get_sign_file(recdict, target, update):
    """Make sure the sign file belonging to "target" has been read.
       The file is read only the first time it is asked for.  A non-zero
       "update" flags the file as needing to be written back; an already set
       flag is never reset here."""
    sign_fname = fname_fold(target.get_sign_fname())
    if sign_files.has_key(sign_fname):
        # Already read before, at most the "needs writing" flag changes.
        if update:
            sign_files[sign_fname] = 1
        return

    # First time: register the file before reading it.
    sign_files[sign_fname] = update
    sign_read(recdict, sign_fname)


def sign_file_dir(fname):
    """Return the directory that names stored in sign file "fname" are
       relative to, in uniform format (passed through fname_fold())."""
    # A sign file that ends in the normal "AAPDIR/sign" name lives one level
    # below the directory it describes: strip two path components then.
    # Any other sign file only has its own name stripped.
    in_aap_dir_sign = 0
    if len(fname) >= sign_normal_fname_len:
        tail = fname[-sign_normal_fname_len:]
        in_aap_dir_sign = (fname_fold(tail) == fname_fold(sign_normal_fname))
    if in_aap_dir_sign:
        fname = os.path.dirname(fname)
    return fname_fold(os.path.dirname(fname))


# Names in a sign file carry a one-character prefix: "-" marks a virtual node,
# "=" marks a file name.  File names are turned back into absolute names here.
def sign_expand_name(recdict, dir, name):
    """Turn "name", as found in a sign file located in "dir" or "dir/AAPDIR",
    back into the name used as a dictionary key.
    The first character of "name" is the type prefix and is dropped."""
    stripped = name[1:]
    if name[0] == '-' or os.path.isabs(stripped):
        # Virtual node or already absolute: nothing to expand.
        return stripped

    total = len(stripped)
    if total <= 3 or stripped[:3] != "../":
        # No leading "../": a plain join with the directory will do.
        return fname_fold(os.path.join(dir, stripped))

    # Strip leading "../" items by hand, dropping one trailing component of
    # "dir" for each of them.  os.path.normpath() is avoided because it is a
    # bit slow (it does more than removing ".." items).
    cut = len(dir)      # "dir" is used up to the slash at this index
    pos = 3             # index in "stripped" just after the handled "../"
    while 1:
        cut = string.rfind(dir, "/", 0, cut)
        if cut < 0:
            # "cannot happen": sign file corrupted?  Give back the name with
            # the ".." still in it (equivalent to sign not found).
            msg_error(recdict, _('In sign file: Too many ".." in "%s" for directory "%s"') % (name, dir))
            return fname_fold(os.path.join(dir, stripped))
        if pos + 3 < total and stripped[pos:pos + 3] == "../":
            # Another "../" follows with something after it: go up once more.
            pos = pos + 3
        else:
            break

    return dir[:cut + 1] + stripped[pos:]

def sign_reduce_name(dir, name):
    """Return "name" in the form stored in a sign file: "-" plus the name
    for a name that is not absolute, "=" plus the name shortened with
    shorten_name(name, dir) for an absolute name."""
    if not os.path.isabs(name):
        return '-' + fname_fold(name)
    shortened = shorten_name(name, dir)
    return '=' + fname_fold(shortened)


#
# A sign file stores the signatures for items (sources and targets) with the
# values they when they were computed in the past.
# The format of each line is:
#       =foo.o<ESC>=foo.c@md5_c=012346<ESC>...<ESC>\n
# "md5_c" can be "md5", "time", etc.  Note that it's not always equal to
# the "check" attribute, both "time" and "older" use "time" here.

def sign_read(recdict, fname):
    """Read the signature file "fname" into our dictionary of signatures."""
    basedir = sign_file_dir(fname)
    try:
        f = open(fname, "rb")
        for line in f.readlines():
            e = string.find(line, "\033")
            if e > 0:   # Only use lines with an ESC
                name = sign_expand_name(recdict, basedir, line[:e])
                old_signatures[name] = {"signfile" : fname_fold(fname)}
                while 1:
                    s = e + 1
                    e = string.find(line, "\033", s)
                    if e < 1:
                        break
                    i = string.rfind(line, "=", s, e)
                    if i < 1:
                        break
                    old_signatures[name][sign_expand_name(recdict,
                                           basedir, line[s:i])] = line[i + 1:e]
        f.close()
    except StandardError, e:
        # TODO: handle errors?  It's not an error if the file does not exist.
        msg_note(recdict, (_('Cannot read sign file "%s": ')
                                               % shorten_name(fname)) + str(e))


def sign_write_all(recdict):
    """Write back every sign file that was flagged as updated in
       "sign_files"."""

    # No locking is done: we assume to be the only one updating a signature
    # file.  Sharing it with others wouldn't make sense, since building would
    # fail as well.
    pending = sign_files.keys()
    for sign_fname in pending:
        if not sign_files[sign_fname]:
            # Only read, never updated: leave the file alone.
            continue
        sign_write(recdict, sign_fname)

def sign_write(recdict, fname):
    """Write one updated signature file."""
    sign_dir = os.path.dirname(fname)
    if not os.path.exists(sign_dir):
        try:
            os.makedirs(sign_dir)
        except StandardError, e:
            msg_warning(recdict,
                        (_('Cannot create directory for signature file "%s": ')
                                                             % fname) + str(e))
    try:
        f = open(fname, "wb")
    except StandardError, e:
        msg_warning(recdict,
                          (_('Cannot open signature file for writing: "%s": ')
                              % fname) + str(e))
        return

    def write_sign_line(f, basedir, s, old, new):
        """Write a line to sign file "f" in directory "basedir" for item "s",
        with checks from "old", using checks from "new" if they are present."""
        f.write(sign_reduce_name(basedir, s) + "\033")

        # Go over all old checks, write all of them, using the new value
        # if it is available.
        for c in old.keys():
            if c != "signfile":
                if new and new.has_key(c):
                    val = new[c]
                else:
                    val = old[c]
                f.write("%s=%s\033" % (sign_reduce_name(basedir, c), val))

        # Go over all new checks, write the ones for which there is no old
        # value.
        if new:
            for c in new.keys():
                if c != "signfile" and not old.has_key(c):
                    f.write("%s=%s\033" % (sign_reduce_name(basedir, c),
                                                                       new[c]))

        f.write("\n")

    basedir = sign_file_dir(fname)
    try:
        # Go over all old signatures, write all of them, using checks from
        # upd_signatures when they are present.
        # When the item is in upd_signatures, use the directory specified
        # there, otherwise use the directory of old_signatures.
        for s in old_signatures.keys():
            if upd_signatures.has_key(s):
                if upd_signatures[s]["signfile"] != fname:
                    continue
                new = upd_signatures[s]
            else:
                if old_signatures[s]["signfile"] != fname:
                    continue
                new = None
            write_sign_line(f, basedir, s, old_signatures[s], new)


        # Go over all updated signatures, write only the ones for which there
        # is no old signature.
        for s in upd_signatures.keys():
            if (not old_signatures.has_key(s)
                                   and upd_signatures[s]["signfile"] == fname):
                write_sign_line(f, basedir, s, upd_signatures[s], None)

        f.close()
    except StandardError, e:
        msg_warning(recdict, (_('Write error for signature file "%s": '),
                                                               fname) + str(e))

def hexdigest(m):
    """Return the digest of md5 object "m" as a string with two lowercase hex
    characters for each byte of m.digest()."""
    # NOTE:  The Python 2.0 interface of the native md5 module offers this as
    # a method, Python 1.5 does not.
    result = ''
    for char in m.digest():
        result = result + ("%02x" % ord(char))
    return result


def check_md5(recdict, fname, msg = 1):
    if not os.path.isfile(fname):
        # A non-existing file isn't that bad, could be a virtual target that
        # wasn't marked as being virtual.
        if msg:
            msg_note(recdict,
                  _('Cannot compute md5 checksum for "%s": it does not exist')
                  % fname)
        return "unknown"

    try:
        f = open(fname, "rb")
        m = md5.new()
        while 1:
            # Read big blocks at a time for speed, but don't read the whole
            # file at once to reduce memory usage.
            data = f.read(32768)
            if not data:
                break
            m.update(data)
        f.close()
        res = hexdigest(m)
    except StandardError, e:
        if msg:
            msg_warning(recdict, (_('Cannot compute md5 checksum for "%s": ')
                                                             % fname) + str(e))
        res = "unknown"
    return res


def check_c_md5(recdict, fname):
    """Compute an md5 signature after filtering out irrelevant items for C
       code (white space and comments).
       The file is read one line at a time.  Text outside comments is fed to
       the md5 object in pieces, each followed by a single space.
       Returns the checksum as a string of hex characters, or "unknown" (after
       giving a warning) when the file cannot be opened or read."""
    try:
        f = open(fname)
    except StandardError, e:
        # Can't open a URL here.
        msg_warning(recdict, (_('Cannot compute md5 checksum for "%s": ')
                                                             % fname) + str(e))
        return "unknown"

    m = md5.new()

    # Both flags are kept from one line to the next, so that a string or a
    # "/* */" comment that is still open at the end of a line continues on the
    # following line.
    inquote = 0
    incomment = 0
    while 1:
        # Read one line at a time.
        try:
            data = f.readline()
        except StandardError, e:
            # Can't read the file.
            # NOTE(review): "f" is not closed on this path.
            msg_warning(recdict, (_('Cannot read "%s": ') % fname) + str(e))
            return "unknown"

        if not data:
            break

        # Filter out irrelevant changes:
        # - Collapse sequences of white space into one space.
        # - Remove comments.
        # TODO: double-byte characters may have a backslash or double quote
        # as their second byte, how to know this?
        # The last character of the line is never looked at as the start of an
        # item, which makes "data[i + 1]" below always valid.
        # NOTE(review): presumably that character is the line break; a last
        # line without one would lose its final character - confirm.
        data_len = len(data) - 1
        s = 0           # start of the text that was not passed to md5 yet
        skipwhite = 1   # non-zero at the start of the line and after a comment
        i = 0
        while i < data_len:
            if inquote:
                # Only need to search for the endquote.
                while i < data_len:
                    c = data[i]
                    i = i + 1
                    if c == '"':
                        inquote = 0
                        break
                    elif c == '\\':
                        # Skip the character after a backslash, it can't end
                        # the string.
                        i = i + 1
                continue

            if incomment:
                # Only need to search for the comment end "*/".
                while i < data_len:
                    if data[i] == '*' and data[i + 1] == '/':
                        incomment = 0
                        i = i + 2
                        # Text to be used starts again after the comment.
                        s = i
                        skipwhite = 1
                        break
                    i = i + 1
                continue

            c = data[i]
            if c == ' ' or c == '\t':
                # White space after non-white: dump text.
                if not skipwhite:
                    m.update(data[s:i] + ' ')

                # Skip white space
                while 1:
                    i = i + 1
                    if i == data_len:
                        break
                    c = data[i]
                    if c != ' ' and c != '\t':
                        break
                s = i
                skipwhite = 0
                if i == data_len:
                    # Only white space was left in this line.
                    break

            if c == '/' and (data[i + 1] == '/' or data[i + 1] == '*'):
                # Start of // or /* comment.
                # Dump the text before the comment, if there is any.
                if i > s:
                    m.update(data[s:i] + ' ')
                i = i + 1
                if data[i] == '/':
                    # A "//" comment: nothing in the rest of the line is used.
                    s = data_len
                    break
                incomment = 1
            else:
                skipwhite = 0
                if c == "'":
                    # skip '"' or '\'', not the start of a string
                    if data[i + 1] == '\\':
                        i = i + 1
                    i = i + 2
                elif c == '"':
                    inquote = 1
            i = i + 1

        # Dump the text that remains at the end of the line.
        if not (incomment or skipwhite) and s < data_len:
            m.update(data[s:data_len] + ' ')

    try:
        f.close()
    except:
        # Error while closing a read file???
        pass

    return hexdigest(m)


def buildcheckstr2sign(str):
    """Return the buildcheck signature for string "str": its md5 checksum as
    a string of hex characters."""
    digest = md5.new(str)
    return hexdigest(digest)


def _sign_lookup(signatures, name, key):
    """
    Get the "key" signature for item "name" from dictionary "signatures".
    "name" must have gone through fname_fold().
    """
    if not signatures.has_key(name):
        return ''
    s = signatures[name]
    if not s.has_key(key):
        return ''
    return s[key]


def sign_clear(name):
    """
    Drop the signatures computed for item "name" in this invocation.
    Only a "cleared" marker is left behind, which get_new_sign() looks for.
    Used when the item has been built.
    """
    new_signatures[name] = {"cleared": 1}


def get_new_sign(recdict, name, check, force = 0):
    """Return the current "check" signature of item "name", as a string (also
       for timestamps).
       "name" is the absolute name for non-virtual nodes and can be a URL.
       The result does not depend on a target.
       A non-zero "force" also computes the signature for an item that was
       cleared (for --touch).
       Known checks are "time", "md5" and "c_md5"; any other check results in
       "unknown"."""
    name = fname_fold(name)

    # When build commands are not executed and we pretended to build a target,
    # its signature was cleared.  Recomputing it would give the old value,
    # because the file didn't change, while a different one is wanted.
    if not force and skip_commands():
        if (new_signatures.has_key(name)
                and new_signatures[name].has_key("cleared")):
            return "cleared"

    # Use the signature computed earlier, if there is one.
    cached = _sign_lookup(new_signatures, fname_fold(name), check)
    if cached:
        return cached

    # Not computed before: do it now.
    if check == "time":
        from Remote import url_time
        sign = str(url_time(recdict, name))
    elif check == "md5":
        sign = check_md5(recdict, name)
    elif check == "c_md5":
        sign = check_c_md5(recdict, name)
    # TODO: other checks, defined with actions
    else:
        sign = "unknown"

    # Remember the result, it may be asked for many times.
    if not new_signatures.has_key(name):
        new_signatures[name] = {}
    new_signatures[name][check] = sign
    return sign

def sign_clear_target(recdict, target):
    """Forget the old and updated signatures of "target"; called after its
       build rules were executed successfully.  The sign file of the target is
       marked as needing to be written.  sign_updated() is to be called next
       for each source."""
    get_sign_file(recdict, target, 1)
    target_name = fname_fold(target.get_name())
    for sigdict in (old_signatures, upd_signatures):
        if sigdict.has_key(target_name):
            del sigdict[target_name]


def sign_clear_file(fname, recursive):
    """Mark file "fname" as changed, remembering the "recursive" flag, and
       drop its updated signatures.
       Used for ":changed" and "--changed=FILE"."""
    # NOTE(review): the changed mark is stored under full_fname(fname) while
    # the updated signatures are looked up with "fname" as given - confirm
    # that callers pass a name for which both are the same.
    chd_signatures[full_fname(fname)] = recursive
    try:
        del upd_signatures[fname]
    except KeyError:
        # There were no updated signatures for this file.
        pass


def sign_clear_all():
    """Forget everything about signatures and sign files by binding each of
       the module-level dictionaries to a new, empty dictionary.  Used when
       starting to execute a toplevel recipe."""
    global old_signatures, upd_signatures
    global new_signatures, chd_signatures
    global sign_files
    old_signatures, chd_signatures = {}, {}
    upd_signatures, new_signatures = {}, {}
    sign_files = {}


def _sign_upd_sign(recdict, target, key, value):
    """Set the updated signature "key" of node "target" to "value" and mark
    the sign file of the target as needing to be written."""
    get_sign_file(recdict, target, 1)
    target_name = fname_fold(target.get_name())
    if upd_signatures.has_key(target_name):
        entry = upd_signatures[target_name]
    else:
        # First update for this target: remember which sign file it uses.
        entry = {"signfile": fname_fold(target.get_sign_fname())}
        upd_signatures[target_name] = entry
    entry[key] = value
    # Every update also refreshes the timestamp of the target entry.
    entry[timekey] = str(time.time())


def sign_updated(recdict, source, dict, target):
    """Store the current signature of node "source" for node "target"; called
    after the build rules for "target" were executed successfully.  The check
    to use is obtained with check_name() from "dict" and the attributes of
    the source."""
    src_name = source.get_name()
    check = check_name(recdict, src_name, dict, source.attributes)
    signature = get_new_sign(recdict, src_name, check, force = 1)
    sign_key = src_name + '@' + check
    _sign_upd_sign(recdict, target, sign_key, signature)

    # A source that was marked as changed with the recursive attribute makes
    # the target count as changed as well.
    if not chd_signatures.get(fname_fold(src_name)):
        return
    chd_signatures[fname_fold(target.get_name())] = 1


def buildcheck_updated(recdict, target, value):
    """Store "value" as the new buildcheck signature of node "target"; called
       after its build rules were executed successfully."""
    buildcheck_key = '@buildcheck'
    _sign_upd_sign(recdict, target, buildcheck_key, value)


def get_old_sign(recdict, name, check, target, rootname = None):
    """Return the stored "check" signature of item "name" for target node
       "target".
       "name" must be an absolute and normalized path.
       "rootname" is used for publishing and the "--contents" option: when
       given, all entries with a name starting with "rootname" are searched
       and the most recently updated one is used.
       Returns "changed" for an item that was marked as changed and an empty
       string when no signature is found."""
    name = fname_fold(name)
    # An item marked as changed never matches.
    if chd_signatures.has_key(name):
        return "changed"

    # May need to read the sign file for this target.
    get_sign_file(recdict, target, 0)

    key = name + '@' + check
    if not rootname:
        # An updated signature takes precedence over the one we started with.
        target_name = fname_fold(target.get_name())
        updated = _sign_lookup(upd_signatures, target_name, key)
        if updated:
            return updated
        return _sign_lookup(old_signatures, target_name, key)

    # Search the updated and the old signatures for entries below "rootname"
    # and pick the one with the most recent timestamp.
    rootname = fname_fold(rootname)
    rootname_len = len(rootname)
    ret = ''
    newtime = 0
    for sigdict in (upd_signatures, old_signatures):
        for node_name in sigdict.keys():
            if len(node_name) <= rootname_len:
                continue
            if node_name[:rootname_len] != rootname:
                continue
            entry = sigdict[node_name]
            if not (entry.has_key(key) and entry.has_key(timekey)):
                continue
            stamp = float(entry[timekey])
            if stamp > newtime:
                ret = entry[key]
                newtime = stamp

    return ret


def check_name(recdict, name, itemdict, altdict = None):
    """Return the name of the check to use for item "name".
       A "check" entry in "itemdict" is used first, then one in "altdict" (the
       attributes of the node), when given.  Without either, a directory gets
       "none" and anything else the value of $DEFAULTCHECK."""
    if itemdict.has_key("check"):
        return itemdict["check"]
    if altdict and altdict.has_key("check"):
        return altdict["check"]

    # TODO: make mapping from name or filetype to check configurable
    #if itemdict.has_key("filetype"):
    #    type = itemdict["filetype"]
    #else:
    #    type = ft_detect(itemdict["name"])
    is_directory = itemdict.get("directory")
    if not is_directory and altdict:
        is_directory = altdict.get("directory")
    if is_directory or os.path.isdir(name):
        # default check for directories: none
        return "none"

    # default check is given with $DEFAULTCHECK
    return get_var_val_int(recdict, "DEFAULTCHECK")


# vim: set sw=4 et sts=4 tw=79 fo+=l:


# syntax highlighted by Code2HTML, v. 0.9.1