# Part of the A-A-P recipe executive: Store signatures

# Copyright (C) 2002-2003 Stichting NLnet Labs
# Permission to copy and use this file is specified in the file COPYING.
# If this file is missing you can find it here: http://www.a-a-p.org/COPYING

#
# This module handles remembering signatures of targets and sources.
#

import os
import os.path
import string
import md5
import time

from Util import *
from Message import *
from Filetype import ft_detect
import Global

# Both "signatures" dictionaries are indexed by the name of the target Node
# (file or directory).
# For non-virtual nodes the absolute name is used.
# Each entry is a dictionary indexed by the source-name@check-name and has a
# string value.
# The "buildcheck" entry is used for the build commands.
# The "signfile" entry is used to remember the sign file that stores the
# signatures for this target.
# "old_signatures" is for the signatures when we started.
# "upd_signatures" is for the signatures of items for which the build commands
# were successfully executed and are to be stored for the next time.
# Example:
#   {"/aa/bb/file.o" : {"signfile" : "/aa/bb/AAPDIR/sign",
#                       "/aa/bb/file.c@md5" : "13a445e5",
#                       "buildcheck" : "-O2"},
#    "/aa/bb/bar.o"  : {"signfile" : "/aa/bb/mysign",
#                       "/aa/bb/bar-debug.c@time" : "143234",
#                       "aa/bb/bar.h@time" : "423421"}}
old_signatures = {}
upd_signatures = {}

# "new_signatures" caches the signatures we computed this invocation.  It is a
# dictionary of dictionaries:
#   new_signatures["/path/file"]["md5"] = md5hex("/path/file")
# The key for the toplevel dictionary is the Node name.
# The key for the second level is the check name.  The target name isn't used
# here.
new_signatures = {}

# "chd_signatures" remembers which files were marked as changed with --changed
# or ":changed".
chd_signatures = {}

# Key used for the timestamp on the signature entry.  Used to find the last
# updated entry for published files.
timekey = "lastupdate"

# Name of the sign file relative to the directory of the target or the recipe.
sign_normal_fname = in_aap_dir("sign")
sign_normal_fname_len = len(sign_normal_fname)

# Remember which sign files have been read.
# Also when the file couldn't actually be read, so that we remember to write
# this file when signs have been updated.
# An entry exists when the file has been read.  Its value is non-zero when the
# file should be written back.
sign_files = {}

def get_sign_file(recdict, target, update):
    """Get the sign file that is used for "target" if it wasn't done already.
       When "update" is non-zero, mark that the file needs writing."""
    fname = fname_fold(target.get_sign_fname())
    if not sign_files.has_key(fname):
        sign_files[fname] = update
        sign_read(recdict, fname)
    elif update:
        sign_files[fname] = 1

def sign_file_dir(fname):
    """Return the directory that files in sign file "fname" are relative to.
       Use uniform format (forward slashes)."""
    # When using "AAPDIR/sign" remove two parts, otherwise only remove the
    # file name itself.
    if (len(fname) >= sign_normal_fname_len
            and fname_fold(fname[-sign_normal_fname_len:])
                                           == fname_fold(sign_normal_fname)):
        fname = os.path.dirname(fname)
    return fname_fold(os.path.dirname(fname))

# In the sign files, file names are stored with a leading "-" for a virtual
# node and "=" for a file name.  Expand to an absolute name for non-virtual
# nodes.
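# For example (made-up names), with a sign file "/aa/bb/AAPDIR/sign" the
# directory is "/aa/bb" and:
#    "=file.c"      ->  "/aa/bb/file.c"
#    "=../lib/x.c"  ->  "/aa/lib/x.c"      (the "../" items are removed)
#    "=/tmp/gen.c"  ->  "/tmp/gen.c"       (already absolute)
#    "-install"     ->  "install"          (virtual node, kept as-is)
# Note that fname_fold() may additionally fold case, depending on the system.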
def sign_expand_name(recdict, dir, name):
    """Expand "name", which is used in a sign file in directory "dir" or
       "dir/AAPDIR"."""
    n = name[1:]
    if name[0] == '-' or os.path.isabs(n):
        return n

    # Make a full path by joining the dir and the file name.
    n_len = len(n)
    if n_len <= 3 or n[:3] != "../":
        return fname_fold(os.path.join(dir, n))

    # Remove "../" items.  Don't use os.path.normpath(), it's a bit slow (it
    # does more than removing ".." items).
    di = len(dir)
    ni = 3
    while 1:
        di = string.rfind(dir, "/", 0, di)
        if di < 0:
            # "cannot happen": sign file corrupted?  Return the name with the
            # ".." (equivalent to sign not found).
            msg_error(recdict,
                   _('In sign file: Too many ".." in "%s" for directory "%s"')
                                                               % (name, dir))
            return fname_fold(os.path.join(dir, n))
        if ni + 3 >= n_len or n[ni:ni+3] != "../":
            break
        ni = ni + 3
    return dir[:di + 1] + n[ni:]

def sign_reduce_name(dir, name):
    """Reduce "name" to what is used in a sign file."""
    if os.path.isabs(name):
        return '=' + fname_fold(shorten_name(name, dir))
    return '-' + fname_fold(name)

#
# A sign file stores the signatures for items (sources and targets) with the
# values they had when they were computed in the past.
# The format of each line is:
#    =foo.o<ESC>=foo.c@md5_c=012346...<ESC>\n
# where <ESC> stands for an ESC character (0x1b), as written by sign_write()
# below.
# "md5_c" can be "md5", "time", etc.  Note that it's not always equal to the
# "check" attribute, both "time" and "older" use "time" here.
#

def sign_read(recdict, fname):
    """Read the signature file "fname" into our dictionary of signatures."""
    basedir = sign_file_dir(fname)
    try:
        f = open(fname, "rb")
        for line in f.readlines():
            e = string.find(line, "\033")
            if e > 0:
                # Only use lines that contain an ESC.
                name = sign_expand_name(recdict, basedir, line[:e])
                old_signatures[name] = {"signfile" : fname_fold(fname)}
                while 1:
                    s = e + 1
                    e = string.find(line, "\033", s)
                    if e < 1:
                        break
                    i = string.rfind(line, "=", s, e)
                    if i < 1:
                        break
                    old_signatures[name][sign_expand_name(recdict, basedir,
                                                  line[s:i])] = line[i + 1:e]
        f.close()
    except StandardError, e:
        # TODO: handle errors?  It's not an error if the file does not exist.
        msg_note(recdict, (_('Cannot read sign file "%s": ')
                                            % shorten_name(fname)) + str(e))

def sign_write_all(recdict):
    """Write all updated signature files from our dictionary of signatures."""
    # This assumes we are the only one updating the signature files, thus
    # there is no locking.  Sharing them with others wouldn't make sense,
    # since building would fail as well.
    for fname in sign_files.keys():
        if sign_files[fname]:
            # This sign file needs to be written.
            sign_write(recdict, fname)

def sign_write(recdict, fname):
    """Write one updated signature file."""
    sign_dir = os.path.dirname(fname)
    if not os.path.exists(sign_dir):
        try:
            os.makedirs(sign_dir)
        except StandardError, e:
            msg_warning(recdict,
                     (_('Cannot create directory for signature file "%s": ')
                                                          % fname) + str(e))
    try:
        f = open(fname, "wb")
    except StandardError, e:
        msg_warning(recdict,
                     (_('Cannot open signature file for writing: "%s": ')
                                                          % fname) + str(e))
        return

    def write_sign_line(f, basedir, s, old, new):
        """Write a line to sign file "f" in directory "basedir" for item "s",
           with checks from "old", using checks from "new" if they are
           present."""
        f.write(sign_reduce_name(basedir, s) + "\033")

        # Go over all old checks, write all of them, using the new value if
        # it is available.
        for c in old.keys():
            if c != "signfile":
                if new and new.has_key(c):
                    val = new[c]
                else:
                    val = old[c]
                f.write("%s=%s\033" % (sign_reduce_name(basedir, c), val))

        # Go over all new checks, write the ones for which there is no old
        # value.
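        # (A check that only appears in "new" has no old signature yet, e.g.
        # a dependency that was added since the sign file was last written.)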
        if new:
            for c in new.keys():
                if c != "signfile" and not old.has_key(c):
                    f.write("%s=%s\033" % (sign_reduce_name(basedir, c),
                                                                     new[c]))
        f.write("\n")

    basedir = sign_file_dir(fname)
    try:
        # Go over all old signatures, write all of them, using checks from
        # upd_signatures when they are present.
        # When the item is in upd_signatures, use the sign file specified
        # there, otherwise use the one from old_signatures.
        for s in old_signatures.keys():
            if upd_signatures.has_key(s):
                if upd_signatures[s]["signfile"] != fname:
                    continue
                new = upd_signatures[s]
            else:
                if old_signatures[s]["signfile"] != fname:
                    continue
                new = None
            write_sign_line(f, basedir, s, old_signatures[s], new)

        # Go over all updated signatures, write only the ones for which there
        # is no old signature.
        for s in upd_signatures.keys():
            if (not old_signatures.has_key(s)
                    and upd_signatures[s]["signfile"] == fname):
                write_sign_line(f, basedir, s, upd_signatures[s], None)

        f.close()
    except StandardError, e:
        msg_warning(recdict, (_('Write error for signature file "%s": ')
                                                          % fname) + str(e))

def hexdigest(m):
    """Turn an md5 object into a string of hex characters."""
    # NOTE: This routine is a method of md5 objects in the Python 2.0
    # interface of the native md5 module, but not in Python 1.5.
    h = string.hexdigits
    r = ''
    for c in m.digest():
        i = ord(c)
        r = r + h[(i >> 4) & 0xF] + h[i & 0xF]
    return r

def check_md5(recdict, fname, msg = 1):
    """Compute the md5 checksum for file "fname" as a hex string.
       Returns "unknown" when it can't be computed."""
    if not os.path.isfile(fname):
        # A non-existing file isn't that bad, could be a virtual target that
        # wasn't marked as being virtual.
        if msg:
            msg_note(recdict,
                  _('Cannot compute md5 checksum for "%s": it does not exist')
                                                                     % fname)
        return "unknown"
    try:
        f = open(fname, "rb")
        m = md5.new()
        while 1:
            # Read big blocks at a time for speed, but don't read the whole
            # file at once to reduce memory usage.
            data = f.read(32768)
            if not data:
                break
            m.update(data)
        f.close()
        res = hexdigest(m)
    except StandardError, e:
        if msg:
            msg_warning(recdict, (_('Cannot compute md5 checksum for "%s": ')
                                                          % fname) + str(e))
        res = "unknown"
    return res

def check_c_md5(recdict, fname):
    """Compute an md5 signature after filtering out irrelevant items for C
       code (white space and comments)."""
    try:
        f = open(fname)
    except StandardError, e:
        # Can't open it; a URL can't be handled here.
        msg_warning(recdict, (_('Cannot compute md5 checksum for "%s": ')
                                                          % fname) + str(e))
        return "unknown"

    m = md5.new()
    inquote = 0
    incomment = 0
    while 1:
        # Read one line at a time.
        try:
            data = f.readline()
        except StandardError, e:
            # Can't read the file.
            msg_warning(recdict, (_('Cannot read "%s": ') % fname) + str(e))
            return "unknown"
        if not data:
            break

        # Filter out irrelevant changes:
        # - Collapse sequences of white space into one space.
        # - Remove comments.
        # TODO: double-byte characters may have a backslash or double quote
        # as their second byte, how to know this?
        data_len = len(data) - 1
        s = 0
        skipwhite = 1
        i = 0
        while i < data_len:
            if inquote:
                # Only need to search for the end quote.
                while i < data_len:
                    c = data[i]
                    i = i + 1
                    if c == '"':
                        inquote = 0
                        break
                    elif c == '\\':
                        i = i + 1
                continue

            if incomment:
                # Only need to search for the comment end "*/".
                while i < data_len:
                    if data[i] == '*' and data[i + 1] == '/':
                        incomment = 0
                        i = i + 2
                        s = i
                        skipwhite = 1
                        break
                    i = i + 1
                continue

            c = data[i]
            if c == ' ' or c == '\t':
                # White space after non-white: dump text.
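                # data[s:i] is the pending text that hasn't been hashed yet;
                # writing it followed by a single space collapses every run
                # of blanks/tabs, so spacing-only changes keep the signature.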
                if not skipwhite:
                    m.update(data[s:i] + ' ')
                # Skip the white space.
                while 1:
                    i = i + 1
                    if i == data_len:
                        break
                    c = data[i]
                    if c != ' ' and c != '\t':
                        break
                s = i
                skipwhite = 0
                if i == data_len:
                    break

            if c == '/' and (data[i + 1] == '/' or data[i + 1] == '*'):
                # Start of a // or /* comment.
                if i > s:
                    m.update(data[s:i] + ' ')
                i = i + 1
                if data[i] == '/':
                    s = data_len
                    break
                incomment = 1
            else:
                skipwhite = 0
                if c == "'":
                    # Skip '"' or '\'', it's not the start of a string.
                    if data[i + 1] == '\\':
                        i = i + 1
                    i = i + 2
                elif c == '"':
                    inquote = 1
                i = i + 1

        if not (incomment or skipwhite) and s < data_len:
            m.update(data[s:data_len] + ' ')

    try:
        f.close()
    except:
        # Error while closing a read file???
        pass
    return hexdigest(m)

def buildcheckstr2sign(str):
    """Compute a signature from a string for the buildcheck."""
    return hexdigest(md5.new(str))

def _sign_lookup(signatures, name, key):
    """Get the "key" signature for item "name" from dictionary "signatures".
       "name" must have gone through fname_fold()."""
    if not signatures.has_key(name):
        return ''
    s = signatures[name]
    if not s.has_key(key):
        return ''
    return s[key]

def sign_clear(name):
    """Clear the new signatures of an item.  Store an entry to note that it
       was cleared (see get_new_sign() below).  Used when it has been built."""
    new_signatures[name] = {}
    new_signatures[name]["cleared"] = 1

def get_new_sign(recdict, name, check, force = 0):
    """Get the current "check" signature for the item "name".
       "name" is the absolute name for non-virtual nodes.  This doesn't depend
       on the target.  "name" can be a URL.
       When "force" is non-zero also use a cleared signature (for --touch).
       Returns a string (also for timestamps)."""
    # When not executing build commands and a target has been pretended to be
    # built, its signature is cleared.  Don't recompute it then, the file will
    # not be different but we do want a different signature.
    name = fname_fold(name)
    if (not force and skip_commands()
            and new_signatures.has_key(name)
            and new_signatures[name].has_key("cleared")):
        return "cleared"

    key = check
    res = _sign_lookup(new_signatures, fname_fold(name), key)
    if not res:
        # Compute the signature now.
        if check == "time":
            from Remote import url_time
            res = str(url_time(recdict, name))
        elif check == "md5":
            res = check_md5(recdict, name)
        elif check == "c_md5":
            res = check_c_md5(recdict, name)
        # TODO: other checks, defined with actions
        else:
            res = "unknown"

        # Store the new signature to avoid recomputing it many times.
        if not new_signatures.has_key(name):
            new_signatures[name] = {}
        new_signatures[name][key] = res
    return res

def sign_clear_target(recdict, target):
    """Called to clear old signatures after successfully executing build
       rules for "target".  sign_updated() should be called next for each
       source."""
    get_sign_file(recdict, target, 1)
    target_name = fname_fold(target.get_name())
    if old_signatures.has_key(target_name):
        del old_signatures[target_name]
    if upd_signatures.has_key(target_name):
        del upd_signatures[target_name]

def sign_clear_file(fname, recursive):
    """Called to clear signatures for a file "fname".  Used for ":changed"
       and "--changed=FILE"."""
    chd_signatures[full_fname(fname)] = recursive
    if upd_signatures.has_key(fname):
        del upd_signatures[fname]
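# For illustration (the path is made up): get_new_sign(recdict,
# "/aa/bb/file.c", "md5") runs check_md5() the first time and returns the
# value cached in new_signatures for that (folded) name on later calls in the
# same invocation, until sign_clear() wipes it again.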
def sign_clear_all():
    """Clear all computed signatures.
       Used when starting to execute a toplevel recipe."""
    global old_signatures, upd_signatures, new_signatures, chd_signatures
    global sign_files
    old_signatures = {}
    chd_signatures = {}
    upd_signatures = {}
    new_signatures = {}
    sign_files = {}

def _sign_upd_sign(recdict, target, key, value):
    """Update the signature for node "target" with "key" to "value"."""
    get_sign_file(recdict, target, 1)
    target_name = fname_fold(target.get_name())
    if not upd_signatures.has_key(target_name):
        upd_signatures[target_name] = {"signfile":
                                        fname_fold(target.get_sign_fname())}
    upd_signatures[target_name][key] = value
    # Update the timestamp on the target.
    upd_signatures[target_name][timekey] = str(time.time())

def sign_updated(recdict, source, dict, target):
    """Called after successfully executing build rules for node "target" from
       node "source", using a check based on dictionary "dict"."""
    name = source.get_name()
    check = check_name(recdict, name, dict, source.attributes)
    res = get_new_sign(recdict, name, check, force = 1)
    _sign_upd_sign(recdict, target, name + '@' + check, res)

    # If the source file was considered changed and the recursive attribute
    # was used, the target should be considered changed as well.
    if chd_signatures.get(fname_fold(name)):
        chd_signatures[fname_fold(target.get_name())] = 1

def buildcheck_updated(recdict, target, value):
    """Called after successfully executing build rules for node "target" with
       the new buildcheck signature "value"."""
    _sign_upd_sign(recdict, target, '@buildcheck', value)

def get_old_sign(recdict, name, check, target, rootname = None):
    """Get the old "check" signature for item "name" and target node
       "target".  "name" must be an absolute and normalized path.
       "rootname" is used for publishing and the "--contents" option.
       When no signature exists an empty string is returned."""
    # Check if this file was marked as changed.
    name = fname_fold(name)
    if chd_signatures.has_key(name):
        return "changed"

    # May need to read the sign file for this target.
    get_sign_file(recdict, target, 0)

    key = name + '@' + check
    if not rootname:
        # Use the updated signature if it exists, otherwise use the old one.
        name = fname_fold(target.get_name())
        ret = _sign_lookup(upd_signatures, name, key)
        if ret:
            return ret
        return _sign_lookup(old_signatures, name, key)

    # Go through all updated and old signatures to check if "rootname"
    # matches.  Find the entry that was updated most recently.
    rootname = fname_fold(rootname)
    rootname_len = len(rootname)
    ret = ''
    newtime = 0
    for sigdict in [upd_signatures, old_signatures]:
        for name in sigdict.keys():
            if (len(name) > rootname_len
                    and name[:rootname_len] == rootname
                    and sigdict[name].has_key(key)
                    and sigdict[name].has_key(timekey)
                    and float(sigdict[name][timekey]) > newtime):
                ret = sigdict[name][key]
                newtime = float(sigdict[name][timekey])
    return ret
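# A rough sketch of how the checking code is expected to use this module; the
# variable names here are made up, only the functions from this file are real:
#
#   check = check_name(recdict, src_name, dict, source.attributes)
#   if (get_old_sign(recdict, src_name, check, target)
#                              != get_new_sign(recdict, src_name, check)):
#       ... execute the build commands ...
#       sign_clear_target(recdict, target)
#       sign_updated(recdict, source, dict, target)
#       buildcheck_updated(recdict, target, buildcheckstr2sign(cmd_string))
#   ...
#   sign_write_all(recdict)        # at the very end of the invocation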
def check_name(recdict, name, itemdict, altdict = None):
    """Return the check name to be used for item "name" with dictlist
       "itemdict".  Also use "altdict" if given (the attributes of the
       node)."""
    if itemdict.has_key("check"):
        check = itemdict["check"]
    elif altdict and altdict.has_key("check"):
        check = altdict["check"]
    else:
        # TODO: make the mapping from name or filetype to check configurable
        #if itemdict.has_key("filetype"):
        #    type = itemdict["filetype"]
        #else:
        #    type = ft_detect(itemdict["name"])
        if ((itemdict.get("directory")
                    or (altdict and altdict.get("directory")))
                or os.path.isdir(name)):
            check = "none"      # default check for directories: none
        else:
            # The default check is given with $DEFAULTCHECK.
            check = get_var_val_int(recdict, "DEFAULTCHECK")
    return check

# vim: set sw=4 et sts=4 tw=79 fo+=l: