Make the fix script update revision details.
[amitay/build-farm.git] / buildfarm / data.py
index b6cdc59a40e622100f6a7968d2d381f285a39e8c..90345f87fb8e781b99f74c1fcaac1d756cc14d12 100644 (file)
 #   along with this program; if not, write to the Free Software
 #   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 
-
-import ConfigParser
+from cStringIO import StringIO
+import collections
+import hashlib
 import os
 import re
 import time
-import util
 
 
-class BuildStatus(object):
+class BuildSummary(object):
 
-    def __init__(self, stages, other_failures):
-        self.stages = stages
-        self.other_failures = other_failures
+    def __init__(self, host, tree, compiler, revision, status):
+        self.host = host
+        self.tree = tree
+        self.compiler = compiler
+        self.revision = revision
+        self.status = status
 
-    def __str__(self):
-        return repr((self.stages, self.other_failures))
 
+BuildStageResult = collections.namedtuple("BuildStageResult", "name result")
 
-def check_dir_exists(kind, path):
-    if not os.path.isdir(path):
-        raise Exception("%s directory %s does not exist" % (kind, path))
 
+class MissingRevisionInfo(Exception):
+    """Revision info could not be found in the build log."""
 
-def build_status_from_logs(log, err):
-    """get status of build"""
-    m = re.search("TEST STATUS:(\s*\d+)", log)
-    if m:
-        tstatus = int(m.group(1).strip())
-    else:
-        m = re.search("ACTION (PASSED|FAILED): test", log)
-        if m:
-            test_failures = len(re.findall("testsuite-(failure|error): ", log))
-            test_successes = len(re.findall("testsuite-success: ", log))
-            if test_successes > 0:
-                tstatus = test_failures
-            else:
-                tstatus = 255
-            if m.group(1) == "FAILED" and tstatus == 0:
-                tstatus = -1
+    def __init__(self, build=None):
+        self.build = build
+
+
+class LogFileMissing(Exception):
+    """Log file missing."""
+
+
+class BuildStatus(object):
+
+    def __init__(self, stages=None, other_failures=None):
+        if stages is not None:
+            self.stages = [BuildStageResult(n, r) for (n, r) in stages]
         else:
-            tstatus = None
+            self.stages = []
+        if other_failures is not None:
+            self.other_failures = other_failures
+        else:
+            self.other_failures = set()
 
-    m = re.search("INSTALL STATUS:(\s*\d+)", log)
-    if m:
-        istatus = int(m.group(1).strip())
-    else:
-        istatus = None
+    @property
+    def failed(self):
+        if self.other_failures:
+            return True
+        return not all([x.result == 0 for x in self.stages])
 
-    m = re.search("BUILD STATUS:(\s*\d+)", log)
-    if m:
-        bstatus = int(m.group(1).strip())
-    else:
-        bstatus = None
+    def __serialize__(self):
+        return repr(self)
 
-    m = re.search("CONFIGURE STATUS:(\s*\d+)", log)
-    if m:
-        cstatus = int(m.group(1).strip())
-    else:
-        cstatus = None
+    @classmethod
+    def __deserialize__(cls, text):
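+        # Evaluate the repr() produced by __serialize__.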
+        return eval(text)
 
-    other_failures = set()
-    m = re.search("(PANIC|INTERNAL ERROR):.*", log)
-    if m:
-        other_failures.add("panic")
+    def __str__(self):
+        if self.other_failures:
+            return ",".join(self.other_failures)
+        return "/".join([str(x.result) for x in self.stages])
 
-    if "No space left on device" in err or "No space left on device" in log:
-        other_failures.add("disk full")
+    def broken_host(self):
+        if "disk full" in self.other_failures:
+            return True
+        return False
 
-    if "maximum runtime exceeded" in log:
-        other_failures.add("timeout")
+    def regressed_since(self, older):
+        """Check if this build has regressed since another build."""
+        if "disk full" in self.other_failures:
+            return False
+        if "timeout" in self.other_failures and "timeout" in older.other_failures:
+            # Exactly when the timeout happens can differ slightly, so it is
+            # okay if the numbers are a bit different.
+            return False
+        if "panic" in self.other_failures and not "panic" in older.other_failures:
+            return True
+        if len(self.stages) < len(older.stages):
+            # Fewer stages completed
+            return True
+        for ((old_name, old_result), (new_name, new_result)) in zip(
+            older.stages, self.stages):
+            assert old_name == new_name
+            if new_result > old_result:
+                return True
+        return False
 
-    m = re.search("CC_CHECKER STATUS:(\s*\d+)", log)
-    if m:
-        sstatus = int(m.group(1).strip())
-    else:
-        sstatus = None
+    def __cmp__(self, other):
+        other_extra = other.other_failures - self.other_failures
+        self_extra = self.other_failures - other.other_failures
+        # Give more importance to other failures
+        if other_extra:
+            return 1
+        if self_extra:
+            return -1
+
+        la = len(self.stages)
+        lb = len(other.stages)
+        if la > lb:
+            return 1
+        elif lb > la:
+            return -1
+        else:
+            return cmp(other.stages, self.stages)
 
-    return BuildStatus((cstatus, bstatus, istatus, tstatus, sstatus), other_failures)
+    def __repr__(self):
+        return "%s(%r, %r)" % (self.__class__.__name__, self.stages, self.other_failures)
 
 
-def lcov_extract_percentage(text):
-    m = re.search('\<td class="headerItem".*?\>Code\&nbsp\;covered\:\<\/td\>.*?\n.*?\<td class="headerValue".*?\>([0-9.]+) \%', text)
-    if m:
-        return m.group(1)
-    else:
-        return None
+def check_dir_exists(kind, path):
+    if not os.path.isdir(path):
+        raise Exception("%s directory %s does not exist" % (kind, path))
+
+
+def build_status_from_logs(log, err):
+    """get status of build"""
+    # FIXME: Perhaps also extract revision here?
+
+    test_failures = 0
+    test_successes = 0
+    test_seen = 0
+    ret = BuildStatus()
+
+    stages = []
+    re_status = re.compile("^([A-Z_]+) STATUS:(\s*\d+)$")
+    re_action = re.compile("^ACTION (PASSED|FAILED):\s+test$")
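+    # These match summary lines such as "CONFIGURE STATUS: 0" and "ACTION PASSED: test".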
+
+    for l in log:
+        if l.startswith("No space left on device"):
+            ret.other_failures.add("disk full")
+            continue
+        if "maximum runtime exceeded" in l: # Ugh.
+            ret.other_failures.add("timeout")
+            continue
+        if l.startswith("PANIC:") or l.startswith("INTERNAL ERROR:"):
+            ret.other_failures.add("panic")
+            continue
+        if l.startswith("testsuite-failure: ") or l.startswith("testsuite-error: "):
+            test_failures += 1
+            continue
+        if l.startswith("testsuite-success: "):
+            test_successes += 1
+            continue
+        m = re_status.match(l)
+        if m:
+            stages.append(BuildStageResult(m.group(1), int(m.group(2).strip())))
+            if m.group(1) == "TEST":
+                test_seen = 1
+            continue
+        m = re_action.match(l)
+        if m and not test_seen:
+            if m.group(1) == "PASSED":
+                stages.append(BuildStageResult("TEST", 0))
+            else:
+                stages.append(BuildStageResult("TEST", 1))
+            continue
+
+    # Scan err file for specific errors
+    for l in err:
+        if "No space left on device" in l:
+            ret.other_failures.add("disk full")
+
+    def map_stage(sr):
+        if sr.name != "TEST":
+            return sr
+        # TEST is special
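+        # When granular testsuite output is present, report the number of
+        # failing testsuites rather than the raw exit code.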
+        if test_successes + test_failures == 0:
+            # No granular test output
+            return BuildStageResult("TEST", sr.result)
+        if sr.result == 1 and test_failures == 0:
+            ret.other_failures.add("inconsistent test result")
+            return BuildStageResult("TEST", -1)
+        return BuildStageResult("TEST", test_failures)
+
+    ret.stages = map(map_stage, stages)
+    return ret
+
+
+def revision_from_log(log):
+    revid = None
+    timestamp = None
+    for l in log:
+        if l.startswith("BUILD COMMIT REVISION: "):
+            revid = l.split(":", 1)[1].strip()
+        elif l.startswith("BUILD COMMIT TIME"):
+            timestamp = l.split(":", 1)[1].strip()
+    if revid is None:
+        raise MissingRevisionInfo()
+    return (revid, timestamp)
 
 
 class NoSuchBuildError(Exception):
@@ -119,320 +222,228 @@ class NoSuchBuildError(Exception):
         self.rev = rev
 
 
-class Tree(object):
-    """A tree to build."""
-
-    def __init__(self, name, scm, repo, branch, subdir="", srcdir=""):
-        self.name = name
-        self.repo = repo
-        self.scm = scm
-        self.branch = branch
-        self.subdir = subdir
-        self.srcdir = srcdir
-        self.scm = scm
-
-    def __repr__(self):
-        return "<%s %r>" % (self.__class__.__name__, self.name)
-
-
 class Build(object):
     """A single build of a tree on a particular host using a particular compiler.
     """
 
-    def __init__(self, store, tree, host, compiler, rev=None):
-        self._store = store
+    def __init__(self, basename, tree, host, compiler, rev=None):
+        self.basename = basename
         self.tree = tree
         self.host = host
         self.compiler = compiler
-        self.rev = rev
+        self.revision = rev
+
+    def __cmp__(self, other):
+        return cmp(
+            (self.upload_time, self.revision, self.host, self.tree, self.compiler),
+            (other.upload_time, other.revision, other.host, other.tree, other.compiler))
+
+    def __eq__(self, other):
+        return (isinstance(other, Build) and
+                self.log_checksum() == other.log_checksum())
 
-    ###################
-    # the mtime age is used to determine if builds are still happening
-    # on a host.
-    # the ctime age is used to determine when the last real build happened
+    def __repr__(self):
+        if self.revision is not None:
+            return "<%s: revision %s of %s on %s using %s>" % (self.__class__.__name__, self.revision, self.tree, self.host, self.compiler)
+        else:
+            return "<%s: %s on %s using %s>" % (self.__class__.__name__, self.tree, self.host, self.compiler)
 
-    def age_mtime(self):
-        """get the age of build from mtime"""
-        file = self._store.build_fname(self.tree, self.host, self.compiler, self.rev)
+    def remove_logs(self):
+        # In general, basename.log should *always* exist.
+        if os.path.exists(self.basename+".log"):
+            os.unlink(self.basename + ".log")
+        if os.path.exists(self.basename+".err"):
+            os.unlink(self.basename+".err")
 
-        st = os.stat("%s.log" % file)
-        return time.time() - st.st_mtime
+    def remove(self):
+        self.remove_logs()
 
-    def age_ctime(self):
-        """get the age of build from ctime"""
-        file = self._store.build_fname(self.tree, self.host, self.compiler, self.rev)
+    @property
+    def upload_time(self):
+        """get timestamp of build"""
+        st = os.stat("%s.log" % self.basename)
+        return st.st_mtime
 
-        st = os.stat("%s.log" % file)
-        return time.time() - st.st_ctime
+    @property
+    def age(self):
+        """get the age of build"""
+        return time.time() - self.upload_time
 
     def read_log(self):
         """read full log file"""
-        f = open(self._store.build_fname(self.tree, self.host, self.compiler, self.rev)+".log", "r")
         try:
-            return f.read()
-        finally:
-            f.close()
+            return open(self.basename+".log", "r")
+        except IOError:
+            raise LogFileMissing()
 
     def read_err(self):
         """read full err file"""
-        return util.FileLoad(self._store.build_fname(self.tree, self.host, self.compiler, self.rev)+".err")
+        try:
+            return open(self.basename+".err", 'r')
+        except IOError:
+            # No such file
+            return StringIO()
+
+    def log_checksum(self):
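+        # SHA1 of the full log file; __eq__ uses this to compare builds.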
+        f = self.read_log()
+        try:
+            return hashlib.sha1(f.read()).hexdigest()
+        finally:
+            f.close()
+
+    def summary(self):
+        (revid, timestamp) = self.revision_details()
+        status = self.status()
+        return BuildSummary(self.host, self.tree, self.compiler, revid, status)
 
     def revision_details(self):
         """get the revision of build
 
         :return: Tuple with revision id and timestamp (if available)
         """
-        file = self._store.build_fname(self.tree, self.host, self.compiler, self.rev)
-
-        revid = None
-        timestamp = None
-        f = open("%s.log" % file, 'r')
+        f = self.read_log()
         try:
-            for l in f.readlines():
-                if l.startswith("BUILD COMMIT REVISION: "):
-                    revid = l.split(":", 1)[1].strip()
-                elif l.startswith("BUILD REVISION: "):
-                    revid = l.split(":", 1)[1].strip()
-                elif l.startswith("BUILD COMMIT TIME"):
-                    timestamp = l.split(":", 1)[1].strip()
+            return revision_from_log(f)
         finally:
             f.close()
 
-        return (revid, timestamp)
-
     def status(self):
         """get status of build
 
         :return: tuple with build status
         """
-
         log = self.read_log()
-        err = self.read_err()
-
-        return build_status_from_logs(log, err)
-
-    def err_count(self):
-        """get status of build"""
-        file = self._store.build_fname(self.tree, self.host, self.compiler, self.rev)
-
-        try:
-            err = util.FileLoad("%s.err" % file)
-        except OSError:
-            # File does not exist
-            return 0
-
-        return util.count_lines(err)
-
-
-class CachingBuild(Build):
-    """Build subclass that caches some of the results that are expensive
-    to calculate."""
-
-    def revision_details(self):
-        file = self._store.build_fname(self.tree, self.host, self.compiler, self.rev)
-        cachef = self._store.cache_fname(self.tree, self.host, self.compiler, self.rev)
-        st1 = os.stat("%s.log" % file)
-
         try:
-            st2 = os.stat("%s.revision" % cachef)
-        except OSError:
-            # File does not exist
-            st2 = None
-
-        # the ctime/mtime asymmetry is needed so we don't get fooled by
-        # the mtime update from rsync
-        if st2 and st1.st_ctime <= st2.st_mtime:
-            (revid, timestamp) = util.FileLoad("%s.revision" % cachef).split(":", 1)
-            if timestamp == "":
-                return (revid, None)
-            else:
-                return (revid, timestamp)
-        (revid, timestamp) = super(CachingBuild, self).revision_details()
-        if not self._store.readonly:
-            util.FileSave("%s.revision" % cachef, "%s:%s" % (revid, timestamp or ""))
-        return (revid, timestamp)
+            err = self.read_err()
+            try:
+                return build_status_from_logs(log, err)
+            finally:
+                err.close()
+        finally:
+            log.close()
 
     def err_count(self):
-        file = self._store.build_fname(self.tree, self.host, self.compiler, self.rev)
-        cachef = self._store.cache_fname(self.tree, self.host, self.compiler, self.rev)
-        st1 = os.stat("%s.err" % file)
-
-        try:
-            st2 = os.stat("%s.errcount" % cachef)
-        except OSError:
-            # File does not exist
-            st2 = None
-
-        if st2 and st1.st_ctime <= st2.st_mtime:
-            return util.FileLoad("%s.errcount" % cachef)
-
-        ret = super(CachingBuild, self).err_count()
-
-        if not self._store.readonly:
-            util.FileSave("%s.errcount" % cachef, str(ret))
-
-        return ret
-
-    def status(self):
-        file = self._store.build_fname(self.tree, self.host, self.compiler, self.rev)
-        cachefile = self._store.cache_fname(self.tree, self.host, self.compiler, self.rev)+".status"
-
-        st1 = os.stat("%s.log" % file)
+        """get status of build"""
+        file = self.read_err()
+        return len(file.readlines())
 
-        try:
-            st2 = os.stat(cachefile)
-        except OSError:
-            # No such file
-            st2 = None
 
-        if st2 and st1.st_ctime <= st2.st_mtime:
-            return BuildStatus(*eval(util.FileLoad(cachefile)))
+class UploadBuildResultStore(object):
 
-        ret = super(CachingBuild, self).status()
+    def __init__(self, path):
+        """Open the database.
 
-        if not self._store.readonly:
-            util.FileSave(cachefile, str(ret))
+        :param path: Build result base directory
+        """
+        self.path = path
 
-        return ret
+    def get_new_builds(self):
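+        # Upload filenames have the form "build.<tree>.<host>.<compiler>.log".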
+        for name in os.listdir(self.path):
+            try:
+                (build, tree, host, compiler, extension) = name.split(".")
+            except ValueError:
+                continue
+            if build != "build" or extension != "log":
+                continue
+            yield self.get_build(tree, host, compiler)
 
+    def build_fname(self, tree, host, compiler):
+        return os.path.join(self.path, "build.%s.%s.%s" % (tree, host, compiler))
 
+    def has_host(self, host):
+        for name in os.listdir(self.path):
+            try:
+                if name.split(".")[2] == host:
+                    return True
+            except IndexError:
+                pass
+        return False
 
-def read_trees_from_conf(path):
-    """Read trees from a configuration file."""
-    ret = {}
-    cfp = ConfigParser.ConfigParser()
-    cfp.readfp(open(path))
-    for s in cfp.sections():
-        ret[s] = Tree(name=s, **dict(cfp.items(s)))
-    return ret
+    def get_build(self, tree, host, compiler):
+        basename = self.build_fname(tree, host, compiler)
+        logf = "%s.log" % basename
+        if not os.path.exists(logf):
+            raise NoSuchBuildError(tree, host, compiler)
+        return Build(basename, tree, host, compiler)
 
 
 class BuildResultStore(object):
     """The build farm build result database."""
 
-    OLDAGE = 60*60*4,
-    DEADAGE = 60*60*24*4
-    LCOVHOST = "magni"
-
-    def __init__(self, basedir, readonly=False):
+    def __init__(self, path):
         """Open the database.
 
-        :param basedir: Build result base directory
-        :param readonly: Whether to avoid saving cache files
+        :param path: Build result base directory
         """
-        self.basedir = basedir
-        check_dir_exists("base", self.basedir)
-        self.readonly = readonly
-
-        self.webdir = os.path.join(basedir, "web")
-        check_dir_exists("web", self.webdir)
-
-        self.datadir = os.path.join(basedir, "data")
-        check_dir_exists("data", self.datadir)
-
-        self.cachedir = os.path.join(basedir, "cache")
-        check_dir_exists("cache", self.cachedir)
-
-        self.lcovdir = os.path.join(basedir, "lcov/data")
-        check_dir_exists("lcov", self.lcovdir)
-
-        self.compilers = util.load_list(os.path.join(self.webdir, "compilers.list"))
+        self.path = path
 
-        self.trees = read_trees_from_conf(os.path.join(self.webdir, "trees.conf"))
+    def __contains__(self, build):
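+        """Check whether a build for the same tree, host, compiler and revision is stored."""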
+        try:
+            if build.revision:
+                rev = build.revision
+            else:
+                rev, timestamp = build.revision_details()
+            self.get_build(build.tree, build.host, build.compiler, rev)
+        except NoSuchBuildError:
+            return False
+        else:
+            return True
 
-    def get_build(self, tree, host, compiler, rev=None):
-        logf = self.build_fname(tree, host, compiler, rev) + ".log"
+    def get_build(self, tree, host, compiler, rev, checksum=None):
+        basename = self.build_fname(tree, host, compiler, rev)
+        logf = "%s.log" % basename
         if not os.path.exists(logf):
             raise NoSuchBuildError(tree, host, compiler, rev)
-        return CachingBuild(self, tree, host, compiler, rev)
-
-    def cache_fname(self, tree, host, compiler, rev=None):
-        if rev is not None:
-            return os.path.join(self.cachedir, "build.%s.%s.%s-%s" % (tree, host, compiler, rev))
-        else:
-            return os.path.join(self.cachedir, "build.%s.%s.%s" % (tree, host, compiler))
+        return Build(basename, tree, host, compiler, rev)
 
-    def build_fname(self, tree, host, compiler, rev=None):
+    def build_fname(self, tree, host, compiler, rev):
         """get the name of the build file"""
-        if rev is not None:
-            return os.path.join(self.datadir, "oldrevs/build.%s.%s.%s-%s" % (tree, host, compiler, rev))
-        return os.path.join(self.datadir, "upload/build.%s.%s.%s" % (tree, host, compiler))
-
-    def lcov_status(self, tree):
-        """get status of build"""
-        cachefile = os.path.join(self.cachedir, "lcov.%s.%s.status" % (
-            self.LCOVHOST, tree))
-        file = os.path.join(self.lcovdir, self.LCOVHOST, tree, "index.html")
-        try:
-            st1 = os.stat(file)
-        except OSError:
-            # File does not exist
-            raise NoSuchBuildError(tree, self.LCOVHOST, "lcov")
-        try:
-            st2 = os.stat(cachefile)
-        except OSError:
-            # file does not exist
-            st2 = None
-
-        if st2 and st1.st_ctime <= st2.st_mtime:
-            ret = util.FileLoad(cachefile)
-            if ret == "":
-                return None
-            return ret
-
-        lcov_html = util.FileLoad(file)
-        perc = lcov_extract_percentage(lcov_html)
-        if perc is None:
-            ret = ""
-        else:
-            ret = perc
-        if self.readonly:
-            util.FileSave(cachefile, ret)
-        return perc
-
-    def get_old_revs(self, tree, host, compiler):
+        return os.path.join(self.path, "build.%s.%s.%s-%s" % (tree, host, compiler, rev))
+
+    def get_all_builds(self):
+        for l in os.listdir(self.path):
+            m = re.match(r"^build\.([0-9A-Za-z]+)\.([0-9A-Za-z]+)\.([0-9A-Za-z]+)-([0-9A-Fa-f]+)\.log$", l)
+            if not m:
+                continue
+            tree = m.group(1)
+            host = m.group(2)
+            compiler = m.group(3)
+            rev = m.group(4)
+            stat = os.stat(os.path.join(self.path, l))
+            # skip the current build
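+            # (its log is still hard-linked from the upload area, so the link count is 2)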
+            if stat.st_nlink == 2:
+                continue
+            yield self.get_build(tree, host, compiler, rev)
+
+    def get_old_builds(self, tree, host, compiler):
         """get a list of old builds and their status."""
         ret = []
-        directory = os.path.join(self.datadir, "oldrevs")
-        logfiles = [d for d in os.listdir(directory) if d.startswith("build.%s.%s.%s-" % (tree, host, compiler)) and d.endswith(".log")]
-        for l in logfiles:
-            m = re.match(".*-([0-9A-Fa-f]+).log$", l)
-            if m:
-                rev = m.group(1)
-                stat = os.stat(os.path.join(directory, l))
-                # skip the current build
-                if stat.st_nlink == 2:
-                    continue
-                build = self.get_build(tree, host, compiler, rev)
-                r = {
-                    "STATUS": build.status(),
-                    "REVISION": rev,
-                    "TIMESTAMP": build.age_ctime(),
-                    }
-                ret.append(r)
-
-        ret.sort(lambda a, b: cmp(a["TIMESTAMP"], b["TIMESTAMP"]))
-
+        for build in self.get_all_builds():
+            if build.tree == tree and build.host == host and build.compiler == compiler:
+                ret.append(build)
+        ret.sort(lambda a, b: cmp(a.upload_time, b.upload_time))
         return ret
 
-    def has_host(self, host):
-        for name in os.listdir(os.path.join(self.datadir, "upload")):
-            try:
-                if name.split(".")[2] == host:
-                    return True
-            except IndexError:
-                pass
-        return False
+    def upload_build(self, build):
+        (rev, rev_timestamp) = build.revision_details()
 
-    def host_age(self, host):
-        """get the overall age of a host"""
-        ret = None
-        for compiler in self.compilers:
-            for tree in self.trees:
-                try:
-                    build = self.get_build(tree, host, compiler)
-                except NoSuchBuildError:
-                    pass
-                else:
-                    ret = min(ret, build.age_mtime())
-        return ret
+        new_basename = self.build_fname(build.tree, build.host, build.compiler, rev)
+        try:
+            existing_build = self.get_build(build.tree, build.host, build.compiler, rev)
+        except NoSuchBuildError:
+            if os.path.exists(new_basename+".log"):
+                os.remove(new_basename+".log")
+            if os.path.exists(new_basename+".err"):
+                os.remove(new_basename+".err")
+        else:
+            existing_build.remove_logs()
+        os.link(build.basename+".log", new_basename+".log")
+        if os.path.exists(build.basename+".err"):
+            os.link(build.basename+".err", new_basename+".err")
+        return Build(new_basename, build.tree, build.host, build.compiler, rev)
+
+    def get_previous_revision(self, tree, host, compiler, revision):
+        raise NoSuchBuildError(tree, host, compiler, revision)
+
+    def get_latest_revision(self, tree, host, compiler):
+        raise NoSuchBuildError(tree, host, compiler)