Split up pages a bit.
[amitay/build-farm.git] / buildfarm / data.py
index 80a633c456af8c88949e4ceaebdcb3b2e65799cf..750deac94f644d6f2d83eaa14eb5e42be94eb6c6 100644 (file)
 #   along with this program; if not, write to the Free Software
 #   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 
-
-import ConfigParser
 from cStringIO import StringIO
+import collections
 import hashlib
 import os
 import re
 import time
-import util
+
+
+class BuildSummary(object):
+
+    def __init__(self, host, tree, compiler, revision, status):
+        self.host = host
+        self.tree = tree
+        self.compiler = compiler
+        self.revision = revision
+        self.status = status
+
+
+BuildStageResult = collections.namedtuple("BuildStageResult", "name result")
+
+
+class MissingRevisionInfo(Exception):
+    """Revision info could not be found in the build log."""
+
+    def __init__(self, build):
+        self.build = build
 
 
 class BuildStatus(object):
@@ -43,22 +61,64 @@ class BuildStatus(object):
         else:
             self.other_failures = set()
 
+    @property
+    def failed(self):
+        if self.other_failures:
+            return True
+        return not all([x == 0 for x in self._status_tuple()])
+
+    def __serialize__(self):
+        return repr(self)
+
+    @classmethod
+    def __deserialize__(cls, text):
+        return eval(text)
+
+    def __str__(self):
+        if self.other_failures:
+            return ",".join(self.other_failures)
+        return "/".join(map(str, self._status_tuple()))
+
     def broken_host(self):
         if "disk full" in self.other_failures:
             return True
         return False
 
     def _status_tuple(self):
-        return [v for (k, v) in self.stages]
+        return [sr.result for sr in self.stages]
 
     def regressed_since(self, other):
         """Check if this build has regressed since another build."""
         if "disk full" in self.other_failures:
             return False
+        if "timeout" in self.other_failures and "timeout" in other.other_failures:
+            # Exactly when the timeout happens can differ slightly, so it's okay
+            # if the numbers are a bit different.
+            return False
+        if "panic" in self.other_failures and not "panic" in other.other_failures:
+            return True
         return cmp(self._status_tuple(), other._status_tuple())
 
-    def __str__(self):
-        return repr((self.stages, self.other_failures))
+    def __cmp__(self, other):
+        other_extra = other.other_failures - self.other_failures
+        self_extra = self.other_failures - other.other_failures
+        # Give more importance to other failures
+        if other_extra:
+            return 1
+        if self_extra:
+            return -1
+
+        la = len(self.stages)
+        lb = len(other.stages)
+        if la > lb:
+            return 1
+        elif lb > la:
+            return -1
+        else:
+            return cmp(other.stages, self.stages)
+
+    def __repr__(self):
+        return "%s(%r, %r)" % (self.__class__.__name__, self.stages, self.other_failures)
 
 
 def check_dir_exists(kind, path):
@@ -70,23 +130,21 @@ def build_status_from_logs(log, err):
     """get status of build"""
     test_failures = 0
     test_successes = 0
+    test_seen = 0
     ret = BuildStatus()
 
     stages = []
+    re_status = re.compile("^([A-Z_]+) STATUS:(\s*\d+)$")
+    re_action = re.compile("^ACTION (PASSED|FAILED):\s+test$")
 
     for l in log:
-        m = re.match("^([A-Z_]+) STATUS:(\s*\d+)$", l)
-        if m:
-            stages.append((m.group(1), int(m.group(2).strip())))
-            continue
         if l.startswith("No space left on device"):
             ret.other_failures.add("disk full")
             continue
         if l.startswith("maximum runtime exceeded"):
             ret.other_failures.add("timeout")
             continue
-        m = re.match("^(PANIC|INTERNAL ERROR):.*$", l)
-        if m:
+        if l.startswith("PANIC:") or l.startswith("INTERNAL ERROR:"):
             ret.other_failures.add("panic")
             continue
         if l.startswith("testsuite-failure: ") or l.startswith("testsuite-error: "):
@@ -95,37 +153,41 @@ def build_status_from_logs(log, err):
         if l.startswith("testsuite-success: "):
             test_successes += 1
             continue
+        m = re_status.match(l)
+        if m:
+            stages.append(BuildStageResult(m.group(1), int(m.group(2).strip())))
+            if m.group(1) == "TEST":
+                test_seen = 1
+            continue
+        m = re_action.match(l)
+        if m and not test_seen:
+            if m.group(1) == "PASSED":
+                stages.append(BuildStageResult("TEST", 0))
+            else:
+                stages.append(BuildStageResult("TEST", 1))
+            continue
 
     # Scan err file for specific errors
     for l in err:
         if "No space left on device" in l:
             ret.other_failures.add("disk full")
 
-    stage_results = dict(stages)
-    def map_stage(name, result):
-        if name != "TEST":
-            return (name, result)
+    def map_stage(sr):
+        if sr.name != "TEST":
+            return sr
         # TEST is special
         if test_successes + test_failures == 0:
             # No granular test output
-            return ("TEST", result)
-        if result == 0 and test_failures == 0:
+            return BuildStageResult("TEST", sr.result)
+        if sr.result == 1 and test_failures == 0:
             ret.other_failures.add("inconsistent test result")
-            return ("TEST", -1)
-        return ("TEST", test_failures)
+            return BuildStageResult("TEST", -1)
+        return BuildStageResult("TEST", test_failures)
 
-    ret.stages = [map_stage(name, result) for (name, result) in stages]
+    ret.stages = map(map_stage, stages)
     return ret
 
 
-def lcov_extract_percentage(text):
-    m = re.search('\<td class="headerItem".*?\>Code\&nbsp\;covered\:\<\/td\>.*?\n.*?\<td class="headerValue".*?\>([0-9.]+) \%', text)
-    if m:
-        return m.group(1)
-    else:
-        return None
-
-
 class NoSuchBuildError(Exception):
     """The build with the specified name does not exist."""
 
@@ -136,32 +198,30 @@ class NoSuchBuildError(Exception):
         self.rev = rev
 
 
-class Tree(object):
-    """A tree to build."""
-
-    def __init__(self, name, scm, repo, branch, subdir="", srcdir=""):
-        self.name = name
-        self.repo = repo
-        self.scm = scm
-        self.branch = branch
-        self.subdir = subdir
-        self.srcdir = srcdir
-        self.scm = scm
-
-    def __repr__(self):
-        return "<%s %r>" % (self.__class__.__name__, self.name)
-
-
 class Build(object):
     """A single build of a tree on a particular host using a particular compiler.
     """
 
-    def __init__(self, store, tree, host, compiler, rev=None):
-        self._store = store
+    def __init__(self, basename, tree, host, compiler, rev=None):
+        self.basename = basename
         self.tree = tree
         self.host = host
         self.compiler = compiler
-        self.rev = rev
+        self.commit_revision = self.revision = rev
+
+    def __repr__(self):
+        if self.revision is not None:
+            return "<%s: revision %s of %s on %s using %s>" % (self.__class__.__name__, self.revision, self.tree, self.host, self.compiler)
+        else:
+            return "<%s: %s on %s using %s>" % (self.__class__.__name__, self.tree, self.host, self.compiler)
+
+    def remove_logs(self):
+        os.unlink(self.basename + ".log")
+        if os.path.exists(self.basename+".err"):
+            os.unlink(self.basename+".err")
+
+    def remove(self):
+        self.remove_logs()
 
     ###################
     # the mtime age is used to determine if builds are still happening
@@ -170,31 +230,26 @@ class Build(object):
 
     def age_mtime(self):
         """get the age of build from mtime"""
-        file = self._store.build_fname(self.tree, self.host, self.compiler, self.rev)
-
-        st = os.stat("%s.log" % file)
+        st = os.stat("%s.log" % self.basename)
         return time.time() - st.st_mtime
 
     def age_ctime(self):
         """get the age of build from ctime"""
-        file = self._store.build_fname(self.tree, self.host, self.compiler, self.rev)
-
-        st = os.stat("%s.log" % file)
+        st = os.stat("%s.log" % self.basename)
         return time.time() - st.st_ctime
 
     def read_log(self):
         """read full log file"""
-        return open(self._store.build_fname(self.tree, self.host, self.compiler, self.rev)+".log", "r")
+        return open(self.basename+".log", "r")
 
     def read_err(self):
         """read full err file"""
         try:
-            return open(self._store.build_fname(self.tree, self.host, self.compiler, self.rev)+".err", 'r')
+            return open(self.basename+".err", 'r')
         except IOError:
             # No such file
             return StringIO()
 
-
     def log_checksum(self):
         f = self.read_log()
         try:
@@ -202,29 +257,32 @@ class Build(object):
         finally:
             f.close()
 
+    def summary(self):
+        (revid, timestamp) = self.revision_details()
+        status = self.status()
+        return BuildSummary(self.host, self.tree, self.compiler, revid, status)
+
     def revision_details(self):
         """get the revision of build
 
         :return: Tuple with revision id and timestamp (if available)
         """
-        file = self._store.build_fname(self.tree, self.host, self.compiler, self.rev)
-
         revid = None
-        commit_revid = None
         timestamp = None
-        f = open("%s.log" % file, 'r')
+        f = self.read_log()
         try:
-            for l in f.readlines():
+            for l in f:
                 if l.startswith("BUILD COMMIT REVISION: "):
-                    commit_revid = l.split(":", 1)[1].strip()
-                elif l.startswith("BUILD REVISION: "):
                     revid = l.split(":", 1)[1].strip()
                 elif l.startswith("BUILD COMMIT TIME"):
                     timestamp = l.split(":", 1)[1].strip()
         finally:
             f.close()
 
-        return (revid, commit_revid, timestamp)
+        if revid is None:
+            raise MissingRevisionInfo(self)
+
+        return (revid, timestamp)
 
     def status(self):
         """get status of build
@@ -243,229 +301,117 @@ class Build(object):
 
     def err_count(self):
         """get status of build"""
-        file = self._store.build_fname(self.tree, self.host, self.compiler, self.rev)
-
-        try:
-            err = util.FileLoad("%s.err" % file)
-        except OSError:
-            # File does not exist
-            return 0
-
-        return util.count_lines(err)
-
-
-class CachingBuild(Build):
-    """Build subclass that caches some of the results that are expensive
-    to calculate."""
-
-    def revision_details(self):
-        file = self._store.build_fname(self.tree, self.host, self.compiler, self.rev)
-        cachef = self._store.cache_fname(self.tree, self.host, self.compiler, self.rev)
-        st1 = os.stat("%s.log" % file)
+        file = self.read_err()
+        return len(file.readlines())
 
-        try:
-            st2 = os.stat("%s.revision" % cachef)
-        except OSError:
-            # File does not exist
-            st2 = None
-
-        # the ctime/mtime asymmetry is needed so we don't get fooled by
-        # the mtime update from rsync
-        if st2 and st1.st_ctime <= st2.st_mtime:
-            (revid, commit_revid, timestamp) = util.FileLoad("%s.revision" % cachef).split(":", 2)
-            if timestamp == "":
-                timestamp = None
-            if revid == "":
-                revid = None
-            if commit_revid == "":
-                commit_revid = None
-            return (revid, commit_revid, timestamp)
-        (revid, commit_revid, timestamp) = super(CachingBuild, self).revision_details()
-        if not self._store.readonly:
-            util.FileSave("%s.revision" % cachef, "%s:%s:%s" % (revid, commit_revid or "", timestamp or ""))
-        return (revid, commit_revid, timestamp)
-
-    def err_count(self):
-        file = self._store.build_fname(self.tree, self.host, self.compiler, self.rev)
-        cachef = self._store.cache_fname(self.tree, self.host, self.compiler, self.rev)
-        st1 = os.stat("%s.err" % file)
-
-        try:
-            st2 = os.stat("%s.errcount" % cachef)
-        except OSError:
-            # File does not exist
-            st2 = None
-
-        if st2 and st1.st_ctime <= st2.st_mtime:
-            return util.FileLoad("%s.errcount" % cachef)
-
-        ret = super(CachingBuild, self).err_count()
-
-        if not self._store.readonly:
-            util.FileSave("%s.errcount" % cachef, str(ret))
 
-        return ret
-
-    def status(self):
-        file = self._store.build_fname(self.tree, self.host, self.compiler, self.rev)
-        cachefile = self._store.cache_fname(self.tree, self.host, self.compiler, self.rev)+".status"
-
-        st1 = os.stat("%s.log" % file)
-
-        try:
-            st2 = os.stat(cachefile)
-        except OSError:
-            # No such file
-            st2 = None
+class UploadBuildResultStore(object):
 
-        if st2 and st1.st_ctime <= st2.st_mtime:
-            return BuildStatus(*eval(util.FileLoad(cachefile)))
+    def __init__(self, path):
+        """Open the database.
 
-        ret = super(CachingBuild, self).status()
+        :param path: Build result base directory
+        """
+        self.path = path
 
-        if not self._store.readonly:
-            util.FileSave(cachefile, str(ret))
+    def get_new_builds(self):
+        for name in os.listdir(self.path):
+            try:
+                (build, tree, host, compiler, extension) = name.split(".")
+            except ValueError:
+                continue
+            if build != "build" or extension != "log":
+                continue
+            yield self.get_build(tree, host, compiler)
 
-        return ret
+    def build_fname(self, tree, host, compiler):
+        return os.path.join(self.path, "build.%s.%s.%s" % (tree, host, compiler))
 
+    def has_host(self, host):
+        for name in os.listdir(self.path):
+            try:
+                if name.split(".")[2] == host:
+                    return True
+            except IndexError:
+                pass
+        return False
 
-def read_trees_from_conf(path):
-    """Read trees from a configuration file."""
-    ret = {}
-    cfp = ConfigParser.ConfigParser()
-    cfp.readfp(open(path))
-    for s in cfp.sections():
-        ret[s] = Tree(name=s, **dict(cfp.items(s)))
-    return ret
+    def get_build(self, tree, host, compiler):
+        basename = self.build_fname(tree, host, compiler)
+        logf = "%s.log" % basename
+        if not os.path.exists(logf):
+            raise NoSuchBuildError(tree, host, compiler)
+        return Build(basename, tree, host, compiler)
 
 
 class BuildResultStore(object):
     """The build farm build result database."""
 
-    OLDAGE = 60*60*4,
-    DEADAGE = 60*60*24*4
-    LCOVHOST = "magni"
-
-    def __init__(self, basedir, readonly=False):
+    def __init__(self, path):
         """Open the database.
 
-        :param basedir: Build result base directory
-        :param readonly: Whether to avoid saving cache files
+        :param path: Build result base directory
         """
-        self.basedir = basedir
-        check_dir_exists("base", self.basedir)
-        self.readonly = readonly
+        self.path = path
 
-        self.webdir = os.path.join(basedir, "web")
-        check_dir_exists("web", self.webdir)
-
-        self.datadir = os.path.join(basedir, "data")
-        check_dir_exists("data", self.datadir)
-
-        self.cachedir = os.path.join(basedir, "cache")
-        check_dir_exists("cache", self.cachedir)
-
-        self.lcovdir = os.path.join(basedir, "lcov/data")
-        check_dir_exists("lcov", self.lcovdir)
-
-        self.compilers = util.load_list(os.path.join(self.webdir, "compilers.list"))
-
-        self.trees = read_trees_from_conf(os.path.join(self.webdir, "trees.conf"))
+    def __contains__(self, build):
+        try:
+            if build.revision:
+                rev = build.revision
+            else:
+                rev, timestamp = build.revision_details()
+            self.get_build(build.tree, build.host, build.compiler, rev)
+        except NoSuchBuildError:
+            return False
+        else:
+            return True
 
-    def get_build(self, tree, host, compiler, rev=None):
-        logf = self.build_fname(tree, host, compiler, rev) + ".log"
+    def get_build(self, tree, host, compiler, rev):
+        basename = self.build_fname(tree, host, compiler, rev)
+        logf = "%s.log" % basename
         if not os.path.exists(logf):
             raise NoSuchBuildError(tree, host, compiler, rev)
-        return CachingBuild(self, tree, host, compiler, rev)
-
-    def cache_fname(self, tree, host, compiler, rev=None):
-        if rev is not None:
-            return os.path.join(self.cachedir, "build.%s.%s.%s-%s" % (tree, host, compiler, rev))
-        else:
-            return os.path.join(self.cachedir, "build.%s.%s.%s" % (tree, host, compiler))
+        return Build(basename, tree, host, compiler, rev)
 
-    def build_fname(self, tree, host, compiler, rev=None):
+    def build_fname(self, tree, host, compiler, rev):
         """get the name of the build file"""
-        if rev is not None:
-            return os.path.join(self.datadir, "oldrevs/build.%s.%s.%s-%s" % (tree, host, compiler, rev))
-        return os.path.join(self.datadir, "upload/build.%s.%s.%s" % (tree, host, compiler))
-
-    def lcov_status(self, tree):
-        """get status of build"""
-        cachefile = os.path.join(self.cachedir, "lcov.%s.%s.status" % (
-            self.LCOVHOST, tree))
-        file = os.path.join(self.lcovdir, self.LCOVHOST, tree, "index.html")
-        try:
-            st1 = os.stat(file)
-        except OSError:
-            # File does not exist
-            raise NoSuchBuildError(tree, self.LCOVHOST, "lcov")
-        try:
-            st2 = os.stat(cachefile)
-        except OSError:
-            # file does not exist
-            st2 = None
-
-        if st2 and st1.st_ctime <= st2.st_mtime:
-            ret = util.FileLoad(cachefile)
-            if ret == "":
-                return None
-            return ret
-
-        lcov_html = util.FileLoad(file)
-        perc = lcov_extract_percentage(lcov_html)
-        if perc is None:
-            ret = ""
-        else:
-            ret = perc
-        if self.readonly:
-            util.FileSave(cachefile, ret)
-        return perc
+        return os.path.join(self.path, "build.%s.%s.%s-%s" % (tree, host, compiler, rev))
 
     def get_old_revs(self, tree, host, compiler):
         """get a list of old builds and their status."""
         ret = []
-        directory = os.path.join(self.datadir, "oldrevs")
-        logfiles = [d for d in os.listdir(directory) if d.startswith("build.%s.%s.%s-" % (tree, host, compiler)) and d.endswith(".log")]
+        logfiles = [d for d in os.listdir(self.path) if d.startswith("build.%s.%s.%s-" % (tree, host, compiler)) and d.endswith(".log")]
         for l in logfiles:
             m = re.match(".*-([0-9A-Fa-f]+).log$", l)
             if m:
                 rev = m.group(1)
-                stat = os.stat(os.path.join(directory, l))
+                stat = os.stat(os.path.join(self.path, l))
                 # skip the current build
                 if stat.st_nlink == 2:
                     continue
-                build = self.get_build(tree, host, compiler, rev)
-                r = {
-                    "STATUS": build.status(),
-                    "REVISION": rev,
-                    "TIMESTAMP": build.age_ctime(),
-                    }
-                ret.append(r)
+                ret.append(self.get_build(tree, host, compiler, rev))
 
-        ret.sort(lambda a, b: cmp(a["TIMESTAMP"], b["TIMESTAMP"]))
+        ret.sort(lambda a, b: cmp(a.age_mtime(), b.age_mtime()))
 
         return ret
 
-    def has_host(self, host):
-        for name in os.listdir(os.path.join(self.datadir, "upload")):
-            try:
-                if name.split(".")[2] == host:
-                    return True
-            except IndexError:
-                pass
-        return False
+    def upload_build(self, build):
+        (rev, rev_timestamp) = build.revision_details()
 
-    def host_age(self, host):
-        """get the overall age of a host"""
-        # FIXME: Turn this into a simple SQL query, or use something in hostdb ?
-        ret = None
-        for compiler in self.compilers:
-            for tree in self.trees:
-                try:
-                    build = self.get_build(tree, host, compiler)
-                except NoSuchBuildError:
-                    pass
-                else:
-                    ret = min(ret, build.age_mtime())
-        return ret
+        new_basename = self.build_fname(build.tree, build.host, build.compiler, rev)
+        try:
+            existing_build = self.get_build(build.tree, build.host, build.compiler, rev)
+        except NoSuchBuildError:
+            pass
+        else:
+            existing_build.remove_logs()
+        os.link(build.basename+".log", new_basename+".log")
+        if os.path.exists(build.basename+".err"):
+            os.link(build.basename+".err", new_basename+".err")
+        return Build(new_basename, build.tree, build.host, build.compiler, rev)
+
+    def get_previous_revision(self, tree, host, compiler, revision):
+        raise NoSuchBuildError(tree, host, compiler, revision)
+
+    def get_latest_revision(self, tree, host, compiler):
+        raise NoSuchBuildError(tree, host, compiler)