Split up pages a bit.
[amitay/build-farm.git] / buildfarm / data.py
index 887ea6b552037fc36191af605e0ce809d13a09ab..750deac94f644d6f2d83eaa14eb5e42be94eb6c6 100644 (file)
 #   along with this program; if not, write to the Free Software
 #   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 
-
-import ConfigParser
+from cStringIO import StringIO
+import collections
+import hashlib
 import os
 import re
 import time
-import util
 
 
-def span(classname, contents):
-    return "<span class=\"%s\">%s</span>" % (classname, contents)
+class BuildSummary(object):
+    """Plain record describing one build.
+
+    Stores the host, tree, compiler, revision and status it was created
+    with, each under the attribute of the same name.
+    """
+
+    def __init__(self, host, tree, compiler, revision, status):
+        (self.host, self.tree, self.compiler) = (host, tree, compiler)
+        (self.revision, self.status) = (revision, status)
+
+
+# Outcome of one build stage: the stage name and its result.
+BuildStageResult = collections.namedtuple("BuildStageResult", ["name", "result"])
+
+
+class MissingRevisionInfo(Exception):
+    """Revision info could not be found in the build log.
+
+    Raised by Build.revision_details() when the log contains no
+    "BUILD COMMIT REVISION: " line.
+    """
+
+    def __init__(self, build):
+        # The build whose log lacked the revision info.
+        self.build = build
+
+
+class BuildStatus(object):
+    """Result of a build: per-stage results plus failures outside any stage.
+
+    :ivar stages: Sequence of stage results (objects with a ``result``
+        attribute, such as BuildStageResult)
+    :ivar other_failures: Set of failure names that are not tied to a
+        stage, e.g. "disk full", "timeout" or "panic"
+    """
+
+    def __init__(self, stages=None, other_failures=None):
+        """Create a status.
+
+        :param stages: Optional stage results; a new empty list if omitted
+        :param other_failures: Optional set of other failure names; a new
+            empty set if omitted
+        """
+        # Build fresh containers per instance instead of sharing defaults.
+        self.stages = stages if stages is not None else []
+        if other_failures is not None:
+            self.other_failures = other_failures
+        else:
+            self.other_failures = set()
+
+    @property
+    def failed(self):
+        """Whether any other failure is set or any stage result is non-zero."""
+        if self.other_failures:
+            return True
+        return not all(x == 0 for x in self._status_tuple())
+
+    def __serialize__(self):
+        """Return the text form accepted by __deserialize__ (the repr)."""
+        return repr(self)
+
+    @classmethod
+    def __deserialize__(cls, text):
+        """Rebuild a status from text produced by __serialize__.
+
+        SECURITY: ``text`` is passed to eval(), which runs arbitrary code.
+        Only pass strings that this program serialized itself.
+        """
+        # NOTE(review): replace eval with a real parser if the stored text
+        # can ever come from an untrusted source.
+        return eval(text)
+
+    def __str__(self):
+        """Other failures joined by ",", else stage results joined by "/"."""
+        if self.other_failures:
+            # Set iteration order is arbitrary; sort for a stable string.
+            return ",".join(sorted(self.other_failures))
+        return "/".join(map(str, self._status_tuple()))
+
+    def broken_host(self):
+        """Return True if the failure is a full disk, False otherwise."""
+        return "disk full" in self.other_failures
+
+    def _status_tuple(self):
+        """Return the list of per-stage results, in stage order."""
+        return [sr.result for sr in self.stages]
+
+    def regressed_since(self, other):
+        """Check if this build has regressed since another build.
+
+        :param other: BuildStatus of the build to compare against
+        :return: False or True for the special cases below, otherwise the
+            cmp() of the two builds' stage results (non-zero is truthy)
+        """
+        if "disk full" in self.other_failures:
+            return False
+        if "timeout" in self.other_failures and "timeout" in other.other_failures:
+            # When the timeout happens exactly can differ slightly, so it's okay
+            # if the numbers are a bit different..
+            return False
+        if "panic" in self.other_failures and "panic" not in other.other_failures:
+            return True
+        # NOTE(review): cmp() is non-zero for any difference, so improved
+        # stage results are also reported as truthy here - confirm intent.
+        return cmp(self._status_tuple(), other._status_tuple())
+
+    def __cmp__(self, other):
+        """Order two statuses.
+
+        Failures only the other status has give 1, failures only this one
+        has give -1; then more stages sorts higher, and equally long stage
+        lists are compared in reverse (other against self).
+        """
+        other_extra = other.other_failures - self.other_failures
+        self_extra = self.other_failures - other.other_failures
+        # Give more importance to other failures
+        if other_extra:
+            return 1
+        if self_extra:
+            return -1
+
+        by_length = cmp(len(self.stages), len(other.stages))
+        if by_length != 0:
+            return by_length
+        return cmp(other.stages, self.stages)
+
+    def __repr__(self):
+        """Return an eval()-able constructor call, as used by __serialize__."""
+        return "%s(%r, %r)" % (self.__class__.__name__, self.stages, self.other_failures)
 
 
 def check_dir_exists(kind, path):
@@ -38,6 +126,68 @@ def check_dir_exists(kind, path):
         raise Exception("%s directory %s does not exist" % (kind, path))
 
 
+def build_status_from_logs(log, err):
+    """Derive a BuildStatus from the lines of a build's log and err output.
+
+    :param log: Iterable of lines from the build log
+    :param err: Iterable of lines from the build's err file
+    :return: BuildStatus with one result per stage plus other failures
+    """
+    test_failures = 0
+    test_successes = 0
+    test_seen = False
+    ret = BuildStatus()
+    stages = []
+    re_status = re.compile(r"^([A-Z_]+) STATUS:(\s*\d+)$")
+    re_action = re.compile(r"^ACTION (PASSED|FAILED):\s+test$")
+
+    for line in log:
+        if line.startswith("No space left on device"):
+            ret.other_failures.add("disk full")
+        elif line.startswith("maximum runtime exceeded"):
+            ret.other_failures.add("timeout")
+        elif line.startswith(("PANIC:", "INTERNAL ERROR:")):
+            ret.other_failures.add("panic")
+        elif line.startswith(("testsuite-failure: ", "testsuite-error: ")):
+            test_failures += 1
+        elif line.startswith("testsuite-success: "):
+            test_successes += 1
+        else:
+            m = re_status.match(line)
+            if m:
+                stages.append(BuildStageResult(m.group(1), int(m.group(2))))
+                if m.group(1) == "TEST":
+                    test_seen = True
+                continue
+            m = re_action.match(line)
+            # A bare ACTION line only counts if no TEST STATUS line was seen
+            if m and not test_seen:
+                result = 0 if m.group(1) == "PASSED" else 1
+                stages.append(BuildStageResult("TEST", result))
+
+    # Scan err file for specific errors
+    for line in err:
+        if "No space left on device" in line:
+            ret.other_failures.add("disk full")
+
+    def map_stage(sr):
+        """Swap the TEST stage result for the counted test failures."""
+        if sr.name != "TEST":
+            return sr
+        if test_successes + test_failures == 0:
+            # No granular test output
+            return BuildStageResult("TEST", sr.result)
+        if sr.result == 1 and test_failures == 0:
+            # The stage reported failure although no single test failed
+            ret.other_failures.add("inconsistent test result")
+            return BuildStageResult("TEST", -1)
+        return BuildStageResult("TEST", test_failures)
+
+    # A real list, not map(): map_stage has a side effect that must run now
+    ret.stages = [map_stage(sr) for sr in stages]
+    return ret
+
+
 class NoSuchBuildError(Exception):
     """The build with the specified name does not exist."""
 
@@ -48,30 +198,30 @@ class NoSuchBuildError(Exception):
         self.rev = rev
 
 
-class Tree(object):
-    """A tree to build."""
-
-    def __init__(self, name, scm, repo, branch, subdir="", srcdir=""):
-        self.name = name
-        self.repo = repo
-        self.branch = branch
-        self.subdir = subdir
-        self.srcdir = srcdir
-
-    def __repr__(self):
-        return "<%s %r>" % (self.__class__.__name__, self.name)
-
-
 class Build(object):
     """A single build of a tree on a particular host using a particular compiler.
     """
 
-    def __init__(self, store, tree, host, compiler, rev=None):
-        self._store = store
+    def __init__(self, basename, tree, host, compiler, rev=None):
+        self.basename = basename
         self.tree = tree
         self.host = host
         self.compiler = compiler
-        self.rev = rev
+        self.commit_revision = self.revision = rev
+
+    def __repr__(self):
+        """Describe the build, including its revision when one is set."""
+        cls_name = self.__class__.__name__
+        where = "%s on %s using %s" % (self.tree, self.host, self.compiler)
+        if self.revision is None:
+            return "<%s: %s>" % (cls_name, where)
+        return "<%s: revision %s of %s>" % (cls_name, self.revision, where)
+
+    def remove_logs(self):
+        """Delete the build's .log file and, if present, its .err file."""
+        err_path = "%s.err" % self.basename
+        os.unlink("%s.log" % self.basename)
+        if os.path.exists(err_path):
+            os.unlink(err_path)
+
+    def remove(self):
+        """Remove this build; at present that only deletes its log files."""
+        self.remove_logs()
 
     ###################
     # the mtime age is used to determine if builds are still happening
@@ -80,408 +230,188 @@ class Build(object):
 
     def age_mtime(self):
         """get the age of build from mtime"""
-        file = self._store.build_fname(self.tree, self.host, self.compiler, self.rev)
-
-        st = os.stat("%s.log" % file)
+        st = os.stat("%s.log" % self.basename)
         return time.time() - st.st_mtime
 
     def age_ctime(self):
         """get the age of build from ctime"""
-        file = self._store.build_fname(self.tree, self.host, self.compiler, self.rev)
-
-        st = os.stat("%s.log" % file)
+        st = os.stat("%s.log" % self.basename)
         return time.time() - st.st_ctime
 
     def read_log(self):
         """read full log file"""
-        f = open(self._store.build_fname(self.tree, self.host, self.compiler, self.rev)+".log", "r")
+        return open(self.basename+".log", "r")
+
+    def read_err(self):
+        """read full err file"""
         try:
-            return f.read()
+            return open(self.basename+".err", 'r')
+        except IOError:
+            # No such file
+            return StringIO()
+
+    def log_checksum(self):
+        f = self.read_log()
+        try:
+            return hashlib.sha1(f.read()).hexdigest()
         finally:
             f.close()
 
-    def read_err(self):
-        """read full err file"""
-        return util.FileLoad(self._store.build_fname(self.tree, self.host, self.compiler, self.rev)+".err")
+    def summary(self):
+        """Return a BuildSummary for this build.
+
+        The revision id is taken from revision_details() and the status
+        from status(); the timestamp is not part of the summary.
+        """
+        (revid, _timestamp) = self.revision_details()
+        build_status = self.status()
+        return BuildSummary(
+            self.host, self.tree, self.compiler, revid, build_status)
 
     def revision_details(self):
         """get the revision of build
 
         :return: Tuple with revision id and timestamp (if available)
         """
-        file = self._store.build_fname(self.tree, self.host, self.compiler, self.rev)
-
         revid = None
         timestamp = None
-        f = open("%s.log" % file, 'r')
+        f = self.read_log()
         try:
-            for l in f.readlines():
+            for l in f:
                 if l.startswith("BUILD COMMIT REVISION: "):
                     revid = l.split(":", 1)[1].strip()
-                elif l.startswith("BUILD REVISION: "):
-                    revid = l.split(":", 1)[1].strip()
                 elif l.startswith("BUILD COMMIT TIME"):
                     timestamp = l.split(":", 1)[1].strip()
         finally:
             f.close()
 
+        if revid is None:
+            raise MissingRevisionInfo(self)
+
         return (revid, timestamp)
 
     def status(self):
         """get status of build
 
-        :return: string with build status
+        :return: tuple with build status
         """
-        # FIXME: This should return a tuple
-
         log = self.read_log()
-        err = self.read_err()
-
-        return self._store.html_build_status_from_logs(log, err)
-
-    def err_count(self):
-        """get status of build"""
-        file = self._store.build_fname(self.tree, self.host, self.compiler, self.rev)
-
-        try:
-            err = util.FileLoad("%s.err" % file)
-        except OSError:
-            # File does not exist
-            return 0
-
-        return util.count_lines(err)
-
-
-class CachingBuild(Build):
-    """Build subclass that caches some of the results that are expensive
-    to calculate."""
-
-    def revision_details(self):
-        file = self._store.build_fname(self.tree, self.host, self.compiler, self.rev)
-        cachef = self._store.cache_fname(self.tree, self.host, self.compiler, self.rev)
-        st1 = os.stat("%s.log" % file)
-
         try:
-            st2 = os.stat("%s.revision" % cachef)
-        except OSError:
-            # File does not exist
-            st2 = None
-
-        # the ctime/mtime asymmetry is needed so we don't get fooled by
-        # the mtime update from rsync
-        if st2 and st1.st_ctime <= st2.st_mtime:
-            (revid, timestamp) = util.FileLoad("%s.revision" % cachef).split(":", 1)
-            if timestamp == "":
-                return (revid, None)
-            else:
-                return (revid, timestamp)
-        (revid, timestamp) = super(CachingBuild, self).revision_details()
-        if not self._store.readonly:
-            util.FileSave("%s.revision" % cachef, "%s:%s" % (revid, timestamp or ""))
-        return (revid, timestamp)
+            err = self.read_err()
+            try:
+                return build_status_from_logs(log, err)
+            finally:
+                err.close()
+        finally:
+            log.close()
 
     def err_count(self):
-        file = self._store.build_fname(self.tree, self.host, self.compiler, self.rev)
-        cachef = self._store.cache_fname(self.tree, self.host, self.compiler, self.rev)
-        st1 = os.stat("%s.err" % file)
-
-        try:
-            st2 = os.stat("%s.errcount" % cachef)
-        except OSError:
-            # File does not exist
-            st2 = None
-
-        if st2 and st1.st_ctime <= st2.st_mtime:
-            return util.FileLoad("%s.errcount" % cachef)
-
-        ret = super(CachingBuild, self).err_count()
-
-        if not self._store.readonly:
-            util.FileSave("%s.errcount" % cachef, str(ret))
-
-        return ret
-
-    def status(self):
-        file = self._store.build_fname(self.tree, self.host, self.compiler, self.rev)
-        cachefile = self._store.cache_fname(self.tree, self.host, self.compiler, self.rev)+".status"
-
-        st1 = os.stat("%s.log" % file)
-
-        try:
-            st2 = os.stat(cachefile)
-        except OSError:
-            # No such file
-            st2 = None
+        """get status of build"""
+        file = self.read_err()
+        return len(file.readlines())
 
-        if st2 and st1.st_ctime <= st2.st_mtime:
-            return util.FileLoad(cachefile)
 
-        ret = super(CachingBuild, self).status()
+class UploadBuildResultStore(object):
 
-        if not self._store.readonly:
-            util.FileSave(cachefile, ret)
+    def __init__(self, path):
+        """Open the database.
 
-        return ret
+        :param path: Build result base directory
+        """
+        self.path = path
 
+    def get_new_builds(self):
+        """Yield a Build for each "build.<tree>.<host>.<compiler>.log" file.
+
+        Names that do not split into exactly five dot-separated parts are
+        ignored.
+        """
+        for name in os.listdir(self.path):
+            parts = name.split(".")
+            if len(parts) != 5:
+                continue
+            (prefix, tree, host, compiler, extension) = parts
+            if prefix == "build" and extension == "log":
+                yield self.get_build(tree, host, compiler)
 
+    def build_fname(self, tree, host, compiler):
+        """Return the extension-less path of a build's files in this store."""
+        basename = "build.%s.%s.%s" % (tree, host, compiler)
+        return os.path.join(self.path, basename)
 
-def read_trees_from_conf(path):
-    """Read trees from a configuration file."""
-    ret = {}
-    cfp = ConfigParser.ConfigParser()
-    cfp.readfp(open(path))
-    for s in cfp.sections():
-        ret[s] = Tree(name=s, **dict(cfp.items(s)))
-    return s
+    def has_host(self, host):
+        """Return whether any file name in the store has ``host`` as its
+        third dot-separated part."""
+        name_parts = (name.split(".") for name in os.listdir(self.path))
+        return any(len(parts) > 2 and parts[2] == host for parts in name_parts)
+
+    def get_build(self, tree, host, compiler):
+        """Return the uploaded Build for the given tree, host and compiler.
+
+        :raise NoSuchBuildError: if the build's .log file does not exist
+        """
+        basename = self.build_fname(tree, host, compiler)
+        if os.path.exists(basename + ".log"):
+            return Build(basename, tree, host, compiler)
+        raise NoSuchBuildError(tree, host, compiler)
 
 
 class BuildResultStore(object):
     """The build farm build result database."""
 
-    OLDAGE = 60*60*4,
-    DEADAGE = 60*60*24*4
-    LCOVHOST = "magni"
-
-    def __init__(self, basedir, readonly=False):
+    def __init__(self, path):
         """Open the database.
 
-        :param basedir: Build result base directory
-        :param readonly: Whether to avoid saving cache files
+        :param path: Build result base directory
         """
-        self.basedir = basedir
-        check_dir_exists("base", self.basedir)
-        self.readonly = readonly
-
-        self.webdir = os.path.join(basedir, "web")
-        check_dir_exists("web", self.webdir)
+        self.path = path
 
-        self.datadir = os.path.join(basedir, "data")
-        check_dir_exists("data", self.datadir)
-
-        self.cachedir = os.path.join(basedir, "cache")
-        check_dir_exists("cache", self.cachedir)
-
-        self.lcovdir = os.path.join(basedir, "lcov/data")
-        check_dir_exists("lcov", self.lcovdir)
-
-        self.compilers = util.load_list(os.path.join(self.webdir, "compilers.list"))
-        self.hosts = util.load_hash(os.path.join(self.webdir, "hosts.list"))
-
-        self.trees = read_trees_from_conf(os.path.join(self.webdir, "trees.conf"))
+    def __contains__(self, build):
+        """Return whether the store holds a build matching ``build``.
+
+        The build's own revision is used when it is set; otherwise the
+        revision is read via build.revision_details().
+        """
+        try:
+            rev = build.revision
+            if not rev:
+                (rev, _timestamp) = build.revision_details()
+            self.get_build(build.tree, build.host, build.compiler, rev)
+        except NoSuchBuildError:
+            return False
+        return True
 
-    def get_build(self, tree, host, compiler, rev=None):
-        logf = self.build_fname(tree, host, compiler, rev) + ".log"
+    def get_build(self, tree, host, compiler, rev):
+        basename = self.build_fname(tree, host, compiler, rev)
+        logf = "%s.log" % basename
         if not os.path.exists(logf):
             raise NoSuchBuildError(tree, host, compiler, rev)
-        return CachingBuild(self, tree, host, compiler, rev)
-
-    def cache_fname(self, tree, host, compiler, rev=None):
-        if rev is not None:
-            return os.path.join(self.cachedir, "build.%s.%s.%s-%s" % (tree, host, compiler, rev))
-        else:
-            return os.path.join(self.cachedir, "build.%s.%s.%s" % (tree, host, compiler))
+        return Build(basename, tree, host, compiler, rev)
 
-    def build_fname(self, tree, host, compiler, rev=None):
+    def build_fname(self, tree, host, compiler, rev):
         """get the name of the build file"""
-        if rev is not None:
-            return os.path.join(self.datadir, "oldrevs/build.%s.%s.%s-%s" % (tree, host, compiler, rev))
-        return os.path.join(self.datadir, "upload/build.%s.%s.%s" % (tree, host, compiler))
-
-    def html_build_status_from_logs(self, log, err):
-        def span_status(st):
-            if st is None:
-                return span("status unknown", "?")
-            elif st == 0:
-                return span("status passed", "ok")
-            else:
-                return span("status failed", st)
-        (cstatus, bstatus, istatus, tstatus, sstatus, other_failures) = self.build_status_from_logs(log, err)
-        ostatus = ""
-        if "panic" in other_failures:
-            ostatus += "/"+span("status panic", "PANIC")
-        if "disk full" in other_failures:
-            ostatus += "/"+span("status failed", "disk full")
-        if "timeout" in other_failures:
-            ostatus += "/"+span("status failed", "timeout")
-        if sstatus is not None:
-            ostatus += "/".span("status checker", sstatus)
-        return "%s/%s/%s/%s%s" % (span_status(cstatus), span_status(bstatus), span_status(istatus), span_status(tstatus), ostatus)
-
-    def build_status_from_logs(self, log, err):
-        """get status of build"""
-        m = re.search("TEST STATUS:(.*)", log)
-        if m:
-            tstatus = m.group(1)
-        else:
-            m = re.search("ACTION (PASSED|FAILED): test", log)
-            if m:
-                test_failures = len(re.findall("testsuite-(failure|error): ", log))
-                test_successes = len(re.findall("testsuite-success: ", log))
-                if test_successes > 0:
-                    tstatus = test_failures
-                else:
-                    tstatus = 255
-            else:
-                tstatus = None
-
-        m = re.search("INSTALL STATUS:(.*)", log)
-        if m:
-            istatus = m.group(1)
-        else:
-            istatus = None
-
-        m = re.search("BUILD STATUS:(.*)", log)
-        if m:
-            bstatus = m.group(1)
-        else:
-            bstatus = None
-
-        m = re.search("CONFIGURE STATUS:(.*)", log)
-        if m:
-            cstatus = m.group(1)
-        else:
-            cstatus = None
-
-        other_failures = set()
-        m = re.search("(PANIC|INTERNAL ERROR):.*", log)
-        if m:
-            other_failures.add("panic")
-
-        if "No space left on device" in err or "No space left on device" in log:
-            other_failures.add("disk full")
-
-        if "maximum runtime exceeded" in log:
-            other_failures.add("timeout")
-
-        m = re.search("CC_CHECKER STATUS: (.*)", log)
-        if m:
-            sstatus = m.group(1)
-        else:
-            sstatus = None
-
-        return (cstatus, bstatus, istatus, tstatus, sstatus, other_failures)
-
-    def build_status_info_from_string(self, rev_seq, rev, status_raw):
-        """find the build status as an object
-
-        the 'value' gets one point for passing each stage"""
-        status_split = status_raw.split("/")
-        status_str = ""
-        status_arr = []
-        status_val = 0
-
-        for r in status_split:
-            r = r.strip()
-
-            if r == "ok":
-                e = 0
-            elif r.isdigit():
-                e = int(r)
-                if e < 0:
-                    e = 1
-            else:
-                e = 1
-
-            if status_str != "":
-                status_str += "/"
-            status_str += "%d" % r
-
-            status_val += e
-
-            status_arr.append(e)
-
-        return {
-            "rev": rev,
-            "rev_seq": rev_seq,
-            "array": status_arr,
-            "string": status_str,
-            "value": status_val,
-            }
-
-    def build_status_info_from_html(self, rev_seq, rev, status_html):
-        """find the build status as an perl object
-
-        the 'value' gets one point for passing each stage
-        """
-        status_raw = util.strip_html(status_html)
-        return self.build_status_info_from_string(rev_seq, rev, status_raw)
-
-    def build_status_info(self, tree, host, compiler, rev_seq):
-        """find the build status as an object
-
-        the 'value' gets one point for passing each stage
-        """
-        build = self.get_build(tree, host, compiler, rev_seq)
-        rev, rev_time = build.revision_details()
-        status_html = build.status()
-        return self.build_status_info_from_html(rev_seq, rev, status_html)
-
-    def lcov_status(self, tree):
-        """get status of build"""
-        cachefile = os.path.join(self.cachedir, "lcov.%s.%s.status" % (
-            self.LCOVHOST, tree))
-        file = os.path.join(self.lcovdir, self.LCOVHOST, tree, "index.html")
-        try:
-            st1 = os.stat(file)
-        except OSError:
-            # File does not exist
-            raise NoSuchBuildError(tree, self.LCOVHOST, "lcov")
-        try:
-            st2 = os.stat(cachefile)
-        except OSError:
-            # file does not exist
-            st2 = None
-
-        if st2 and st1.st_ctime <= st2.st_mtime:
-            return util.FileLoad(cachefile)
-
-        lcov_html = util.FileLoad(file)
-        m = re.search('\<td class="headerItem".*?\>Code\&nbsp\;covered\:\<\/td\>.*?\n.*?\<td class="headerValue".*?\>([0-9.]+) \%', lcov_html)
-        if m:
-            ret = "<a href=\"/lcov/data/%s/%s\">%s %%</a>" % (self.LCOVHOST, tree, m.group(1))
-        else:
-            ret = ""
-        if self.readonly:
-            util.FileSave(cachefile, ret)
-        return ret
+        return os.path.join(self.path, "build.%s.%s.%s-%s" % (tree, host, compiler, rev))
 
     def get_old_revs(self, tree, host, compiler):
         """get a list of old builds and their status."""
         ret = []
-        directory = os.path.join(self.datadir, "oldrevs")
-        logfiles = [d for d in os.listdir(directory) if d.startswith("build.%s.%s.%s-" % (tree, host, compiler)) and d.endswith(".log")]
+        logfiles = [d for d in os.listdir(self.path) if d.startswith("build.%s.%s.%s-" % (tree, host, compiler)) and d.endswith(".log")]
         for l in logfiles:
             m = re.match(".*-([0-9A-Fa-f]+).log$", l)
             if m:
                 rev = m.group(1)
-                stat = os.stat(os.path.join(directory, l))
+                stat = os.stat(os.path.join(self.path, l))
                 # skip the current build
                 if stat.st_nlink == 2:
                     continue
-                build = self.get_build(tree, host, compiler, rev)
-                r = {
-                    "STATUS": build.status(),
-                    "REVISION": rev,
-                    "TIMESTAMP": stat.st_ctime
-                    }
-                ret.append(r)
+                ret.append(self.get_build(tree, host, compiler, rev))
 
-        ret.sort(lambda a, b: cmp(a["TIMESTAMP"], b["TIMESTAMP"]))
+        ret.sort(lambda a, b: cmp(a.age_mtime(), b.age_mtime()))
 
         return ret
 
-    def has_host(self, host):
-        return host in os.listdir(os.path.join(self.datadir, "upload"))
-
-    def host_age(self, host):
-        """get the overall age of a host"""
-        ret = None
-        for compiler in self.compilers:
-            for tree in self.trees:
-                try:
-                    build = self.get_build(tree, host, compiler)
-                except NoSuchBuildError:
-                    pass
-                else:
-                    ret = min(ret, build.age_mtime())
-        return ret
+    def upload_build(self, build):
+        """Hard-link a build's files into this store under its revision.
+
+        A build already stored for the same tree, host, compiler and
+        revision has its log files removed first.
+
+        :param build: Build to store; its revision is read from its log
+        :return: Build object for the newly linked files
+        """
+        (rev, _rev_timestamp) = build.revision_details()
+        key = (build.tree, build.host, build.compiler, rev)
+
+        new_basename = self.build_fname(*key)
+        try:
+            existing_build = self.get_build(*key)
+        except NoSuchBuildError:
+            pass
+        else:
+            existing_build.remove_logs()
+        # The .log file is always linked, the .err file only when present.
+        os.link(build.basename + ".log", new_basename + ".log")
+        err_source = build.basename + ".err"
+        if os.path.exists(err_source):
+            os.link(err_source, new_basename + ".err")
+        return Build(new_basename, *key)
+
+    def get_previous_revision(self, tree, host, compiler, revision):
+        """Always raises NoSuchBuildError for the given build.
+
+        NOTE(review): looks like a placeholder with no lookup behind it
+        in this store - confirm.
+        """
+        raise NoSuchBuildError(tree, host, compiler, revision)
+
+    def get_latest_revision(self, tree, host, compiler):
+        """Always raises NoSuchBuildError for the given tree/host/compiler.
+
+        NOTE(review): looks like a placeholder with no lookup behind it
+        in this store - confirm.
+        """
+        raise NoSuchBuildError(tree, host, compiler)