Make the fix script update revision details.
[amitay/build-farm.git] / buildfarm / data.py
index 8c30b7edadced0fc61651d8d2c10b26c73fe6d7d..90345f87fb8e781b99f74c1fcaac1d756cc14d12 100644 (file)
 #   along with this program; if not, write to the Free Software
 #   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 
-
 from cStringIO import StringIO
+import collections
 import hashlib
 import os
 import re
 import time
-import util
 
 
 class BuildSummary(object):
 
-    def __init__(self, host, tree, compiler, rev, status):
+    def __init__(self, host, tree, compiler, revision, status):
         self.host = host
         self.tree = tree
         self.compiler = compiler
-        self.rev = rev
+        self.revision = revision
         self.status = status
 
 
+BuildStageResult = collections.namedtuple("BuildStageResult", "name result")
+
+
+class MissingRevisionInfo(Exception):
+    """Revision info could not be found in the build log."""
+
+    def __init__(self, build=None):
+        self.build = build
+
+
+class LogFileMissing(Exception):
+    """Log file missing."""
+
+
 class BuildStatus(object):
 
     def __init__(self, stages=None, other_failures=None):
         if stages is not None:
-            self.stages = stages
+            self.stages = [BuildStageResult(n, r) for (n, r) in stages]
         else:
             self.stages = []
         if other_failures is not None:
@@ -52,19 +65,48 @@ class BuildStatus(object):
         else:
             self.other_failures = set()
 
+    @property
+    def failed(self):
+        if self.other_failures:
+            return True
+        return not all([x.result == 0 for x in self.stages])
+
+    def __serialize__(self):
+        return repr(self)
+
+    @classmethod
+    def __deserialize__(cls, text):
+        return eval(text)
+
+    def __str__(self):
+        if self.other_failures:
+            return ",".join(self.other_failures)
+        return "/".join([str(x.result) for x in self.stages])
+
     def broken_host(self):
         if "disk full" in self.other_failures:
             return True
         return False
 
-    def _status_tuple(self):
-        return [v for (k, v) in self.stages]
-
-    def regressed_since(self, other):
+    def regressed_since(self, older):
         """Check if this build has regressed since another build."""
         if "disk full" in self.other_failures:
             return False
-        return cmp(self._status_tuple(), other._status_tuple())
+        if "timeout" in self.other_failures and "timeout" in older.other_failures:
+            # Exactly when the timeout happens can differ slightly, so it's
+            # okay if the numbers are a bit different.
+            return False
+        if "panic" in self.other_failures and not "panic" in older.other_failures:
+            return True
+        if len(self.stages) < len(older.stages):
+            # Fewer stages completed
+            return True
+        for ((old_name, old_result), (new_name, new_result)) in zip(
+            older.stages, self.stages):
+            assert old_name == new_name
+            if new_result > old_result:
+                return True
+        return False
 
     def __cmp__(self, other):
         other_extra = other.other_failures - self.other_failures
@@ -84,8 +126,8 @@ class BuildStatus(object):
         else:
             return cmp(other.stages, self.stages)
 
-    def __str__(self):
-        return repr((self.stages, self.other_failures))
+    def __repr__(self):
+        return "%s(%r, %r)" % (self.__class__.__name__, self.stages, self.other_failures)
 
 
 def check_dir_exists(kind, path):
@@ -95,36 +137,25 @@ def check_dir_exists(kind, path):
 
 def build_status_from_logs(log, err):
     """get status of build"""
+    # FIXME: Perhaps also extract revision here?
+
     test_failures = 0
     test_successes = 0
     test_seen = 0
     ret = BuildStatus()
 
     stages = []
+    re_status = re.compile("^([A-Z_]+) STATUS:(\s*\d+)$")
+    re_action = re.compile("^ACTION (PASSED|FAILED):\s+test$")
 
     for l in log:
-        m = re.match("^([A-Z_]+) STATUS:(\s*\d+)$", l)
-        if m:
-            stages.append((m.group(1), int(m.group(2).strip())))
-            if m.group(1) == "TEST":
-                test_seen = 1
-            continue
-        m = re.match("^ACTION (PASSED|FAILED):\s+test$", l)
-        if m and not test_seen:
-            if m.group(1) == "PASSED":
-                stages.append(("TEST", 0))
-            else:
-                stages.append(("TEST", 1))
-            continue
-
         if l.startswith("No space left on device"):
             ret.other_failures.add("disk full")
             continue
-        if l.startswith("maximum runtime exceeded"):
+        if "maximum runtime exceeded" in l: # Ugh.
             ret.other_failures.add("timeout")
             continue
-        m = re.match("^(PANIC|INTERNAL ERROR):.*$", l)
-        if m:
+        if l.startswith("PANIC:") or l.startswith("INTERNAL ERROR:"):
             ret.other_failures.add("panic")
             continue
         if l.startswith("testsuite-failure: ") or l.startswith("testsuite-error: "):
@@ -133,28 +164,54 @@ def build_status_from_logs(log, err):
         if l.startswith("testsuite-success: "):
             test_successes += 1
             continue
+        m = re_status.match(l)
+        if m:
+            stages.append(BuildStageResult(m.group(1), int(m.group(2).strip())))
+            if m.group(1) == "TEST":
+                test_seen = 1
+            continue
+        m = re_action.match(l)
+        if m and not test_seen:
+            if m.group(1) == "PASSED":
+                stages.append(BuildStageResult("TEST", 0))
+            else:
+                stages.append(BuildStageResult("TEST", 1))
+            continue
 
     # Scan err file for specific errors
     for l in err:
         if "No space left on device" in l:
             ret.other_failures.add("disk full")
 
-    def map_stage(name, result):
-        if name != "TEST":
-            return (name, result)
+    def map_stage(sr):
+        if sr.name != "TEST":
+            return sr
         # TEST is special
         if test_successes + test_failures == 0:
             # No granular test output
-            return ("TEST", result)
-        if result == 1 and test_failures == 0:
+            return BuildStageResult("TEST", sr.result)
+        if sr.result == 1 and test_failures == 0:
             ret.other_failures.add("inconsistent test result")
-            return ("TEST", -1)
-        return ("TEST", test_failures)
+            return BuildStageResult("TEST", -1)
+        return BuildStageResult("TEST", test_failures)
 
-    ret.stages = [map_stage(name, result) for (name, result) in stages]
+    ret.stages = map(map_stage, stages)
     return ret
 
 
+def revision_from_log(log):
+    revid = None
+    timestamp = None
+    for l in log:
+        if l.startswith("BUILD COMMIT REVISION: "):
+            revid = l.split(":", 1)[1].strip()
+        elif l.startswith("BUILD COMMIT TIME"):
+            timestamp = l.split(":", 1)[1].strip()
+    if revid is None:
+        raise MissingRevisionInfo()
+    return (revid, timestamp)
+
+
 class NoSuchBuildError(Exception):
     """The build with the specified name does not exist."""
 
@@ -169,43 +226,55 @@ class Build(object):
     """A single build of a tree on a particular host using a particular compiler.
     """
 
-    def __init__(self, store, basename, tree, host, compiler, rev=None):
-        self._store = store
+    def __init__(self, basename, tree, host, compiler, rev=None):
         self.basename = basename
         self.tree = tree
         self.host = host
         self.compiler = compiler
-        self.rev = rev
+        self.revision = rev
+
+    def __cmp__(self, other):
+        return cmp(
+            (self.upload_time, self.revision, self.host, self.tree, self.compiler),
+            (other.upload_time, other.revision, other.host, other.tree, other.compiler))
+
+    def __eq__(self, other):
+        return (isinstance(other, Build) and
+                self.log_checksum() == other.log_checksum())
 
     def __repr__(self):
-        if self.rev:
-            return "<%s: revision %s of %s on %s using %s>" % (self.__class__.__name__, self.rev, self.tree, self.host, self.compiler)
+        if self.revision is not None:
+            return "<%s: revision %s of %s on %s using %s>" % (self.__class__.__name__, self.revision, self.tree, self.host, self.compiler)
         else:
             return "<%s: %s on %s using %s>" % (self.__class__.__name__, self.tree, self.host, self.compiler)
 
-    def remove(self):
-        os.unlink(self.basename + ".log")
+    def remove_logs(self):
+        # In general, basename.log should *always* exist.
+        if os.path.exists(self.basename+".log"):
+            os.unlink(self.basename + ".log")
         if os.path.exists(self.basename+".err"):
             os.unlink(self.basename+".err")
 
-    ###################
-    # the mtime age is used to determine if builds are still happening
-    # on a host.
-    # the ctime age is used to determine when the last real build happened
+    def remove(self):
+        self.remove_logs()
 
-    def age_mtime(self):
-        """get the age of build from mtime"""
+    @property
+    def upload_time(self):
+        """get upload timestamp of build (mtime of the log file)"""
         st = os.stat("%s.log" % self.basename)
-        return time.time() - st.st_mtime
+        return st.st_mtime
 
-    def age_ctime(self):
-        """get the age of build from ctime"""
-        st = os.stat("%s.log" % self.basename)
-        return time.time() - st.st_ctime
+    @property
+    def age(self):
+        """get the age of build (seconds elapsed since upload_time)"""
+        return time.time() - self.upload_time
 
     def read_log(self):
         """read full log file"""
-        return open(self.basename+".log", "r")
+        try:
+            return open(self.basename+".log", "r")
+        except IOError:
+            raise LogFileMissing()
 
     def read_err(self):
         """read full err file"""
@@ -223,9 +292,7 @@ class Build(object):
             f.close()
 
     def summary(self):
-        (revid, commit_revid, timestamp) = self.revision_details()
-        if commit_revid:
-            revid = commit_revid
+        (revid, timestamp) = self.revision_details()
         status = self.status()
         return BuildSummary(self.host, self.tree, self.compiler, revid, status)
 
@@ -234,24 +301,12 @@ class Build(object):
 
         :return: Tuple with revision id and timestamp (if available)
         """
-
-        revid = None
-        commit_revid = None
-        timestamp = None
         f = self.read_log()
         try:
-            for l in f:
-                if l.startswith("BUILD COMMIT REVISION: "):
-                    commit_revid = l.split(":", 1)[1].strip()
-                elif l.startswith("BUILD REVISION: "):
-                    revid = l.split(":", 1)[1].strip()
-                elif l.startswith("BUILD COMMIT TIME"):
-                    timestamp = l.split(":", 1)[1].strip()
+            return revision_from_log(f)
         finally:
             f.close()
 
-        return (revid, commit_revid, timestamp)
-
     def status(self):
         """get status of build
 
@@ -273,84 +328,6 @@ class Build(object):
         return len(file.readlines())
 
 
-class CachingBuild(Build):
-    """Build subclass that caches some of the results that are expensive
-    to calculate."""
-
-    def revision_details(self):
-        if self.rev:
-            cachef = self._store.cache_fname(self.tree, self.host, self.compiler, self.rev)
-        else:
-            cachef = self._store.cache_fname(self.tree, self.host, self.compiler)
-        st1 = os.stat("%s.log" % self.basename)
-
-        try:
-            st2 = os.stat("%s.revision" % cachef)
-        except OSError:
-            # File does not exist
-            st2 = None
-
-        # the ctime/mtime asymmetry is needed so we don't get fooled by
-        # the mtime update from rsync
-        if st2 and st1.st_ctime <= st2.st_mtime:
-            (revid, commit_revid, timestamp) = util.FileLoad("%s.revision" % cachef).split(":", 2)
-            if timestamp == "":
-                timestamp = None
-            if revid == "":
-                revid = None
-            if commit_revid == "":
-                commit_revid = None
-            return (revid, commit_revid, timestamp)
-        (revid, commit_revid, timestamp) = super(CachingBuild, self).revision_details()
-        if not self._store.readonly:
-            util.FileSave("%s.revision" % cachef, "%s:%s:%s" % (revid, commit_revid or "", timestamp or ""))
-        return (revid, commit_revid, timestamp)
-
-    def err_count(self):
-        cachef = self._store.cache_fname(self.tree, self.host, self.compiler, self.rev)
-        st1 = os.stat("%s.err" % self.basename)
-
-        try:
-            st2 = os.stat("%s.errcount" % cachef)
-        except OSError:
-            # File does not exist
-            st2 = None
-
-        if st2 and st1.st_ctime <= st2.st_mtime:
-            return util.FileLoad("%s.errcount" % cachef)
-
-        ret = super(CachingBuild, self).err_count()
-
-        if not self._store.readonly:
-            util.FileSave("%s.errcount" % cachef, str(ret))
-
-        return ret
-
-    def status(self):
-        if self.rev:
-            cachefile = self._store.cache_fname(self.tree, self.host, self.compiler, self.rev)+".status"
-        else:
-            cachefile = self._store.cache_fname(self.tree, self.host, self.compiler)+".status"
-
-        st1 = os.stat("%s.log" % self.basename)
-
-        try:
-            st2 = os.stat(cachefile)
-        except OSError:
-            # No such file
-            st2 = None
-
-        if st2 and st1.st_ctime <= st2.st_mtime:
-            return BuildStatus(*eval(util.FileLoad(cachefile)))
-
-        ret = super(CachingBuild, self).status()
-
-        if not self._store.readonly:
-            util.FileSave(cachefile, str(ret))
-
-        return ret
-
-
 class UploadBuildResultStore(object):
 
     def __init__(self, path):
@@ -360,6 +337,16 @@ class UploadBuildResultStore(object):
         """
         self.path = path
 
+    def get_new_builds(self):
+        for name in os.listdir(self.path):
+            try:
+                (build, tree, host, compiler, extension) = name.split(".")
+            except ValueError:
+                continue
+            if build != "build" or extension != "log":
+                continue
+            yield self.get_build(tree, host, compiler)
+
     def build_fname(self, tree, host, compiler):
         return os.path.join(self.path, "build.%s.%s.%s" % (tree, host, compiler))
 
@@ -377,29 +364,7 @@ class UploadBuildResultStore(object):
         logf = "%s.log" % basename
         if not os.path.exists(logf):
             raise NoSuchBuildError(tree, host, compiler)
-        return Build(self, basename, tree, host, compiler)
-
-
-class CachingUploadBuildResultStore(UploadBuildResultStore):
-
-    def __init__(self, basedir, cachedir, readonly=False):
-        """Open the database.
-
-        :param readonly: Whether to avoid saving cache files
-        """
-        super(CachingUploadBuildResultStore, self).__init__(basedir)
-        self.cachedir = cachedir
-        self.readonly = readonly
-
-    def cache_fname(self, tree, host, compiler):
-        return os.path.join(self.cachedir, "build.%s.%s.%s" % (tree, host, compiler))
-
-    def get_build(self, tree, host, compiler):
-        basename = self.build_fname(tree, host, compiler)
-        logf = "%s.log" % basename
-        if not os.path.exists(logf):
-            raise NoSuchBuildError(tree, host, compiler)
-        return CachingBuild(self, basename, tree, host, compiler)
+        return Build(basename, tree, host, compiler)
 
 
 class BuildResultStore(object):
@@ -412,91 +377,73 @@ class BuildResultStore(object):
         """
         self.path = path
 
-    def get_build(self, tree, host, compiler, rev):
+    def __contains__(self, build):
+        try:
+            if build.revision:
+                rev = build.revision
+            else:
+                rev, timestamp = build.revision_details()
+            self.get_build(build.tree, build.host, build.compiler, rev)
+        except NoSuchBuildError:
+            return False
+        else:
+            return True
+
+    def get_build(self, tree, host, compiler, rev, checksum=None):
         basename = self.build_fname(tree, host, compiler, rev)
         logf = "%s.log" % basename
         if not os.path.exists(logf):
             raise NoSuchBuildError(tree, host, compiler, rev)
-        return Build(self, basename, tree, host, compiler, rev)
+        return Build(basename, tree, host, compiler, rev)
 
     def build_fname(self, tree, host, compiler, rev):
         """get the name of the build file"""
         return os.path.join(self.path, "build.%s.%s.%s-%s" % (tree, host, compiler, rev))
 
-    def get_old_revs(self, tree, host, compiler):
+    def get_all_builds(self):
+        for l in os.listdir(self.path):
+            m = re.match("^build\.([0-9A-Za-z]+)\.([0-9A-Za-z]+)\.([0-9A-Za-z]+)-([0-9A-Fa-f]+).log$", l)
+            if not m:
+                continue
+            tree = m.group(1)
+            host = m.group(2)
+            compiler = m.group(3)
+            rev = m.group(4)
+            stat = os.stat(os.path.join(self.path, l))
+            # skip the current build (link count 2; presumably still hard-linked by upload_build's os.link)
+            if stat.st_nlink == 2:
+                continue
+            yield self.get_build(tree, host, compiler, rev)
+
+    def get_old_builds(self, tree, host, compiler):
         """get a list of old builds and their status."""
         ret = []
-        logfiles = [d for d in os.listdir(self.path) if d.startswith("build.%s.%s.%s-" % (tree, host, compiler)) and d.endswith(".log")]
-        for l in logfiles:
-            m = re.match(".*-([0-9A-Fa-f]+).log$", l)
-            if m:
-                rev = m.group(1)
-                stat = os.stat(os.path.join(self.path, l))
-                # skip the current build
-                if stat.st_nlink == 2:
-                    continue
-                build = self.get_build(tree, host, compiler, rev)
-                r = {
-                    "STATUS": build.status(),
-                    "REVISION": rev,
-                    "TIMESTAMP": build.age_ctime(),
-                    }
-                ret.append(r)
-
-        ret.sort(lambda a, b: cmp(a["TIMESTAMP"], b["TIMESTAMP"]))
-
+        for build in self.get_all_builds():
+            if build.tree == tree and build.host == host and build.compiler == compiler:
+                ret.append(build)
+        ret.sort(lambda a, b: cmp(a.upload_time, b.upload_time))
         return ret
 
     def upload_build(self, build):
-        (rev, commit_rev, rev_timestamp) = build.revision_details()
-
-        if commit_rev is not None:
-            rev = commit_rev
-
-        if not rev:
-            raise Exception("Unable to find revision in %r log" % build)
+        (rev, rev_timestamp) = build.revision_details()
 
         new_basename = self.build_fname(build.tree, build.host, build.compiler, rev)
+        try:
+            existing_build = self.get_build(build.tree, build.host, build.compiler, rev)
+        except NoSuchBuildError:
+            if os.path.exists(new_basename+".log"):
+                os.remove(new_basename+".log")
+            if os.path.exists(new_basename+".err"):
+                os.remove(new_basename+".err")
+        else:
+            existing_build.remove_logs()
         os.link(build.basename+".log", new_basename+".log")
         if os.path.exists(build.basename+".err"):
             os.link(build.basename+".err", new_basename+".err")
-
-        # FIXME:
-        # $st = $dbh->prepare("INSERT INTO build (tree, revision, commit_revision, host, compiler, checksum, age, status) VALUES (?, ?, ?, ?, ?, ?, ?, ?)")
-        # $st->execute($tree, $rev, $commit, $host, $compiler, $checksum, $stat->ctime, $status_html)
+        return Build(new_basename, build.tree, build.host, build.compiler, rev)
 
     def get_previous_revision(self, tree, host, compiler, revision):
         raise NoSuchBuildError(tree, host, compiler, revision)
 
-"""
-    def get_previous_revision(self, tree, host, compiler, revision):
-        # Look up the database to find the previous status
-        $st = $dbh->prepare("SELECT status, revision, commit_revision FROM build WHERE tree = ? AND host = ? AND compiler = ? AND revision != ? AND commit_revision != ? ORDER BY id DESC LIMIT 1")
-        $st->execute( $tree, $host, $compiler, $rev, $commit)
-
-        while ( my @row = $st->fetchrow_array ) {
-            $old_status_html = @row[0]
-            $old_rev = @row[1]
-            $old_commit = @row[2]
-        """
-
-
-class CachingBuildResultStore(BuildResultStore):
-
-    def __init__(self, basedir, cachedir, readonly=False):
-        super(CachingBuildResultStore, self).__init__(basedir)
-
-        self.cachedir = cachedir
-        check_dir_exists("cache", self.cachedir)
-
-        self.readonly = readonly
-
-    def get_build(self, tree, host, compiler, rev):
-        basename = self.build_fname(tree, host, compiler, rev)
-        logf = "%s.log" % basename
-        if not os.path.exists(logf):
-            raise NoSuchBuildError(tree, host, compiler, rev)
-        return CachingBuild(self, basename, tree, host, compiler, rev)
-
-    def cache_fname(self, tree, host, compiler, rev):
-        return os.path.join(self.cachedir, "build.%s.%s.%s-%s" % (tree, host, compiler, rev))
+    def get_latest_revision(self, tree, host, compiler):
+        raise NoSuchBuildError(tree, host, compiler)