Use separate function for tree parsing, allow C extension for tree parsing.

author Jelmer Vernooij <jelmer@samba.org>

Thu, 30 Apr 2009 09:57:10 +0000 (09:59 +0002)

committer Jelmer Vernooij <jelmer@samba.org>

Thu, 30 Apr 2009 09:57:10 +0000 (09:59 +0002)
author Jelmer Vernooij <jelmer@samba.org>
Thu, 30 Apr 2009 09:57:10 +0000 (09:59 +0002)
committer Jelmer Vernooij <jelmer@samba.org>
Thu, 30 Apr 2009 09:57:10 +0000 (09:59 +0002)
diff --git a/bin/dulwich b/bin/dulwich

index 83b399791ea5313fe05f6bf84562a1e223baedf5..88cf06db62396fd4da73f73d8c44c9a79ff0ee95 100755 (executable)
--- a/bin/dulwich
+++ b/bin/dulwich
@@ -166,14 +166,9 @@ def cmd_clone(args):
         r = Repo(path)
         graphwalker = SimpleFetchGraphWalker(r.heads().values(), r.get_parents)
         f, commit = r.object_store.add_pack()
-       try:
-               client.fetch_pack(host_path, r.object_store.determine_wants_all, graphwalker, f.write, 
-                                         sys.stdout.write)
-               f.close()
-               commit()
-       except:
-               f.close()
-               raise
+       client.fetch_pack(host_path, r.object_store.determine_wants_all, 
+                                 graphwalker, f.write, sys.stdout.write)
+       commit()
  
  
  commands = {
diff --git a/dulwich/index.py b/dulwich/index.py

index bdabfcfa0135470e74f8b367c4b4d5e7021c094f..bfc90d6bfcd346a620c12ce5d32f732f3960d729 100644 (file)
--- a/dulwich/index.py
+++ b/dulwich/index.py
@@ -25,27 +25,40 @@ def read_cache_time(f):
  
  
  def write_cache_time(f, t):
+    if isinstance(t, int):
+        t = (t, 0)
      f.write(struct.pack(">LL", *t))
  
  
  def read_cache_entry(f):
+    """Read an entry from a cache file.
+
+    :param f: File-like object to read from
+    :return: tuple with: inode, device, mode, uid, gid, size, sha, flags
+    """
      beginoffset = f.tell()
      ctime = read_cache_time(f)
      mtime = read_cache_time(f)
      (ino, dev, mode, uid, gid, size, sha, flags, ) = \
          struct.unpack(">LLLLLL20sH", f.read(20 + 4 * 6 + 2))
      name = ""
-    char = ord(f.read(1))
-    while char != 0:
-        name += chr(char)
-        char = ord(f.read(1))
+    char = f.read(1)
+    while char != "\0":
+        name += char
+        char = f.read(1)
      # Padding:
-    real_size = ((f.tell() - beginoffset + 8) & ~7)
+    real_size = ((f.tell() - beginoffset + 7) & ~7)
      f.seek(beginoffset + real_size)
      return (name, ctime, mtime, ino, dev, mode, uid, gid, size, sha, flags)
  
  
  def write_cache_entry(f, entry):
+    """Write an index entry to a file.
+
+    :param f: File object
+    :param entry: Entry to write, tuple with: 
+        (name, ctime, mtime, ino, dev, mode, uid, gid, size, sha, flags)
+    """
      beginoffset = f.tell()
      (name, ctime, mtime, ino, dev, mode, uid, gid, size, sha, flags) = entry
      write_cache_time(f, ctime)
@@ -53,46 +66,100 @@ def write_cache_entry(f, entry):
      f.write(struct.pack(">LLLLLL20sH", ino, dev, mode, uid, gid, size, sha, flags))
      f.write(name)
      f.write(chr(0))
-    real_size = ((f.tell() - beginoffset + 8) & ~7)
-    f.write(chr(0) * (f.tell() - (beginoffset + real_size)))
-    return 
+    real_size = ((f.tell() - beginoffset + 7) & ~7)
+    f.write("\0" * ((beginoffset + real_size) - f.tell()))
+
  
  def read_index(f):
-    assert f.read(4) == "DIRC"
+    """Read an index file, yielding the individual entries."""
+    header = f.read(4)
+    if header != "DIRC":
+        raise AssertionError("Invalid index file header: %r" % header)
      (version, num_entries) = struct.unpack(">LL", f.read(4 * 2))
      assert version in (1, 2)
      for i in range(num_entries):
          yield read_cache_entry(f)
  
  
+def read_index_dict(f):
+    """Read an index file and return it as a dictionary.
+    
+    :param f: File object to read from
+    """
+    ret = {}
+    for x in read_index(f):
+        ret[x[0]] = tuple(x[1:])
+    return ret
+
+
  def write_index(f, entries):
+    """Write an index file.
+    
+    :param f: File-like object to write to
+    :param entries: Iterable over the entries to write
+    """
      f.write("DIRC")
      f.write(struct.pack(">LL", 2, len(entries)))
      for x in entries:
          write_cache_entry(f, x)
  
  
+def write_index_dict(f, entries):
+    """Write an index file based on the contents of a dictionary.
+
+    """
+    entries_list = []
+    for name in sorted(entries):
+        entries_list.append((name,) + tuple(entries[name]))
+    write_index(f, entries_list)
+
+
  class Index(object):
  
      def __init__(self, filename):
-        self._entries = []
-        f = open(filename, 'r')
-        self._byname = {}
+        self._filename = filename
+        self.clear()
+        self.read()
+
+    def write(self):
+        f = open(self._filename, 'w')
+        try:
+            write_index_dict(f, self._byname)
+        finally:
+            f.close()
+
+    def read(self):
+        f = open(self._filename, 'r')
          try:
              for x in read_index(f):
-                self._entries.append(x)
-                self._byname[x[0]] = x
+                self[x[0]] = tuple(x[1:])
          finally:
              f.close()
  
      def __len__(self):
-        return len(self._entries)
+        return len(self._byname)
  
-    def items(self):
-        return list(self._entries)
+    def __getitem__(self, name):
+        return self._byname[name]
  
      def __iter__(self):
-        return iter(self._entries)
+        return iter(self._byname)
  
-    def __getitem__(self, name):
-        return self._byname[name]
+    def get_sha1(self, path):
+        return self[path][-2]
+
+    def clear(self):
+        self._byname = {}
+
+    def __setitem__(self, name, x):
+        assert isinstance(name, str)
+        assert len(x) == 10
+        # Remove the old entry if any
+        self._byname[name] = x
+
+    def iteritems(self):
+        return self._byname.iteritems()
+
+    def update(self, entries):
+        for name, value in entries.iteritems():
+            self[name] = value
diff --git a/dulwich/object_store.py b/dulwich/object_store.py

index b761488809d25e4a7d7e215cffc485ecca2a0c51..4441c6bfca717f0d4dd49bd8bd8feb59ab97840c 100644 (file)
--- a/dulwich/object_store.py
+++ b/dulwich/object_store.py
@@ -100,11 +100,13 @@ class ObjectStore(object):
          return None
  
      def _add_shafile(self, sha, o):
-        path = self._get_shafile_path(sha)
-        f = os.path.open(path, 'w')
+        dir = os.path.join(self.path, sha[:2])
+        if not os.path.isdir(dir):
+            os.mkdir(dir)
+        path = os.path.join(dir, sha[2:])
+        f = open(path, 'w+')
          try:
-            f.write(o._header())
-            f.write(o._text)
+            f.write(o.as_legacy_object())
          finally:
              f.close()
  
@@ -211,6 +213,9 @@ class ObjectStore(object):
                  self.move_in_pack(path)
          return f, commit
  
+    def add_object(self, obj):
+        self._add_shafile(obj.id, obj)
+
      def add_objects(self, objects):
          """Add a set of objects to this object store.
  
diff --git a/dulwich/objects.py b/dulwich/objects.py

index 3ae73b9fb80941802b0ac2d12c1de78bb3714072..3657029035db975c20b41b6699eee0c13a7b6da6 100644 (file)
--- a/dulwich/objects.py
+++ b/dulwich/objects.py
@@ -1,4 +1,4 @@
-# objects.py -- Acces to base git objects
+# objects.py -- Access to base git objects
  # Copyright (C) 2007 James Westby <jw+debian@jameswestby.net>
  # Copyright (C) 2008 Jelmer Vernooij <jelmer@samba.org>
  # 
@@ -17,6 +17,10 @@
  # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
  # MA  02110-1301, USA.
  
+
+"""Access to base git objects."""
+
+
  import mmap
  import os
  import sha
@@ -45,17 +49,20 @@ def _decompress(string):
      dcomped += dcomp.flush()
      return dcomped
  
+
  def sha_to_hex(sha):
      """Takes a string and returns the hex of the sha within"""
      hexsha = "".join(["%02x" % ord(c) for c in sha])
      assert len(hexsha) == 40, "Incorrect length of sha1 string: %d" % hexsha
      return hexsha
  
+
  def hex_to_sha(hex):
      """Takes a hex sha and returns a binary sha"""
      assert len(hex) == 40, "Incorrent length of hexsha: %s" % hex
      return ''.join([chr(int(hex[i:i+2], 16)) for i in xrange(0, len(hex), 2)])
  
+
  class ShaFile(object):
      """A git SHA file."""
    
@@ -85,6 +92,9 @@ class ShaFile(object):
          text = text[1:]
          object._text = text
          return object
+
+    def as_legacy_object(self):
+        return zlib.compress("%s %d\0%s" % (self._type, len(self._text), self._text))
    
      def as_raw_string(self):
          return self._num_type, self._text
@@ -99,8 +109,8 @@ class ShaFile(object):
          try:
              object = num_type_map[num_type]()
          except KeyError:
-            assert False, "Not a known type: %d" % num_type
-        while((byte & 0x80) != 0):
+            raise AssertionError("Not a known type: %d" % num_type)
+        while (byte & 0x80) != 0:
              byte = ord(map[used])
              used += 1
          raw = map[used:]
@@ -352,7 +362,7 @@ class Tree(ShaFile):
      _num_type = 2
  
      def __init__(self):
-        self._entries = []
+        self._entries = {}
  
      @classmethod
      def from_file(cls, filename):
@@ -361,18 +371,27 @@ class Tree(ShaFile):
              raise NotTreeError(filename)
          return tree
  
+    def __getitem__(self, name):
+        return self._entries[name]
+
+    def __setitem__(self, name, value):
+        assert isinstance(value, tuple)
+        assert len(value) == 2
+        self._entries[name] = value
+
+    def __delitem__(self, name):
+        del self._entries[name]
+
      def add(self, mode, name, hexsha):
-        self._entries.append((mode, name, hexsha))
+        self._entries[name] = mode, hexsha
  
      def entries(self):
          """Return a list of tuples describing the tree entries"""
-        return self._entries
+        return [(mode, name, hexsha) for (name, (mode, hexsha)) in self._entries.iteritems()]
  
-    def __getitem__(self, name):
-        for mode, entry, hexsha in self.entries():
-            if entry == name:
-                return mode, hexsha
-        raise KeyError(name)
+    def iteritems(self):
+        for name in sorted(self._entries.keys()):
+            yield name, self._entries[name][0], self._entries[name][1]
  
      def _parse_text(self):
          """Grab the entries in the tree"""
@@ -380,7 +399,7 @@ class Tree(ShaFile):
  
      def serialize(self):
          self._text = ""
-        for mode, name, hexsha in self._entries:
+        for name, mode, hexsha in self.iteritems():
              self._text += "%04o %s\0%s" % (mode, name, hex_to_sha(hexsha))
  
  
diff --git a/dulwich/pack.py b/dulwich/pack.py

index 89db731e6108912e6e6b58675a053c71e1215839..5d665026e82989e5376995b37b420e94e25286bb 100644 (file)
--- a/dulwich/pack.py
+++ b/dulwich/pack.py
@@ -107,13 +107,8 @@ def simple_mmap(f, offset, size, access=mmap.ACCESS_READ):
      :param access: Access mechanism.
      :return: MMAP'd area.
      """
-    if supports_mmap_offset:
-        return (mmap.mmap(f.fileno(), size, access=access, 
-                    offset=int(offset / mmap.ALLOCATIONGRANULARITY)), 
-                (offset % mmap.ALLOCATIONGRANULARITY))
-    else:
-        mem = mmap.mmap(f.fileno(), size+offset, access=access)
-        return mem, offset
+    mem = mmap.mmap(f.fileno(), size+offset, access=access)
+    return mem, offset
  
  
  def load_pack_index(filename):
diff --git a/dulwich/tests/test_index.py b/dulwich/tests/test_index.py

index 855ab844e5cdf12848bbf28b3e4bd6103a98fd94..da06438ffbec813de6f3ad939412612c9126db99 100644 (file)
--- a/dulwich/tests/test_index.py
+++ b/dulwich/tests/test_index.py
@@ -21,6 +21,7 @@ from unittest import TestCase
  
  from dulwich.index import (
      Index,
+    read_index,
      write_index,
      )
  
@@ -38,13 +39,10 @@ class SimpleIndexTestcase(IndexTestCase):
          self.assertEquals(1, len(self.get_simple_index("index")))
  
      def test_iter(self):
-        self.assertEquals([
-            ('bla', (1230680220, 0), (1230680220, 0), 2050, 3761020, 33188, 1000, 1000, 0, '\xe6\x9d\xe2\x9b\xb2\xd1\xd6CK\x8b)\xaewZ\xd8\xc2\xe4\x8cS\x91', 3)
-            ], 
-                list(self.get_simple_index("index")))
+        self.assertEquals(['bla'], list(self.get_simple_index("index")))
  
      def test_getitem(self):
-        self.assertEquals( ('bla', (1230680220, 0), (1230680220, 0), 2050, 3761020, 33188, 1000, 1000, 0, '\xe6\x9d\xe2\x9b\xb2\xd1\xd6CK\x8b)\xaewZ\xd8\xc2\xe4\x8cS\x91', 3)
+        self.assertEquals( ((1230680220, 0), (1230680220, 0), 2050, 3761020, 33188, 1000, 1000, 0, '\xe6\x9d\xe2\x9b\xb2\xd1\xd6CK\x8b)\xaewZ\xd8\xc2\xe4\x8cS\x91', 3)
              , 
                  self.get_simple_index("index")["bla"])
  
@@ -52,10 +50,15 @@ class SimpleIndexTestcase(IndexTestCase):
  class SimpleIndexWriterTestCase(IndexTestCase):
  
      def test_simple_write(self):
+        entries = [('barbla', (1230680220, 0), (1230680220, 0), 2050, 3761020, 33188, 1000, 1000, 0, '\xe6\x9d\xe2\x9b\xb2\xd1\xd6CK\x8b)\xaewZ\xd8\xc2\xe4\x8cS\x91', 3)]
          x = open('test-simple-write-index', 'w+')
          try:
-            write_index(x, [('barbla', (1230680220, 0), (1230680220, 0), 2050, 3761020, 33188, 1000, 1000, 0, '\xe6\x9d\xe2\x9b\xb2\xd1\xd6CK\x8b)\xaewZ\xd8\xc2\xe4\x8cS\x91', 3)])
+            write_index(x, entries)
+        finally:
+            x.close()
+        x = open('test-simple-write-index', 'r')
+        try:
+            self.assertEquals(entries, list(read_index(x)))
          finally:
              x.close()
-
author	Jelmer Vernooij <jelmer@samba.org>
	Thu, 30 Apr 2009 09:57:10 +0000 (09:59 +0002)
committer	Jelmer Vernooij <jelmer@samba.org>
	Thu, 30 Apr 2009 09:57:10 +0000 (09:59 +0002)
bin/dulwich		patch \| blob \| history
dulwich/index.py		patch \| blob \| history
dulwich/object_store.py		patch \| blob \| history
dulwich/objects.py		patch \| blob \| history
dulwich/pack.py		patch \| blob \| history
dulwich/tests/test_index.py		patch \| blob \| history