Provide C implementation of tree item sorter.
authorJelmer Vernooij <jelmer@samba.org>
Wed, 31 Mar 2010 15:44:14 +0000 (17:44 +0200)
committerJelmer Vernooij <jelmer@samba.org>
Wed, 31 Mar 2010 15:44:14 +0000 (17:44 +0200)
dulwich/_objects.c
dulwich/objects.py

index cf6aaaee40375a5d7d2349433fbb02fd6940f368..0273d743ac7fcff59ea32b55e3d28e2db28e8bc8 100644 (file)
@@ -18,6 +18,8 @@
  */
 
 #include <Python.h>
+#include <stdlib.h>
+#include <sys/stat.h>
 
 #define bytehex(x) (((x)<0xa)?('0'+(x)):('a'-0xa+(x)))
 
@@ -49,8 +51,8 @@ static PyObject *py_parse_tree(PyObject *self, PyObject *args)
 
        end = text + len;
 
-    while (text < end) {
-        long mode;
+       while (text < end) {
+               long mode;
                mode = strtol(text, &text, 8);
 
                if (*text != ' ') {
@@ -61,7 +63,7 @@ static PyObject *py_parse_tree(PyObject *self, PyObject *args)
 
                text++;
 
-        namelen = strlen(text);
+               namelen = strlen(text);
 
                name = PyString_FromStringAndSize(text, namelen);
                if (name == NULL) {
@@ -69,12 +71,13 @@ static PyObject *py_parse_tree(PyObject *self, PyObject *args)
                        return NULL;
                }
 
-        item = Py_BuildValue("(lN)", mode, sha_to_pyhex((unsigned char *)text+namelen+1));
-        if (item == NULL) {
-            Py_DECREF(ret);
+               item = Py_BuildValue("(lN)", mode,
+                                                        sha_to_pyhex((unsigned char *)text+namelen+1));
+               if (item == NULL) {
+                       Py_DECREF(ret);
                        Py_DECREF(name);
-            return NULL;
-        }
+                       return NULL;
+               }
                if (PyDict_SetItem(ret, name, item) == -1) {
                        Py_DECREF(ret);
                        Py_DECREF(item);
@@ -84,13 +87,110 @@ static PyObject *py_parse_tree(PyObject *self, PyObject *args)
                Py_DECREF(item);
 
                text += namelen+21;
-    }
+       }
+
+       return ret;
+}
+
+struct tree_item {
+       const char *name;
+       int mode;
+       PyObject *tuple;
+};
+
+int cmp_tree_item(const void *_a, const void *_b)
+{
+       const struct tree_item *a = _a, *b = _b;
+       const char *remain_a, *remain_b;
+       int ret, common;
+       if (strlen(a->name) > strlen(b->name)) {
+               common = strlen(b->name);
+               remain_a = a->name + common;
+               remain_b = (S_ISDIR(b->mode)?"/":"");
+       } else if (strlen(b->name) > strlen(a->name)) { 
+               common = strlen(a->name);
+               remain_a = (S_ISDIR(a->mode)?"/":"");
+               remain_b = b->name + common;
+       } else { /* strlen(a->name) == strlen(b->name) */
+               common = 0;
+               remain_a = a->name;
+               remain_b = b->name;
+       }
+       ret = strncmp(a->name, b->name, common);
+       if (ret != 0)
+               return ret;
+       return strcmp(remain_a, remain_b);
+}
+
+static PyObject *py_sorted_tree_items(PyObject *self, PyObject *entries)
+{
+       struct tree_item *qsort_entries;
+       int num, i;
+       PyObject *ret;
+       Py_ssize_t pos = 0; 
+       PyObject *key, *value;
+
+       if (!PyDict_Check(entries)) {
+               PyErr_SetString(PyExc_TypeError, "Argument not a dictionary");
+               return NULL;
+       }
+
+       num = PyDict_Size(entries);
+       qsort_entries = malloc(num * sizeof(struct tree_item));
+       if (qsort_entries == NULL) {
+               PyErr_NoMemory();
+               return NULL;
+       }
+
+       i = 0;
+       while (PyDict_Next(entries, &pos, &key, &value)) {
+               PyObject *py_mode, *py_sha;
+               
+               if (PyTuple_Size(value) != 2) {
+                       PyErr_SetString(PyExc_ValueError, "Tuple has invalid size");
+                       free(qsort_entries);
+                       return NULL;
+               }
+
+               py_mode = PyTuple_GET_ITEM(value, 0);
+               py_sha = PyTuple_GET_ITEM(value, 1);
+               qsort_entries[i].tuple = Py_BuildValue("(OOO)", key, py_mode, py_sha);
+               if (!PyString_CheckExact(key)) {
+                       PyErr_SetString(PyExc_TypeError, "Name is not a string");
+                       free(qsort_entries);
+                       return NULL;
+               }
+               qsort_entries[i].name = PyString_AS_STRING(key);
+               if (!PyInt_CheckExact(py_mode)) {
+                       PyErr_SetString(PyExc_TypeError, "Mode is not an int");
+                       free(qsort_entries);
+                       return NULL;
+               }
+               qsort_entries[i].mode = PyInt_AS_LONG(py_mode);
+               i++;
+       }
+
+       qsort(qsort_entries, num, sizeof(struct tree_item), cmp_tree_item);
+
+       ret = PyList_New(num);
+       if (ret == NULL) {
+               free(qsort_entries);
+               PyErr_NoMemory();
+               return NULL;
+       }
+
+       for (i = 0; i < num; i++) {
+               PyList_SET_ITEM(ret, i, qsort_entries[i].tuple);
+       }
+
+       free(qsort_entries);
 
-    return ret;
+       return ret;
 }
 
 static PyMethodDef py_objects_methods[] = {
        { "parse_tree", (PyCFunction)py_parse_tree, METH_VARARGS, NULL },
+       { "sorted_tree_items", (PyCFunction)py_sorted_tree_items, METH_O, NULL },
        { NULL, NULL, 0, NULL }
 };
 
index 9a07e22c61117bc0a462d15db3a7086d3ea88dd5..e4a53c14ab920f0ce556131ad4b3d820c2a26f01 100644 (file)
@@ -53,7 +53,8 @@ TYPE_ID = "type"
 TAGGER_ID = "tagger"
 ENCODING_ID = "encoding"
 
-S_IFGITLINK    = 0160000
+S_IFGITLINK = 0160000
+
 def S_ISGITLINK(m):
     return (stat.S_IFMT(m) == S_IFGITLINK)
 
@@ -421,25 +422,54 @@ class Tag(ShaFile):
 
 
 def parse_tree(text):
+    """Parse a tree text.
+
+    :param text: Serialized text to parse
+    :return: Dictionary with names as keys, (mode, sha) tuples as values
+    """
     ret = {}
     count = 0
     l = len(text)
     while count < l:
         mode_end = text.index(' ', count)
         mode = int(text[count:mode_end], 8)
-
         name_end = text.index('\0', mode_end)
         name = text[mode_end+1:name_end]
-
         count = name_end+21
-
         sha = text[name_end+1:count]
-
         ret[name] = (mode, sha_to_hex(sha))
-
     return ret
 
 
+def serialize_tree(items):
+    """Serialize the items in a tree to a text.
+
+    :param items: Sorted iterable over (name, mode, sha) tuples
+    :return: Serialized tree text
+    """
+    f = StringIO()
+    for name, mode, hexsha in items:
+        f.write("%04o %s\0%s" % (mode, name, hex_to_sha(hexsha)))
+    return f.getvalue()
+
+
+def sorted_tree_items(entries):
+    """Iterate over a tree entries dictionary in the order in which 
+    the items would be serialized.
+
+    :param entries: Dictionary mapping names to (mode, sha) tuples
+    :return: Iterator over (name, mode, sha)
+    """
+    def cmp_entry((name1, value1), (name2, value2)):
+        if stat.S_ISDIR(value1[0]):
+            name1 += "/"
+        if stat.S_ISDIR(value2[0]):
+            name2 += "/"
+        return cmp(name1, name2)
+    for name, entry in sorted(entries.iteritems(), cmp=cmp_entry):
+        yield name, entry[0], entry[1]
+
+
 class Tree(ShaFile):
     """A Git tree object"""
 
@@ -494,19 +524,19 @@ class Tree(ShaFile):
     def entries(self):
         """Return a list of tuples describing the tree entries"""
         self._ensure_parsed()
-        # The order of this is different from iteritems() for historical reasons
-        return [(mode, name, hexsha) for (name, mode, hexsha) in self.iteritems()]
+        # The order of this is different from iteritems() for historical
+        # reasons
+        return [
+            (mode, name, hexsha) for (name, mode, hexsha) in self.iteritems()]
 
     def iteritems(self):
-        def cmp_entry((name1, value1), (name2, value2)):
-            if stat.S_ISDIR(value1[0]):
-                name1 += "/"
-            if stat.S_ISDIR(value2[0]):
-                name2 += "/"
-            return cmp(name1, name2)
+        """Iterate over all entries in the order in which they would be
+        serialized.
+
+        :return: Iterator over (name, mode, sha) tuples
+        """
         self._ensure_parsed()
-        for name, entry in sorted(self._entries.iteritems(), cmp=cmp_entry):
-            yield name, entry[0], entry[1]
+        return sorted_tree_items(self._entries)
 
     def _parse_text(self):
         """Grab the entries in the tree"""
@@ -514,10 +544,7 @@ class Tree(ShaFile):
         self._needs_parsing = False
 
     def serialize(self):
-        f = StringIO()
-        for name, mode, hexsha in self.iteritems():
-            f.write("%04o %s\0%s" % (mode, name, hex_to_sha(hexsha)))
-        self._text = f.getvalue()
+        self._text = serialize_tree(self.iteritems())
         self._needs_serialization = False
 
     def as_pretty_string(self):
@@ -682,6 +709,6 @@ num_type_map = {
 
 try:
     # Try to import C versions
-    from dulwich._objects import parse_tree
+    from dulwich._objects import parse_tree, sorted_tree_items
 except ImportError:
     pass