dulwich/objects.py

   1 # objects.py -- Access to base git objects
   2 # Copyright (C) 2007 James Westby <jw+debian@jameswestby.net>
   3 # The header parsing code is based on that from git itself, which is
   4 # Copyright (C) 2005 Linus Torvalds
   5 # and licensed under v2 of the GPL.
   6 #
   7 # This program is free software; you can redistribute it and/or
   8 # modify it under the terms of the GNU General Public License
   9 # as published by the Free Software Foundation; version 2
  10 # of the License.
  11 #
  12 # This program is distributed in the hope that it will be useful,
  13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15 # GNU General Public License for more details.
  16 #
  17 # You should have received a copy of the GNU General Public License
  18 # along with this program; if not, write to the Free Software
  19 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
  20 # MA  02110-1301, USA.
  21
  22 import mmap
  23 import os
  24 import sha
  25 import zlib
  26
  27 from errors import (NotCommitError,
  28                     NotTreeError,
  29                     NotBlobError,
  30                     )
  31
  32 blob_id = "blob"
  33 tag_id = "tag"
  34 tree_id = "tree"
  35 commit_id = "commit"
  36 parent_id = "parent"
  37 author_id = "author"
  38 committer_id = "committer"
  39
  40 def _decompress(string):
  41     dcomp = zlib.decompressobj()
  42     dcomped = dcomp.decompress(string)
  43     dcomped += dcomp.flush()
  44     return dcomped
  45
  46 def sha_to_hex(sha):
  47   """Takes a string and returns the hex of the sha within"""
  48   hexsha = ''
  49   for c in sha:
  50     hexsha += "%02x" % ord(c)
  51   assert len(hexsha) == 40, "Incorrect length of sha1 string: %d" % \
  52          len(hexsha)
  53   return hexsha
  54
  55
  56 class ShaFile(object):
  57   """A git SHA file."""
  58
  59   @classmethod
  60   def _parse_legacy_object(cls, map):
  61     """Parse a legacy object, creating it and setting object._text"""
  62     text = _decompress(map)
  63     object = None
  64     for posstype in type_map.keys():
  65       if text.startswith(posstype):
  66         object = type_map[posstype]()
  67         text = text[len(posstype):]
  68         break
  69     assert object is not None, "%s is not a known object type" % text[:9]
  70     assert text[0] == ' ', "%s is not a space" % text[0]
  71     text = text[1:]
  72     size = 0
  73     i = 0
  74     while text[0] >= '0' and text[0] <= '9':
  75       if i > 0 and size == 0:
  76         assert False, "Size is not in canonical format"
  77       size = (size * 10) + int(text[0])
  78       text = text[1:]
  79       i += 1
  80     object._size = size
  81     assert text[0] == "\0", "Size not followed by null"
  82     text = text[1:]
  83     object._text = text
  84     return object
  85
  86   def as_raw_string(self):
  87     return self._num_type, self._text
  88
  89   @classmethod
  90   def _parse_object(cls, map):
  91     """Parse a new style object , creating it and setting object._text"""
  92     used = 0
  93     byte = ord(map[used])
  94     used += 1
  95     num_type = (byte >> 4) & 7
  96     try:
  97       object = num_type_map[num_type]()
  98     except KeyError:
  99       assert False, "Not a known type: %d" % num_type
 100     while((byte & 0x80) != 0):
 101       byte = ord(map[used])
 102       used += 1
 103     raw = map[used:]
 104     object._text = _decompress(raw)
 105     return object
 106
 107   @classmethod
 108   def _parse_file(cls, map):
 109     word = (ord(map[0]) << 8) + ord(map[1])
 110     if ord(map[0]) == 0x78 and (word % 31) == 0:
 111       return cls._parse_legacy_object(map)
 112     else:
 113       return cls._parse_object(map)
 114
 115   def __init__(self):
 116     """Don't call this directly"""
 117
 118   def _parse_text(self):
 119     """For subclasses to do initialistion time parsing"""
 120
 121   @classmethod
 122   def from_file(cls, filename):
 123     """Get the contents of a SHA file on disk"""
 124     size = os.path.getsize(filename)
 125     f = open(filename, 'rb')
 126     try:
 127       map = mmap.mmap(f.fileno(), size, access=mmap.ACCESS_READ)
 128       shafile = cls._parse_file(map)
 129       shafile._parse_text()
 130       return shafile
 131     finally:
 132       f.close()
 133
 134   @classmethod
 135   def from_raw_string(cls, type, string):
 136     """Creates an object of the indicated type from the raw string given.
 137
 138     Type is the numeric type of an object. String is the raw uncompressed
 139     contents.
 140     """
 141     real_class = num_type_map[type]
 142     obj = real_class()
 143     obj._num_type = type
 144     obj._text = string
 145     obj._parse_text()
 146     return obj
 147
 148   def _header(self):
 149     return "%s %lu\0" % (self._type, len(self._text))
 150
 151   def crc32(self):
 152     return zlib.crc32(self._text)
 153
 154   def sha(self):
 155     """The SHA1 object that is the name of this object."""
 156     ressha = sha.new()
 157     ressha.update(self._header())
 158     ressha.update(self._text)
 159     return ressha
 160
 161   def __eq__(self, other):
 162     """Return true id the sha of the two objects match.
 163
 164     The __le__ etc methods aren't overriden as they make no sense,
 165     certainly at this level.
 166     """
 167     return self.sha().digest() == other.sha().digest()
 168
 169
 170 class Blob(ShaFile):
 171   """A Git Blob object."""
 172
 173   _type = blob_id
 174
 175   @property
 176   def data(self):
 177     """The text contained within the blob object."""
 178     return self._text
 179
 180   @classmethod
 181   def from_file(cls, filename):
 182     blob = ShaFile.from_file(filename)
 183     if blob._type != cls._type:
 184       raise NotBlobError(filename)
 185     return blob
 186
 187   @classmethod
 188   def from_string(cls, string):
 189     """Create a blob from a string."""
 190     shafile = cls()
 191     shafile._text = string
 192     return shafile
 193
 194
 195 class Tag(ShaFile):
 196   """A Git Tag object."""
 197
 198   _type = tag_id
 199
 200   @classmethod
 201   def from_file(cls, filename):
 202     blob = ShaFile.from_file(filename)
 203     if blob._type != cls._type:
 204       raise NotBlobError(filename)
 205     return blob
 206
 207   @classmethod
 208   def from_string(cls, string):
 209     """Create a blob from a string."""
 210     shafile = cls()
 211     shafile._text = string
 212     return shafile
 213
 214
 215 class Tree(ShaFile):
 216   """A Git tree object"""
 217
 218   _type = tree_id
 219
 220   @classmethod
 221   def from_file(cls, filename):
 222     tree = ShaFile.from_file(filename)
 223     if tree._type != cls._type:
 224       raise NotTreeError(filename)
 225     return tree
 226
 227   def entries(self):
 228     """Return a list of tuples describing the tree entries"""
 229     return self._entries
 230
 231   def _parse_text(self):
 232     """Grab the entries in the tree"""
 233     self._entries = []
 234     count = 0
 235     while count < len(self._text):
 236       mode = 0
 237       chr = self._text[count]
 238       while chr != ' ':
 239         assert chr >= '0' and chr <= '7', "%s is not a valid mode char" % chr
 240         mode = (mode << 3) + (ord(chr) - ord('0'))
 241         count += 1
 242         chr = self._text[count]
 243       count += 1
 244       chr = self._text[count]
 245       name = ''
 246       while chr != '\0':
 247         name += chr
 248         count += 1
 249         chr = self._text[count]
 250       count += 1
 251       chr = self._text[count]
 252       sha = self._text[count:count+20]
 253       hexsha = sha_to_hex(sha)
 254       self._entries.append((mode, name, hexsha))
 255       count = count + 20
 256
 257 class Commit(ShaFile):
 258   """A git commit object"""
 259
 260   _type = commit_id
 261
 262   @classmethod
 263   def from_file(cls, filename):
 264     commit = ShaFile.from_file(filename)
 265     if commit._type != cls._type:
 266       raise NotCommitError(filename)
 267     return commit
 268
 269   def _parse_text(self):
 270     text = self._text
 271     count = 0
 272     assert text.startswith(tree_id), "Invalid commit object, " \
 273          "must start with %s" % tree_id
 274     count += len(tree_id)
 275     assert text[count] == ' ', "Invalid commit object, " \
 276          "%s must be followed by space not %s" % (tree_id, text[count])
 277     count += 1
 278     self._tree = text[count:count+40]
 279     count = count + 40
 280     assert text[count] == "\n", "Invalid commit object, " \
 281          "tree sha must be followed by newline"
 282     count += 1
 283     self._parents = []
 284     while text[count:].startswith(parent_id):
 285       count += len(parent_id)
 286       assert text[count] == ' ', "Invalid commit object, " \
 287            "%s must be followed by space not %s" % (parent_id, text[count])
 288       count += 1
 289       self._parents.append(text[count:count+40])
 290       count += 40
 291       assert text[count] == "\n", "Invalid commit object, " \
 292            "parent sha must be followed by newline"
 293       count += 1
 294     self._author = None
 295     if text[count:].startswith(author_id):
 296       count += len(author_id)
 297       assert text[count] == ' ', "Invalid commit object, " \
 298            "%s must be followed by space not %s" % (author_id, text[count])
 299       count += 1
 300       self._author = ''
 301       while text[count] != '>':
 302         assert text[count] != '\n', "Malformed author information"
 303         self._author += text[count]
 304         count += 1
 305       self._author += text[count]
 306       count += 1
 307       while text[count] != '\n':
 308         count += 1
 309       count += 1
 310     self._committer = None
 311     if text[count:].startswith(committer_id):
 312       count += len(committer_id)
 313       assert text[count] == ' ', "Invalid commit object, " \
 314            "%s must be followed by space not %s" % (committer_id, text[count])
 315       count += 1
 316       self._committer = ''
 317       while text[count] != '>':
 318         assert text[count] != '\n', "Malformed committer information"
 319         self._committer += text[count]
 320         count += 1
 321       self._committer += text[count]
 322       count += 1
 323       assert text[count] == ' ', "Invalid commit object, " \
 324            "commiter information must be followed by space not %s" % text[count]
 325       count += 1
 326       self._commit_time = int(text[count:count+10])
 327       while text[count] != '\n':
 328         count += 1
 329       count += 1
 330     assert text[count] == '\n', "There must be a new line after the headers"
 331     count += 1
 332     # XXX: There can be an encoding field.
 333     self._message = text[count:]
 334
 335   @property
 336   def tree(self):
 337     """Returns the tree that is the state of this commit"""
 338     return self._tree
 339
 340   @property
 341   def parents(self):
 342     """Return a list of parents of this commit."""
 343     return self._parents
 344
 345   @property
 346   def author(self):
 347     """Returns the name of the author of the commit"""
 348     return self._author
 349
 350   @property
 351   def committer(self):
 352     """Returns the name of the committer of the commit"""
 353     return self._committer
 354
 355   @property
 356   def message(self):
 357     """Returns the commit message"""
 358     return self._message
 359
 360   @property
 361   def commit_time(self):
 362     """Returns the timestamp of the commit.
 363
 364     Returns it as the number of seconds since the epoch.
 365     """
 366     return self._commit_time
 367
 368   @property
 369   def id(self):
 370       return self.sha().hexdigest()
 371
 372 type_map = {
 373   blob_id : Blob,
 374   tree_id : Tree,
 375   commit_id : Commit,
 376   tag_id: Tag,
 377 }
 378
 379 num_type_map = {
 380   0: None,
 381   1: Commit,
 382   2: Tree,
 383   3: Blob,
 384   4: Tag,
 385   # 5 Is reserved for further expansion
 386 }
 387