dulwich/pack.py

   1 # pack.py -- For dealing wih packed git objects.
   2 # Copyright (C) 2007 James Westby <jw+debian@jameswestby.net>
   3 # Copryight (C) 2008 Jelmer Vernooij <jelmer@samba.org>
   4 # The code is loosely based on that in the sha1_file.c file from git itself,
   5 # which is Copyright (C) Linus Torvalds, 2005 and distributed under the
   6 # GPL version 2.
   7 #
   8 # This program is free software; you can redistribute it and/or
   9 # modify it under the terms of the GNU General Public License
  10 # as published by the Free Software Foundation; version 2
  11 # of the License.
  12 #
  13 # This program is distributed in the hope that it will be useful,
  14 # but WITHOUT ANY WARRANTY; without even the implied warranty of
  15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16 # GNU General Public License for more details.
  17 #
  18 # You should have received a copy of the GNU General Public License
  19 # along with this program; if not, write to the Free Software
  20 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
  21 # MA  02110-1301, USA.
  22
  23 """Classes for dealing with packed git objects.
  24
  25 A pack is a compact representation of a bunch of objects, stored
  26 using deltas where possible.
  27
  28 They have two parts, the pack file, which stores the data, and an index
  29 that tells you where the data is.
  30
  31 To find an object you look in all of the index files 'til you find a
  32 match for the object name. You then use the pointer got from this as
  33 a pointer in to the corresponding packfile.
  34 """
  35
  36 from collections import defaultdict
  37 import hashlib
  38 from itertools import imap, izip
  39 import mmap
  40 import os
  41 import sha
  42 import struct
  43 import sys
  44 import zlib
  45 import difflib
  46
  47 from objects import (
  48         ShaFile,
  49         hex_to_sha,
  50         sha_to_hex,
  51         )
  52 from errors import ApplyDeltaError
  53
  54 supports_mmap_offset = (sys.version_info[0] >= 3 or
  55         (sys.version_info[0] == 2 and sys.version_info[1] >= 6))
  56
  57
  58 def take_msb_bytes(map, offset):
  59     ret = []
  60     while len(ret) == 0 or ret[-1] & 0x80:
  61         ret.append(ord(map[offset]))
  62         offset += 1
  63     return ret
  64
  65
  66 def read_zlib(data, offset, dec_size):
  67     obj = zlib.decompressobj()
  68     x = ""
  69     fed = 0
  70     while obj.unused_data == "":
  71         base = offset+fed
  72         add = data[base:base+1024]
  73         fed += len(add)
  74         x += obj.decompress(add)
  75     assert len(x) == dec_size
  76     comp_len = fed-len(obj.unused_data)
  77     return x, comp_len
  78
  79
  80 def iter_sha1(iter):
  81     sha = hashlib.sha1()
  82     for name in iter:
  83         sha.update(name)
  84     return sha.hexdigest()
  85
  86
  87 MAX_MMAP_SIZE = 256 * 1024 * 1024
  88
  89 def simple_mmap(f, offset, size, access=mmap.ACCESS_READ):
  90     """Simple wrapper for mmap() which always supports the offset parameter.
  91
  92     :param f: File object.
  93     :param offset: Offset in the file, from the beginning of the file.
  94     :param size: Size of the mmap'ed area
  95     :param access: Access mechanism.
  96     :return: MMAP'd area.
  97     """
  98     if offset+size > MAX_MMAP_SIZE and not supports_mmap_offset:
  99         raise AssertionError("%s is larger than 256 meg, and this version "
 100             "of Python does not support the offset argument to mmap().")
 101     if supports_mmap_offset:
 102         return mmap.mmap(f.fileno(), size, access=access, offset=offset)
 103     else:
 104         class ArraySkipper(object):
 105
 106             def __init__(self, array, offset):
 107                 self.array = array
 108                 self.offset = offset
 109
 110             def __getslice__(self, i, j):
 111                 return self.array[i+self.offset:j+self.offset]
 112
 113             def __getitem__(self, i):
 114                 return self.array[i+self.offset]
 115
 116             def __len__(self):
 117                 return len(self.array) - self.offset
 118
 119             def __str__(self):
 120                 return str(self.array[self.offset:])
 121
 122         mem = mmap.mmap(f.fileno(), size+offset, access=access)
 123         if offset == 0:
 124             return mem
 125         return ArraySkipper(mem, offset)
 126
 127
 128 def resolve_object(offset, type, obj, get_ref, get_offset):
 129   """Resolve an object, possibly resolving deltas when necessary."""
 130   if not type in (6, 7): # Not a delta
 131      return type, obj
 132
 133   if type == 6: # offset delta
 134      (delta_offset, delta) = obj
 135      assert isinstance(delta_offset, int)
 136      assert isinstance(delta, str)
 137      offset = offset-delta_offset
 138      type, base_obj = get_offset(offset)
 139      assert isinstance(type, int)
 140   elif type == 7: # ref delta
 141      (basename, delta) = obj
 142      assert isinstance(basename, str) and len(basename) == 20
 143      assert isinstance(delta, str)
 144      type, base_obj = get_ref(basename)
 145      assert isinstance(type, int)
 146   type, base_text = resolve_object(offset, type, base_obj, get_ref, get_offset)
 147   return type, apply_delta(base_text, delta)
 148
 149
 150 class PackIndex(object):
 151   """An index in to a packfile.
 152
 153   Given a sha id of an object a pack index can tell you the location in the
 154   packfile of that object if it has it.
 155
 156   To do the loop it opens the file, and indexes first 256 4 byte groups
 157   with the first byte of the sha id. The value in the four byte group indexed
 158   is the end of the group that shares the same starting byte. Subtract one
 159   from the starting byte and index again to find the start of the group.
 160   The values are sorted by sha id within the group, so do the math to find
 161   the start and end offset and then bisect in to find if the value is present.
 162   """
 163
 164   def __init__(self, filename):
 165     """Create a pack index object.
 166
 167     Provide it with the name of the index file to consider, and it will map
 168     it whenever required.
 169     """
 170     self._filename = filename
 171     # Take the size now, so it can be checked each time we map the file to
 172     # ensure that it hasn't changed.
 173     self._size = os.path.getsize(filename)
 174     self._file = open(filename, 'r')
 175     self._contents = simple_mmap(self._file, 0, self._size)
 176     if self._contents[:4] != '\377tOc':
 177         self.version = 1
 178         self._fan_out_table = self._read_fan_out_table(0)
 179     else:
 180         (self.version, ) = struct.unpack_from(">L", self._contents, 4)
 181         assert self.version in (2,), "Version was %d" % self.version
 182         self._fan_out_table = self._read_fan_out_table(8)
 183         self._name_table_offset = 8 + 0x100 * 4
 184         self._crc32_table_offset = self._name_table_offset + 20 * len(self)
 185         self._pack_offset_table_offset = self._crc32_table_offset + 4 * len(self)
 186
 187   def __eq__(self, other):
 188     if type(self) != type(other):
 189         return False
 190
 191     if self._fan_out_table != other._fan_out_table:
 192         return False
 193
 194     for (name1, _, _), (name2, _, _) in izip(self.iterentries(), other.iterentries()):
 195         if name1 != name2:
 196             return False
 197     return True
 198
 199   def close(self):
 200     self._file.close()
 201
 202   def __len__(self):
 203     """Return the number of entries in this pack index."""
 204     return self._fan_out_table[-1]
 205
 206   def _unpack_entry(self, i):
 207     """Unpack the i-th entry in the index file.
 208
 209     :return: Tuple with object name (SHA), offset in pack file and
 210           CRC32 checksum (if known)."""
 211     if self.version == 1:
 212         (offset, name) = struct.unpack_from(">L20s", self._contents,
 213             (0x100 * 4) + (i * 24))
 214         return (name, offset, None)
 215     else:
 216         return (self._unpack_name(i), self._unpack_offset(i),
 217                 self._unpack_crc32_checksum(i))
 218
 219   def _unpack_name(self, i):
 220     if self.version == 1:
 221         return self._unpack_entry(i)[0]
 222     else:
 223         return struct.unpack_from("20s", self._contents,
 224                                   self._name_table_offset + i * 20)[0]
 225
 226   def _unpack_offset(self, i):
 227     if self.version == 1:
 228         return self._unpack_entry(i)[1]
 229     else:
 230         return struct.unpack_from(">L", self._contents,
 231                                   self._pack_offset_table_offset + i * 4)[0]
 232
 233   def _unpack_crc32_checksum(self, i):
 234     if self.version == 1:
 235         return None
 236     else:
 237         return struct.unpack_from(">L", self._contents,
 238                                   self._crc32_table_offset + i * 4)[0]
 239
 240   def __iter__(self):
 241       return imap(sha_to_hex, self._itersha())
 242
 243   def _itersha(self):
 244     for i in range(len(self)):
 245         yield self._unpack_name(i)
 246
 247   def objects_sha1(self):
 248     return iter_sha1(self._itersha())
 249
 250   def iterentries(self):
 251     """Iterate over the entries in this pack index.
 252
 253     Will yield tuples with object name, offset in packfile and crc32 checksum.
 254     """
 255     for i in range(len(self)):
 256         yield self._unpack_entry(i)
 257
 258   def _read_fan_out_table(self, start_offset):
 259     ret = []
 260     for i in range(0x100):
 261         ret.append(struct.unpack(">L", self._contents[start_offset+i*4:start_offset+(i+1)*4])[0])
 262     return ret
 263
 264   def check(self):
 265     """Check that the stored checksum matches the actual checksum."""
 266     return self.calculate_checksum() == self.get_stored_checksums()[1]
 267
 268   def calculate_checksum(self):
 269     f = open(self._filename, 'r')
 270     try:
 271         return hashlib.sha1(self._contents[:-20]).digest()
 272     finally:
 273         f.close()
 274
 275   def get_stored_checksums(self):
 276     """Return the SHA1 checksums stored for the corresponding packfile and
 277     this header file itself."""
 278     return str(self._contents[-40:-20]), str(self._contents[-20:])
 279
 280   def object_index(self, sha):
 281     """Return the index in to the corresponding packfile for the object.
 282
 283     Given the name of an object it will return the offset that object lives
 284     at within the corresponding pack file. If the pack file doesn't have the
 285     object then None will be returned.
 286     """
 287     size = os.path.getsize(self._filename)
 288     assert size == self._size, "Pack index %s has changed size, I don't " \
 289          "like that" % self._filename
 290     if len(sha) == 40:
 291         sha = hex_to_sha(sha)
 292     return self._object_index(sha)
 293
 294   def _object_index(self, sha):
 295       """See object_index"""
 296       idx = ord(sha[0])
 297       if idx == 0:
 298           start = 0
 299       else:
 300           start = self._fan_out_table[idx-1]
 301       end = self._fan_out_table[idx]
 302       assert start <= end
 303       while start <= end:
 304         i = (start + end)/2
 305         file_sha = self._unpack_name(i)
 306         if file_sha < sha:
 307           start = i + 1
 308         elif file_sha > sha:
 309           end = i - 1
 310         else:
 311           return self._unpack_offset(i)
 312       return None
 313
 314
 315 def read_pack_header(f):
 316     header = f.read(12)
 317     assert header[:4] == "PACK"
 318     (version,) = struct.unpack_from(">L", header, 4)
 319     assert version in (2, 3), "Version was %d" % version
 320     (num_objects,) = struct.unpack_from(">L", header, 8)
 321     return (version, num_objects)
 322
 323
 324 def read_pack_tail(f):
 325     return (f.read(20),)
 326
 327
 328 def unpack_object(map):
 329     bytes = take_msb_bytes(map, 0)
 330     type = (bytes[0] >> 4) & 0x07
 331     size = bytes[0] & 0x0f
 332     for i, byte in enumerate(bytes[1:]):
 333       size += (byte & 0x7f) << ((i * 7) + 4)
 334     raw_base = len(bytes)
 335     if type == 6: # offset delta
 336         bytes = take_msb_bytes(map, raw_base)
 337         assert not (bytes[-1] & 0x80)
 338         delta_base_offset = bytes[0] & 0x7f
 339         for byte in bytes[1:]:
 340             delta_base_offset += 1
 341             delta_base_offset <<= 7
 342             delta_base_offset += (byte & 0x7f)
 343         raw_base+=len(bytes)
 344         uncomp, comp_len = read_zlib(map, raw_base, size)
 345         assert size == len(uncomp)
 346         return type, (delta_base_offset, uncomp), comp_len+raw_base
 347     elif type == 7: # ref delta
 348         basename = map[raw_base:raw_base+20]
 349         uncomp, comp_len = read_zlib(map, raw_base+20, size)
 350         assert size == len(uncomp)
 351         return type, (basename, uncomp), comp_len+raw_base+20
 352     else:
 353         uncomp, comp_len = read_zlib(map, raw_base, size)
 354         assert len(uncomp) == size
 355         return type, uncomp, comp_len+raw_base
 356
 357
 358 class PackData(object):
 359   """The data contained in a packfile.
 360
 361   Pack files can be accessed both sequentially for exploding a pack, and
 362   directly with the help of an index to retrieve a specific object.
 363
 364   The objects within are either complete or a delta aginst another.
 365
 366   The header is variable length. If the MSB of each byte is set then it
 367   indicates that the subsequent byte is still part of the header.
 368   For the first byte the next MS bits are the type, which tells you the type
 369   of object, and whether it is a delta. The LS byte is the lowest bits of the
 370   size. For each subsequent byte the LS 7 bits are the next MS bits of the
 371   size, i.e. the last byte of the header contains the MS bits of the size.
 372
 373   For the complete objects the data is stored as zlib deflated data.
 374   The size in the header is the uncompressed object size, so to uncompress
 375   you need to just keep feeding data to zlib until you get an object back,
 376   or it errors on bad data. This is done here by just giving the complete
 377   buffer from the start of the deflated object on. This is bad, but until I
 378   get mmap sorted out it will have to do.
 379
 380   Currently there are no integrity checks done. Also no attempt is made to try
 381   and detect the delta case, or a request for an object at the wrong position.
 382   It will all just throw a zlib or KeyError.
 383   """
 384
 385   def __init__(self, filename):
 386     """Create a PackData object that represents the pack in the given filename.
 387
 388     The file must exist and stay readable until the object is disposed of. It
 389     must also stay the same size. It will be mapped whenever needed.
 390
 391     Currently there is a restriction on the size of the pack as the python
 392     mmap implementation is flawed.
 393     """
 394     self._filename = filename
 395     assert os.path.exists(filename), "%s is not a packfile" % filename
 396     self._size = os.path.getsize(filename)
 397     self._header_size = 12
 398     assert self._size >= self._header_size, "%s is too small for a packfile" % filename
 399     self._read_header()
 400
 401   def _read_header(self):
 402     f = open(self._filename, 'rb')
 403     try:
 404         (version, self._num_objects) = \
 405                 read_pack_header(f)
 406         f.seek(self._size-20)
 407         (self._stored_checksum,) = read_pack_tail(f)
 408     finally:
 409         f.close()
 410
 411   def __len__(self):
 412       """Returns the number of objects in this pack."""
 413       return self._num_objects
 414
 415   def calculate_checksum(self):
 416     f = open(self._filename, 'rb')
 417     try:
 418         map = simple_mmap(f, 0, self._size)
 419         return hashlib.sha1(map[:-20]).digest()
 420     finally:
 421         f.close()
 422
 423   def iterobjects(self):
 424     offset = self._header_size
 425     f = open(self._filename, 'rb')
 426     for i in range(len(self)):
 427         map = simple_mmap(f, offset, self._size-offset)
 428         (type, obj, total_size) = unpack_object(map)
 429         yield offset, type, obj
 430         offset += total_size
 431     f.close()
 432
 433   def iterentries(self, ext_resolve_ref=None):
 434     found = {}
 435     at = {}
 436     postponed = defaultdict(list)
 437     class Postpone(Exception):
 438         """Raised to postpone delta resolving."""
 439
 440     def get_ref_text(sha):
 441         if sha in found:
 442             return found[sha]
 443         if ext_resolve_ref:
 444             try:
 445                 return ext_resolve_ref(sha)
 446             except KeyError:
 447                 pass
 448         raise Postpone, (sha, )
 449     todo = list(self.iterobjects())
 450     while todo:
 451       (offset, type, obj) = todo.pop(0)
 452       at[offset] = (type, obj)
 453       assert isinstance(offset, int)
 454       assert isinstance(type, int)
 455       assert isinstance(obj, tuple) or isinstance(obj, str)
 456       try:
 457         type, obj = resolve_object(offset, type, obj, get_ref_text,
 458             at.__getitem__)
 459       except Postpone, (sha, ):
 460         postponed[sha].append((offset, type, obj))
 461       else:
 462         shafile = ShaFile.from_raw_string(type, obj)
 463         sha = shafile.sha().digest()
 464         found[sha] = (type, obj)
 465         yield sha, offset, shafile.crc32()
 466         todo += postponed.get(sha, [])
 467     if postponed:
 468         raise KeyError([sha_to_hex(h) for h in postponed.keys()])
 469
 470   def sorted_entries(self, resolve_ext_ref=None):
 471     ret = list(self.iterentries(resolve_ext_ref))
 472     ret.sort()
 473     return ret
 474
 475   def create_index_v1(self, filename):
 476     entries = self.sorted_entries()
 477     write_pack_index_v1(filename, entries, self.calculate_checksum())
 478
 479   def create_index_v2(self, filename):
 480     entries = self.sorted_entries()
 481     write_pack_index_v2(filename, entries, self.calculate_checksum())
 482
 483   def get_stored_checksum(self):
 484     return self._stored_checksum
 485
 486   def check(self):
 487     return (self.calculate_checksum() == self.get_stored_checksum())
 488
 489   def get_object_at(self, offset):
 490     """Given an offset in to the packfile return the object that is there.
 491
 492     Using the associated index the location of an object can be looked up, and
 493     then the packfile can be asked directly for that object using this
 494     function.
 495     """
 496     assert isinstance(offset, long) or isinstance(offset, int),\
 497             "offset was %r" % offset
 498     assert offset >= self._header_size
 499     size = os.path.getsize(self._filename)
 500     assert size == self._size, "Pack data %s has changed size, I don't " \
 501          "like that" % self._filename
 502     f = open(self._filename, 'rb')
 503     try:
 504       map = simple_mmap(f, offset, size-offset)
 505       return unpack_object(map)[:2]
 506     finally:
 507       f.close()
 508
 509
 510 class SHA1Writer(object):
 511
 512     def __init__(self, f):
 513         self.f = f
 514         self.sha1 = hashlib.sha1("")
 515
 516     def write(self, data):
 517         self.sha1.update(data)
 518         self.f.write(data)
 519
 520     def write_sha(self):
 521         sha = self.sha1.digest()
 522         assert len(sha) == 20
 523         self.f.write(sha)
 524         return sha
 525
 526     def close(self):
 527         sha = self.write_sha()
 528         self.f.close()
 529         return sha
 530
 531     def tell(self):
 532         return self.f.tell()
 533
 534
 535 def write_pack_object(f, type, object):
 536     """Write pack object to a file.
 537
 538     :param f: File to write to
 539     :param o: Object to write
 540     """
 541     ret = f.tell()
 542     if type == 6: # ref delta
 543         (delta_base_offset, object) = object
 544     elif type == 7: # offset delta
 545         (basename, object) = object
 546     size = len(object)
 547     c = (type << 4) | (size & 15)
 548     size >>= 4
 549     while size:
 550         f.write(chr(c | 0x80))
 551         c = size & 0x7f
 552         size >>= 7
 553     f.write(chr(c))
 554     if type == 6: # offset delta
 555         ret = [delta_base_offset & 0x7f]
 556         delta_base_offset >>= 7
 557         while delta_base_offset:
 558             delta_base_offset -= 1
 559             ret.insert(0, 0x80 | (delta_base_offset & 0x7f))
 560             delta_base_offset >>= 7
 561         f.write("".join([chr(x) for x in ret]))
 562     elif type == 7: # ref delta
 563         assert len(basename) == 20
 564         f.write(basename)
 565     f.write(zlib.compress(object))
 566     return f.tell()
 567
 568
 569 def write_pack(filename, objects, num_objects):
 570     f = open(filename + ".pack", 'w')
 571     try:
 572         entries, data_sum = write_pack_data(f, objects, num_objects)
 573     except:
 574         f.close()
 575     entries.sort()
 576     write_pack_index_v2(filename + ".idx", entries, data_sum)
 577
 578
 579 def write_pack_data(f, objects, num_objects):
 580     """Write a new pack file.
 581
 582     :param filename: The filename of the new pack file.
 583     :param objects: List of objects to write.
 584     :return: List with (name, offset, crc32 checksum) entries, pack checksum
 585     """
 586     entries = []
 587     f = SHA1Writer(f)
 588     f.write("PACK")               # Pack header
 589     f.write(struct.pack(">L", 2)) # Pack version
 590     f.write(struct.pack(">L", num_objects)) # Number of objects in pack
 591     for o in objects:
 592         sha1 = o.sha().digest()
 593         crc32 = o.crc32()
 594         # FIXME: Delta !
 595         t, o = o.as_raw_string()
 596         offset = write_pack_object(f, t, o)
 597         entries.append((sha1, offset, crc32))
 598     return entries, f.write_sha()
 599
 600
 601 def write_pack_index_v1(filename, entries, pack_checksum):
 602     """Write a new pack index file.
 603
 604     :param filename: The filename of the new pack index file.
 605     :param entries: List of tuples with object name (sha), offset_in_pack,  and
 606             crc32_checksum.
 607     :param pack_checksum: Checksum of the pack file.
 608     """
 609     f = open(filename, 'w')
 610     f = SHA1Writer(f)
 611     fan_out_table = defaultdict(lambda: 0)
 612     for (name, offset, entry_checksum) in entries:
 613         fan_out_table[ord(name[0])] += 1
 614     # Fan-out table
 615     for i in range(0x100):
 616         f.write(struct.pack(">L", fan_out_table[i]))
 617         fan_out_table[i+1] += fan_out_table[i]
 618     for (name, offset, entry_checksum) in entries:
 619         f.write(struct.pack(">L20s", offset, name))
 620     assert len(pack_checksum) == 20
 621     f.write(pack_checksum)
 622     f.close()
 623
 624
 625 def create_delta(base_buf, target_buf):
 626     """Use python difflib to work out how to transform base_buf to target_buf"""
 627     assert isinstance(base_buf, str)
 628     assert isinstance(target_buf, str)
 629     out_buf = ""
 630
 631     # write delta header
 632     def encode_size(size):
 633         ret = ""
 634         c = size & 0x7f
 635         size >>= 7
 636         while size:
 637             ret += chr(c | 0x80)
 638             c = size & 0x7f
 639             size >>= 7
 640         ret += chr(c)
 641         return ret
 642     out_buf += encode_size(len(base_buf))
 643     out_buf += encode_size(len(target_buf))
 644
 645     # write out delta opcodes
 646     seq = difflib.SequenceMatcher(a=base_buf, b=target_buf)
 647     for opcode, i1, i2, j1, j2 in seq.get_opcodes():
 648         # Git patch opcodes don't care about deletes!
 649         #if opcode == "replace" or opcode == "delete":
 650         #    pass
 651
 652         if opcode == "equal":
 653             # If they are equal, unpacker will use data from base_buf
 654             # Write out an opcode that says what range to use
 655             scratch = ""
 656             op = 0x80
 657             o = i1
 658             s = i2 - i1
 659
 660             if o & 0x000000ff:
 661                 scratch += chr(o >> 0)
 662                 op |= 0x01
 663             if o & 0x0000ff00:
 664                 scratch += chr(o >> 8)
 665                 op |= 0x02
 666             if o & 0x00ff0000:
 667                 scratch += chr(o >> 16)
 668                 op |= 0x02
 669             if o & 0xff000000:
 670                 scratch += chr(o >> 24)
 671                 op |= 0x08
 672             if s & 0x00ff:
 673                 scratch += chr(s >> 0)
 674                 op |= 0x10
 675             if s & 0xff00:
 676                 scratch += chr(s >> 8)
 677                 op |= 0x20
 678
 679             out_buf += chr(op)
 680             out_buf += scratch
 681
 682
 683         if opcode == "replace" or opcode == "insert":
 684             # If we are replacing a range or adding one, then we just
 685             # output it to the stream (prefixed by its size)
 686             #FIXME: Will need to break this into multiple chunks
 687             s = j2 - j1
 688             o = j1
 689             while s > 127:
 690                 out_buf += chr(127)
 691                 out_buf += target_buf[o:o+127]
 692                 s -= 127
 693                 o += 127
 694             out_buf += chr(s)
 695             out_buf += target_buf[o:o+s]
 696
 697     return out_buf
 698
 699
 700 def apply_delta(src_buf, delta):
 701     """Based on the similar function in git's patch-delta.c."""
 702     assert isinstance(src_buf, str), "was %r" % (src_buf,)
 703     assert isinstance(delta, str)
 704     out = ""
 705     def pop(delta):
 706         ret = delta[0]
 707         delta = delta[1:]
 708         return ord(ret), delta
 709     def get_delta_header_size(delta):
 710         size = 0
 711         i = 0
 712         while delta:
 713             cmd, delta = pop(delta)
 714             size |= (cmd & ~0x80) << i
 715             i += 7
 716             if not cmd & 0x80:
 717                 break
 718         return size, delta
 719     src_size, delta = get_delta_header_size(delta)
 720     dest_size, delta = get_delta_header_size(delta)
 721     assert src_size == len(src_buf), "%d vs %d" % (src_size, len(src_buf))
 722     while delta:
 723         cmd, delta = pop(delta)
 724         if cmd & 0x80:
 725             cp_off = 0
 726             for i in range(4):
 727                 if cmd & (1 << i):
 728                     x, delta = pop(delta)
 729                     cp_off |= x << (i * 8)
 730             cp_size = 0
 731             for i in range(3):
 732                 if cmd & (1 << (4+i)):
 733                     x, delta = pop(delta)
 734                     cp_size |= x << (i * 8)
 735             if cp_size == 0:
 736                 cp_size = 0x10000
 737             if (cp_off + cp_size < cp_size or
 738                 cp_off + cp_size > src_size or
 739                 cp_size > dest_size):
 740                 break
 741             out += src_buf[cp_off:cp_off+cp_size]
 742         elif cmd != 0:
 743             out += delta[:cmd]
 744             delta = delta[cmd:]
 745         else:
 746             raise ApplyDeltaError("Invalid opcode 0")
 747
 748     if delta != "":
 749         raise ApplyDeltaError("delta not empty: %r" % delta)
 750
 751     if dest_size != len(out):
 752         raise ApplyDeltaError("dest size incorrect")
 753
 754     return out
 755
 756
 757 def write_pack_index_v2(filename, entries, pack_checksum):
 758     """Write a new pack index file.
 759
 760     :param filename: The filename of the new pack index file.
 761     :param entries: List of tuples with object name (sha), offset_in_pack,  and
 762             crc32_checksum.
 763     :param pack_checksum: Checksum of the pack file.
 764     """
 765     f = open(filename, 'w')
 766     f = SHA1Writer(f)
 767     f.write('\377tOc') # Magic!
 768     f.write(struct.pack(">L", 2))
 769     fan_out_table = defaultdict(lambda: 0)
 770     for (name, offset, entry_checksum) in entries:
 771         fan_out_table[ord(name[0])] += 1
 772     # Fan-out table
 773     for i in range(0x100):
 774         f.write(struct.pack(">L", fan_out_table[i]))
 775         fan_out_table[i+1] += fan_out_table[i]
 776     for (name, offset, entry_checksum) in entries:
 777         f.write(name)
 778     for (name, offset, entry_checksum) in entries:
 779         f.write(struct.pack(">l", entry_checksum))
 780     for (name, offset, entry_checksum) in entries:
 781         # FIXME: handle if MSBit is set in offset
 782         f.write(struct.pack(">L", offset))
 783     # FIXME: handle table for pack files > 8 Gb
 784     assert len(pack_checksum) == 20
 785     f.write(pack_checksum)
 786     f.close()
 787
 788
 789 class Pack(object):
 790
 791     def __init__(self, basename):
 792         self._basename = basename
 793         self._data_path = self._basename + ".pack"
 794         self._idx_path = self._basename + ".idx"
 795         self._data = None
 796         self._idx = None
 797
 798     def name(self):
 799         return self.idx.objects_sha1()
 800
 801     @property
 802     def data(self):
 803         if self._data is None:
 804             self._data = PackData(self._data_path)
 805             assert len(self.idx) == len(self._data)
 806             assert self.idx.get_stored_checksums()[0] == self._data.get_stored_checksum()
 807         return self._data
 808
 809     @property
 810     def idx(self):
 811         if self._idx is None:
 812             self._idx = PackIndex(self._idx_path)
 813         return self._idx
 814
 815     def close(self):
 816         if self._data is not None:
 817             self._data.close()
 818         self.idx.close()
 819
 820     def __eq__(self, other):
 821         return type(self) == type(other) and self.idx == other.idx
 822
 823     def __len__(self):
 824         """Number of entries in this pack."""
 825         return len(self.idx)
 826
 827     def __repr__(self):
 828         return "Pack(%r)" % self._basename
 829
 830     def __iter__(self):
 831         """Iterate over all the sha1s of the objects in this pack."""
 832         return iter(self.idx)
 833
 834     def check(self):
 835         return self.idx.check() and self.data.check()
 836
 837     def get_stored_checksum(self):
 838         return self.data.get_stored_checksum()
 839
 840     def __contains__(self, sha1):
 841         """Check whether this pack contains a particular SHA1."""
 842         return (self.idx.object_index(sha1) is not None)
 843
 844     def get_raw(self, sha1, resolve_ref=None):
 845         if resolve_ref is None:
 846             resolve_ref = self.get_raw
 847         offset = self.idx.object_index(sha1)
 848         if offset is None:
 849             raise KeyError(sha1)
 850
 851         type, obj = self.data.get_object_at(offset)
 852         assert isinstance(offset, int)
 853         return resolve_object(offset, type, obj, resolve_ref,
 854             self.data.get_object_at)
 855
 856     def __getitem__(self, sha1):
 857         """Retrieve the specified SHA1."""
 858         type, uncomp = self.get_raw(sha1)
 859         return ShaFile.from_raw_string(type, uncomp)
 860
 861     def iterobjects(self):
 862         for offset, type, obj in self.data.iterobjects():
 863             assert isinstance(offset, int)
 864             yield ShaFile.from_raw_string(
 865                     *resolve_object(offset, type, obj, self.get_raw,
 866                 self.data.get_object_at))
 867
 868
 869 def load_packs(path):
 870     if not os.path.exists(path):
 871         return
 872     for name in os.listdir(path):
 873         if name.startswith("pack-") and name.endswith(".pack"):
 874             yield Pack(os.path.join(path, name[:-len(".pack")]))
 875