dulwich/pack.py

   1 # pack.py -- For dealing with packed git objects.
   2 # Copyright (C) 2007 James Westby <jw+debian@jameswestby.net>
   3 # Copyright (C) 2008-2009 Jelmer Vernooij <jelmer@samba.org>
   4 #
   5 # This program is free software; you can redistribute it and/or
   6 # modify it under the terms of the GNU General Public License
   7 # as published by the Free Software Foundation; version 2
   8 # of the License or (at your option) a later version.
   9 #
  10 # This program is distributed in the hope that it will be useful,
  11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
  12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  13 # GNU General Public License for more details.
  14 #
  15 # You should have received a copy of the GNU General Public License
  16 # along with this program; if not, write to the Free Software
  17 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
  18 # MA  02110-1301, USA.
  19
  20 """Classes for dealing with packed git objects.
  21
  22 A pack is a compact representation of a bunch of objects, stored
  23 using deltas where possible.
  24
  25 They have two parts, the pack file, which stores the data, and an index
  26 that tells you where the data is.
  27
  28 To find an object you look in all of the index files 'til you find a
  29 match for the object name. You then use the pointer got from this as
  30 a pointer in to the corresponding packfile.
  31 """
  32
  33 try:
  34     from collections import defaultdict
  35 except ImportError:
  36     from misc import defaultdict
  37
  38 from cStringIO import (
  39     StringIO,
  40     )
  41 from collections import (
  42     deque,
  43     )
  44 import difflib
  45 from itertools import (
  46     chain,
  47     imap,
  48     izip,
  49     )
  50 import mmap
  51 import os
  52 import struct
  53 try:
  54     from struct import unpack_from
  55 except ImportError:
  56     from dulwich.misc import unpack_from
  57 import sys
  58 import zlib
  59
  60 from dulwich.errors import (
  61     ApplyDeltaError,
  62     ChecksumMismatch,
  63     )
  64 from dulwich.file import GitFile
  65 from dulwich.lru_cache import (
  66     LRUSizeCache,
  67     )
  68 from dulwich.misc import (
  69     make_sha,
  70     SEEK_END,
  71     )
  72 from dulwich.objects import (
  73     ShaFile,
  74     hex_to_sha,
  75     sha_to_hex,
  76     object_header,
  77     )
  78
  79 supports_mmap_offset = (sys.version_info[0] >= 3 or
  80         (sys.version_info[0] == 2 and sys.version_info[1] >= 6))
  81
  82
  83 OFS_DELTA = 6
  84 REF_DELTA = 7
  85
  86 DELTA_TYPES = (OFS_DELTA, REF_DELTA)
  87
  88
  89 def take_msb_bytes(read):
  90     """Read bytes marked with most significant bit.
  91
  92     :param read: Read function
  93     """
  94     ret = []
  95     while len(ret) == 0 or ret[-1] & 0x80:
  96         ret.append(ord(read(1)))
  97     return ret
  98
  99
 100 def read_zlib_chunks(read_some, dec_size, buffer_size=4096):
 101     """Read zlib data from a buffer.
 102
 103     This function requires that the buffer have additional data following the
 104     compressed data, which is guaranteed to be the case for git pack files.
 105
 106     :param read_some: Read function that returns at least one byte, but may
 107         return less than the requested size
 108     :param dec_size: Expected size of the decompressed buffer
 109     :param buffer_size: Size of the read buffer
 110     :return: Tuple with list of chunks, length of compressed data length and
 111         and unused read data.
 112     :raise zlib.error: if a decompression error occurred.
 113     """
 114     if dec_size <= -1:
 115         raise ValueError("non-negative zlib data stream size expected")
 116     obj = zlib.decompressobj()
 117     ret = []
 118     fed = 0
 119     size = 0
 120     while obj.unused_data == "":
 121         add = read_some(buffer_size)
 122         if not add:
 123             raise zlib.error("EOF before end of zlib stream")
 124         fed += len(add)
 125         decomp = obj.decompress(add)
 126         size += len(decomp)
 127         ret.append(decomp)
 128     if size != dec_size:
 129         raise zlib.error("decompressed data does not match expected size")
 130     comp_len = fed - len(obj.unused_data)
 131     return ret, comp_len, obj.unused_data
 132
 133
 134 def iter_sha1(iter):
 135     """Return the hexdigest of the SHA1 over a set of names.
 136
 137     :param iter: Iterator over string objects
 138     :return: 40-byte hex sha1 digest
 139     """
 140     sha1 = make_sha()
 141     for name in iter:
 142         sha1.update(name)
 143     return sha1.hexdigest()
 144
 145
 146 def load_pack_index(path):
 147     """Load an index file by path.
 148
 149     :param filename: Path to the index file
 150     :return: A PackIndex loaded from the given path
 151     """
 152     f = GitFile(path, 'rb')
 153     try:
 154         return load_pack_index_file(path, f)
 155     finally:
 156         f.close()
 157
 158
 159 def _load_file_contents(f, size=None):
 160     fileno = getattr(f, 'fileno', None)
 161     # Attempt to use mmap if possible
 162     if fileno is not None:
 163         fd = f.fileno()
 164         if size is None:
 165             size = os.fstat(fd).st_size
 166         try:
 167             contents = mmap.mmap(fd, size, access=mmap.ACCESS_READ)
 168         except mmap.error:
 169             # Perhaps a socket?
 170             pass
 171         else:
 172             return contents, size
 173     contents = f.read()
 174     size = len(contents)
 175     return contents, size
 176
 177
 178 def load_pack_index_file(path, f):
 179     """Load an index file from a file-like object.
 180
 181     :param path: Path for the index file
 182     :param f: File-like object
 183     :return: A PackIndex loaded from the given file
 184     """
 185     contents, size = _load_file_contents(f)
 186     if contents[:4] == '\377tOc':
 187         version = struct.unpack(">L", contents[4:8])[0]
 188         if version == 2:
 189             return PackIndex2(path, file=f, contents=contents,
 190                 size=size)
 191         else:
 192             raise KeyError("Unknown pack index format %d" % version)
 193     else:
 194         return PackIndex1(path, file=f, contents=contents, size=size)
 195
 196
 197 def bisect_find_sha(start, end, sha, unpack_name):
 198     """Find a SHA in a data blob with sorted SHAs.
 199
 200     :param start: Start index of range to search
 201     :param end: End index of range to search
 202     :param sha: Sha to find
 203     :param unpack_name: Callback to retrieve SHA by index
 204     :return: Index of the SHA, or None if it wasn't found
 205     """
 206     assert start <= end
 207     while start <= end:
 208         i = (start + end)/2
 209         file_sha = unpack_name(i)
 210         x = cmp(file_sha, sha)
 211         if x < 0:
 212             start = i + 1
 213         elif x > 0:
 214             end = i - 1
 215         else:
 216             return i
 217     return None
 218
 219
 220 class PackIndex(object):
 221     """An index in to a packfile.
 222
 223     Given a sha id of an object a pack index can tell you the location in the
 224     packfile of that object if it has it.
 225     """
 226
 227     def __eq__(self, other):
 228         if not isinstance(other, PackIndex):
 229             return False
 230
 231         for (name1, _, _), (name2, _, _) in izip(self.iterentries(),
 232                                                  other.iterentries()):
 233             if name1 != name2:
 234                 return False
 235         return True
 236
 237     def __ne__(self, other):
 238         return not self.__eq__(other)
 239
 240     def __len__(self):
 241         """Return the number of entries in this pack index."""
 242         raise NotImplementedError(self.__len__)
 243
 244     def __iter__(self):
 245         """Iterate over the SHAs in this pack."""
 246         raise NotImplementedError(self.__iter__)
 247
 248     def iterentries(self):
 249         """Iterate over the entries in this pack index.
 250
 251         :return: iterator over tuples with object name, offset in packfile and
 252             crc32 checksum.
 253         """
 254         raise NotImplementedError(self.iterentries)
 255
 256     def get_pack_checksum(self):
 257         """Return the SHA1 checksum stored for the corresponding packfile.
 258
 259         :return: 20-byte binary digest
 260         """
 261         raise NotImplementedError(self.get_pack_checksum)
 262
 263     def object_index(self, sha):
 264         """Return the index in to the corresponding packfile for the object.
 265
 266         Given the name of an object it will return the offset that object
 267         lives at within the corresponding pack file. If the pack file doesn't
 268         have the object then None will be returned.
 269         """
 270         if len(sha) == 40:
 271             sha = hex_to_sha(sha)
 272         return self._object_index(sha)
 273
 274     def _object_index(self, sha):
 275         """See object_index.
 276
 277         :param sha: A *binary* SHA string. (20 characters long)_
 278         """
 279         raise NotImplementedError(self._object_index)
 280
 281
 282 class FilePackIndex(PackIndex):
 283     """Pack index that is based on a file.
 284
 285     To do the loop it opens the file, and indexes first 256 4 byte groups
 286     with the first byte of the sha id. The value in the four byte group indexed
 287     is the end of the group that shares the same starting byte. Subtract one
 288     from the starting byte and index again to find the start of the group.
 289     The values are sorted by sha id within the group, so do the math to find
 290     the start and end offset and then bisect in to find if the value is present.
 291     """
 292
 293     def __init__(self, filename, file=None, contents=None, size=None):
 294         """Create a pack index object.
 295
 296         Provide it with the name of the index file to consider, and it will map
 297         it whenever required.
 298         """
 299         self._filename = filename
 300         # Take the size now, so it can be checked each time we map the file to
 301         # ensure that it hasn't changed.
 302         if file is None:
 303             self._file = GitFile(filename, 'rb')
 304         else:
 305             self._file = file
 306         if contents is None:
 307             self._contents, self._size = _load_file_contents(file, size)
 308         else:
 309             self._contents, self._size = (contents, size)
 310
 311     def __eq__(self, other):
 312         # Quick optimization:
 313         if isinstance(other, FilePackIndex) and self._fan_out_table != other._fan_out_table:
 314             return False
 315
 316         return super(FilePackIndex, self).__eq__(other)
 317
 318     def close(self):
 319         self._file.close()
 320
 321     def __len__(self):
 322         """Return the number of entries in this pack index."""
 323         return self._fan_out_table[-1]
 324
 325     def _unpack_entry(self, i):
 326         """Unpack the i-th entry in the index file.
 327
 328         :return: Tuple with object name (SHA), offset in pack file and CRC32
 329             checksum (if known).
 330         """
 331         raise NotImplementedError(self._unpack_entry)
 332
 333     def _unpack_name(self, i):
 334         """Unpack the i-th name from the index file."""
 335         raise NotImplementedError(self._unpack_name)
 336
 337     def _unpack_offset(self, i):
 338         """Unpack the i-th object offset from the index file."""
 339         raise NotImplementedError(self._unpack_offset)
 340
 341     def _unpack_crc32_checksum(self, i):
 342         """Unpack the crc32 checksum for the i-th object from the index file."""
 343         raise NotImplementedError(self._unpack_crc32_checksum)
 344
 345     def __iter__(self):
 346         """Iterate over the SHAs in this pack."""
 347         return imap(sha_to_hex, self._itersha())
 348
 349     def _itersha(self):
 350         for i in range(len(self)):
 351             yield self._unpack_name(i)
 352
 353     def objects_sha1(self):
 354         """Return the hex SHA1 over all the shas of all objects in this pack.
 355
 356         :note: This is used for the filename of the pack.
 357         """
 358         return iter_sha1(self._itersha())
 359
 360     def iterentries(self):
 361         """Iterate over the entries in this pack index.
 362
 363         :return: iterator over tuples with object name, offset in packfile and
 364             crc32 checksum.
 365         """
 366         for i in range(len(self)):
 367             yield self._unpack_entry(i)
 368
 369     def _read_fan_out_table(self, start_offset):
 370         ret = []
 371         for i in range(0x100):
 372             fanout_entry = self._contents[start_offset+i*4:start_offset+(i+1)*4]
 373             ret.append(struct.unpack(">L", fanout_entry)[0])
 374         return ret
 375
 376     def check(self):
 377         """Check that the stored checksum matches the actual checksum."""
 378         actual = self.calculate_checksum()
 379         stored = self.get_stored_checksum()
 380         if actual != stored:
 381             raise ChecksumMismatch(stored, actual)
 382
 383     def calculate_checksum(self):
 384         """Calculate the SHA1 checksum over this pack index.
 385
 386         :return: This is a 20-byte binary digest
 387         """
 388         return make_sha(self._contents[:-20]).digest()
 389
 390     def get_pack_checksum(self):
 391         """Return the SHA1 checksum stored for the corresponding packfile.
 392
 393         :return: 20-byte binary digest
 394         """
 395         return str(self._contents[-40:-20])
 396
 397     def get_stored_checksum(self):
 398         """Return the SHA1 checksum stored for this index.
 399
 400         :return: 20-byte binary digest
 401         """
 402         return str(self._contents[-20:])
 403
 404     def _object_index(self, sha):
 405         """See object_index.
 406
 407         :param sha: A *binary* SHA string. (20 characters long)_
 408         """
 409         assert len(sha) == 20
 410         idx = ord(sha[0])
 411         if idx == 0:
 412             start = 0
 413         else:
 414             start = self._fan_out_table[idx-1]
 415         end = self._fan_out_table[idx]
 416         i = bisect_find_sha(start, end, sha, self._unpack_name)
 417         if i is None:
 418             raise KeyError(sha)
 419         return self._unpack_offset(i)
 420
 421
 422 class PackIndex1(FilePackIndex):
 423     """Version 1 Pack Index file."""
 424
 425     def __init__(self, filename, file=None, contents=None, size=None):
 426         super(PackIndex1, self).__init__(filename, file, contents, size)
 427         self.version = 1
 428         self._fan_out_table = self._read_fan_out_table(0)
 429
 430     def _unpack_entry(self, i):
 431         (offset, name) = unpack_from(">L20s", self._contents,
 432                                      (0x100 * 4) + (i * 24))
 433         return (name, offset, None)
 434
 435     def _unpack_name(self, i):
 436         offset = (0x100 * 4) + (i * 24) + 4
 437         return self._contents[offset:offset+20]
 438
 439     def _unpack_offset(self, i):
 440         offset = (0x100 * 4) + (i * 24)
 441         return unpack_from(">L", self._contents, offset)[0]
 442
 443     def _unpack_crc32_checksum(self, i):
 444         # Not stored in v1 index files
 445         return None
 446
 447
 448 class PackIndex2(FilePackIndex):
 449     """Version 2 Pack Index file."""
 450
 451     def __init__(self, filename, file=None, contents=None, size=None):
 452         super(PackIndex2, self).__init__(filename, file, contents, size)
 453         assert self._contents[:4] == '\377tOc', "Not a v2 pack index file"
 454         (self.version, ) = unpack_from(">L", self._contents, 4)
 455         assert self.version == 2, "Version was %d" % self.version
 456         self._fan_out_table = self._read_fan_out_table(8)
 457         self._name_table_offset = 8 + 0x100 * 4
 458         self._crc32_table_offset = self._name_table_offset + 20 * len(self)
 459         self._pack_offset_table_offset = (self._crc32_table_offset +
 460                                           4 * len(self))
 461
 462     def _unpack_entry(self, i):
 463         return (self._unpack_name(i), self._unpack_offset(i),
 464                 self._unpack_crc32_checksum(i))
 465
 466     def _unpack_name(self, i):
 467         offset = self._name_table_offset + i * 20
 468         return self._contents[offset:offset+20]
 469
 470     def _unpack_offset(self, i):
 471         offset = self._pack_offset_table_offset + i * 4
 472         return unpack_from(">L", self._contents, offset)[0]
 473
 474     def _unpack_crc32_checksum(self, i):
 475         return unpack_from(">L", self._contents,
 476                           self._crc32_table_offset + i * 4)[0]
 477
 478
 479 def read_pack_header(read):
 480     """Read the header of a pack file.
 481
 482     :param read: Read function
 483     :return: Tuple with pack version and number of objects
 484     """
 485     header = read(12)
 486     assert header[:4] == "PACK"
 487     (version,) = unpack_from(">L", header, 4)
 488     assert version in (2, 3), "Version was %d" % version
 489     (num_objects,) = unpack_from(">L", header, 8)
 490     return (version, num_objects)
 491
 492
 493 def chunks_length(chunks):
 494     return sum(imap(len, chunks))
 495
 496
 497 def unpack_object(read_all, read_some=None):
 498     """Unpack a Git object.
 499
 500     :param read_all: Read function that blocks until the number of requested
 501         bytes are read.
 502     :param read_some: Read function that returns at least one byte, but may not
 503         return the number of bytes requested.
 504     :return: tuple with type, uncompressed data, compressed size and tail data.
 505     """
 506     if read_some is None:
 507         read_some = read_all
 508     bytes = take_msb_bytes(read_all)
 509     type = (bytes[0] >> 4) & 0x07
 510     size = bytes[0] & 0x0f
 511     for i, byte in enumerate(bytes[1:]):
 512         size += (byte & 0x7f) << ((i * 7) + 4)
 513     raw_base = len(bytes)
 514     if type == OFS_DELTA:
 515         bytes = take_msb_bytes(read_all)
 516         raw_base += len(bytes)
 517         assert not (bytes[-1] & 0x80)
 518         delta_base_offset = bytes[0] & 0x7f
 519         for byte in bytes[1:]:
 520             delta_base_offset += 1
 521             delta_base_offset <<= 7
 522             delta_base_offset += (byte & 0x7f)
 523         uncomp, comp_len, unused = read_zlib_chunks(read_some, size)
 524         assert size == chunks_length(uncomp)
 525         return type, (delta_base_offset, uncomp), comp_len+raw_base, unused
 526     elif type == REF_DELTA:
 527         basename = read_all(20)
 528         raw_base += 20
 529         uncomp, comp_len, unused = read_zlib_chunks(read_some, size)
 530         assert size == chunks_length(uncomp)
 531         return type, (basename, uncomp), comp_len+raw_base, unused
 532     else:
 533         uncomp, comp_len, unused = read_zlib_chunks(read_some, size)
 534         assert chunks_length(uncomp) == size
 535         return type, uncomp, comp_len+raw_base, unused
 536
 537
 538 def _compute_object_size((num, obj)):
 539     """Compute the size of a unresolved object for use with LRUSizeCache."""
 540     if num in DELTA_TYPES:
 541         return chunks_length(obj[1])
 542     return chunks_length(obj)
 543
 544
 545 class PackStreamReader(object):
 546     """Class to read a pack stream.
 547
 548     The pack is read from a ReceivableProtocol using read() or recv() as
 549     appropriate.
 550     """
 551
 552     def __init__(self, read_all, read_some=None):
 553         self.read_all = read_all
 554         if read_some is None:
 555             self.read_some = read_all
 556         else:
 557             self.read_some = read_some
 558         self.sha = make_sha()
 559         self._offset = 0
 560         self._rbuf = StringIO()
 561         # trailer is a deque to avoid memory allocation on small reads
 562         self._trailer = deque()
 563
 564     def _read(self, read, size):
 565         """Read up to size bytes using the given callback.
 566
 567         As a side effect, update the verifier's hash (excluding the last 20
 568         bytes read) and write through to the output file.
 569
 570         :param read: The read callback to read from.
 571         :param size: The maximum number of bytes to read; the particular
 572             behavior is callback-specific.
 573         """
 574         data = read(size)
 575
 576         # maintain a trailer of the last 20 bytes we've read
 577         n = len(data)
 578         self._offset += n
 579         tn = len(self._trailer)
 580         if n >= 20:
 581             to_pop = tn
 582             to_add = 20
 583         else:
 584             to_pop = max(n + tn - 20, 0)
 585             to_add = n
 586         for _ in xrange(to_pop):
 587             self.sha.update(self._trailer.popleft())
 588         self._trailer.extend(data[-to_add:])
 589
 590         # hash everything but the trailer
 591         self.sha.update(data[:-to_add])
 592         return data
 593
 594     def _buf_len(self):
 595         buf = self._rbuf
 596         start = buf.tell()
 597         buf.seek(0, SEEK_END)
 598         end = buf.tell()
 599         buf.seek(start)
 600         return end - start
 601
 602     @property
 603     def offset(self):
 604         return self._offset - self._buf_len()
 605
 606     def read(self, size):
 607         """Read, blocking until size bytes are read."""
 608         buf_len = self._buf_len()
 609         if buf_len >= size:
 610             return self._rbuf.read(size)
 611         buf_data = self._rbuf.read()
 612         self._rbuf = StringIO()
 613         return buf_data + self._read(self.read_all, size - buf_len)
 614
 615     def recv(self, size):
 616         """Read up to size bytes, blocking until one byte is read."""
 617         buf_len = self._buf_len()
 618         if buf_len:
 619             data = self._rbuf.read(size)
 620             if size >= buf_len:
 621                 self._rbuf = StringIO()
 622             return data
 623         return self._read(self.read_some, size)
 624
 625     def __len__(self):
 626         return self._num_objects
 627
 628     def read_objects(self):
 629         """Read the objects in this pack file.
 630
 631         :raise AssertionError: if there is an error in the pack format.
 632         :raise ChecksumMismatch: if the checksum of the pack contents does not
 633             match the checksum in the pack trailer.
 634         :raise zlib.error: if an error occurred during zlib decompression.
 635         :raise IOError: if an error occurred writing to the output file.
 636         """
 637         pack_version, self._num_objects = read_pack_header(self.read)
 638         for i in xrange(self._num_objects):
 639             type, uncomp, comp_len, unused = unpack_object(self.read, self.recv)
 640             yield type, uncomp, comp_len
 641
 642             # prepend any unused data to current read buffer
 643             buf = StringIO()
 644             buf.write(unused)
 645             buf.write(self._rbuf.read())
 646             buf.seek(0)
 647             self._rbuf = buf
 648
 649         pack_sha = sha_to_hex(''.join([c for c in self._trailer]))
 650         calculated_sha = self.sha.hexdigest()
 651         if pack_sha != calculated_sha:
 652             raise ChecksumMismatch(pack_sha, calculated_sha)
 653
 654
 655 class PackObjectIterator(object):
 656
 657     def __init__(self, pack, progress=None):
 658         self.i = 0
 659         self.offset = pack._header_size
 660         self.num = len(pack)
 661         self.map = pack._file
 662         self._progress = progress
 663
 664     def __iter__(self):
 665         return self
 666
 667     def __len__(self):
 668         return self.num
 669
 670     def next(self):
 671         if self.i == self.num:
 672             raise StopIteration
 673         self.map.seek(self.offset)
 674         (type, obj, total_size, unused) = unpack_object(self.map.read)
 675         self.map.seek(self.offset)
 676         crc32 = zlib.crc32(self.map.read(total_size)) & 0xffffffff
 677         ret = (self.offset, type, obj, crc32)
 678         self.offset += total_size
 679         if self._progress is not None:
 680             self._progress(self.i, self.num)
 681         self.i+=1
 682         return ret
 683
 684 def obj_sha(type, chunks):
 685     """Compute the SHA for a numeric type and object chunks."""
 686     sha = make_sha()
 687     sha.update(object_header(type, chunks_length(chunks)))
 688     for chunk in chunks:
 689         sha.update(chunk)
 690     return sha.digest()
 691
 692
 693 class PackData(object):
 694     """The data contained in a packfile.
 695
 696     Pack files can be accessed both sequentially for exploding a pack, and
 697     directly with the help of an index to retrieve a specific object.
 698
 699     The objects within are either complete or a delta aginst another.
 700
 701     The header is variable length. If the MSB of each byte is set then it
 702     indicates that the subsequent byte is still part of the header.
 703     For the first byte the next MS bits are the type, which tells you the type
 704     of object, and whether it is a delta. The LS byte is the lowest bits of the
 705     size. For each subsequent byte the LS 7 bits are the next MS bits of the
 706     size, i.e. the last byte of the header contains the MS bits of the size.
 707
 708     For the complete objects the data is stored as zlib deflated data.
 709     The size in the header is the uncompressed object size, so to uncompress
 710     you need to just keep feeding data to zlib until you get an object back,
 711     or it errors on bad data. This is done here by just giving the complete
 712     buffer from the start of the deflated object on. This is bad, but until I
 713     get mmap sorted out it will have to do.
 714
 715     Currently there are no integrity checks done. Also no attempt is made to
 716     try and detect the delta case, or a request for an object at the wrong
 717     position.  It will all just throw a zlib or KeyError.
 718     """
 719
 720     def __init__(self, filename, file=None, size=None):
 721         """Create a PackData object representing the pack in the given filename.
 722
 723         The file must exist and stay readable until the object is disposed of. It
 724         must also stay the same size. It will be mapped whenever needed.
 725
 726         Currently there is a restriction on the size of the pack as the python
 727         mmap implementation is flawed.
 728         """
 729         self._filename = filename
 730         self._size = size
 731         self._header_size = 12
 732         if file is None:
 733             self._file = GitFile(self._filename, 'rb')
 734         else:
 735             self._file = file
 736         (version, self._num_objects) = read_pack_header(self._file.read)
 737         self._offset_cache = LRUSizeCache(1024*1024*20,
 738             compute_size=_compute_object_size)
 739         self.pack = None
 740
 741     @classmethod
 742     def from_file(cls, file, size):
 743         return cls(str(file), file=file, size=size)
 744
 745     @classmethod
 746     def from_path(cls, path):
 747         return cls(filename=path)
 748
 749     def close(self):
 750         self._file.close()
 751
 752     def _get_size(self):
 753         if self._size is not None:
 754             return self._size
 755         self._size = os.path.getsize(self._filename)
 756         if self._size < self._header_size:
 757             errmsg = ("%s is too small for a packfile (%d < %d)" %
 758                       (self._filename, self._size, self._header_size))
 759             raise AssertionError(errmsg)
 760         return self._size
 761
 762     def __len__(self):
 763         """Returns the number of objects in this pack."""
 764         return self._num_objects
 765
 766     def calculate_checksum(self):
 767         """Calculate the checksum for this pack.
 768
 769         :return: 20-byte binary SHA1 digest
 770         """
 771         s = make_sha()
 772         self._file.seek(0)
 773         todo = self._get_size() - 20
 774         while todo > 0:
 775             x = self._file.read(min(todo, 1<<16))
 776             s.update(x)
 777             todo -= len(x)
 778         return s.digest()
 779
 780     def get_ref(self, sha):
 781         """Get the object for a ref SHA, only looking in this pack."""
 782         # TODO: cache these results
 783         if self.pack is None:
 784             raise KeyError(sha)
 785         offset = self.pack.index.object_index(sha)
 786         if not offset:
 787             raise KeyError(sha)
 788         type, obj = self.get_object_at(offset)
 789         return offset, type, obj
 790
 791     def resolve_object(self, offset, type, obj, get_ref=None):
 792         """Resolve an object, possibly resolving deltas when necessary.
 793
 794         :return: Tuple with object type and contents.
 795         """
 796         if type not in DELTA_TYPES:
 797             return type, obj
 798
 799         if get_ref is None:
 800             get_ref = self.get_ref
 801         if type == OFS_DELTA:
 802             (delta_offset, delta) = obj
 803             # TODO: clean up asserts and replace with nicer error messages
 804             assert isinstance(offset, int)
 805             assert isinstance(delta_offset, int)
 806             base_offset = offset-delta_offset
 807             type, base_obj = self.get_object_at(base_offset)
 808             assert isinstance(type, int)
 809         elif type == REF_DELTA:
 810             (basename, delta) = obj
 811             assert isinstance(basename, str) and len(basename) == 20
 812             base_offset, type, base_obj = get_ref(basename)
 813             assert isinstance(type, int)
 814         type, base_chunks = self.resolve_object(base_offset, type, base_obj)
 815         chunks = apply_delta(base_chunks, delta)
 816         # TODO(dborowitz): This can result in poor performance if large base
 817         # objects are separated from deltas in the pack. We should reorganize
 818         # so that we apply deltas to all objects in a chain one after the other
 819         # to optimize cache performance.
 820         if offset is not None:
 821             self._offset_cache[offset] = type, chunks
 822         return type, chunks
 823
 824     def iterobjects(self, progress=None):
 825         return PackObjectIterator(self, progress)
 826
 827     def iterentries(self, progress=None):
 828         """Yield entries summarizing the contents of this pack.
 829
 830         :param progress: Progress function, called with current and total
 831             object count.
 832         :return: iterator of tuples with (sha, offset, crc32)
 833         """
 834         for offset, type, obj, crc32 in self.iterobjects(progress=progress):
 835             assert isinstance(offset, int)
 836             assert isinstance(type, int)
 837             assert isinstance(obj, list) or isinstance(obj, tuple)
 838             type, obj = self.resolve_object(offset, type, obj)
 839             yield obj_sha(type, obj), offset, crc32
 840
 841     def sorted_entries(self, progress=None):
 842         """Return entries in this pack, sorted by SHA.
 843
 844         :param progress: Progress function, called with current and total
 845             object count
 846         :return: List of tuples with (sha, offset, crc32)
 847         """
 848         ret = list(self.iterentries(progress=progress))
 849         ret.sort()
 850         return ret
 851
 852     def create_index_v1(self, filename, progress=None):
 853         """Create a version 1 file for this data file.
 854
 855         :param filename: Index filename.
 856         :param progress: Progress report function
 857         :return: Checksum of index file
 858         """
 859         entries = self.sorted_entries(progress=progress)
 860         f = GitFile(filename, 'wb')
 861         try:
 862             return write_pack_index_v1(f, entries, self.calculate_checksum())
 863         finally:
 864             f.close()
 865
 866     def create_index_v2(self, filename, progress=None):
 867         """Create a version 2 index file for this data file.
 868
 869         :param filename: Index filename.
 870         :param progress: Progress report function
 871         :return: Checksum of index file
 872         """
 873         entries = self.sorted_entries(progress=progress)
 874         f = GitFile(filename, 'wb')
 875         try:
 876             return write_pack_index_v2(f, entries, self.calculate_checksum())
 877         finally:
 878             f.close()
 879
 880     def create_index(self, filename, progress=None,
 881                      version=2):
 882         """Create an  index file for this data file.
 883
 884         :param filename: Index filename.
 885         :param progress: Progress report function
 886         :return: Checksum of index file
 887         """
 888         if version == 1:
 889             return self.create_index_v1(filename, progress)
 890         elif version == 2:
 891             return self.create_index_v2(filename, progress)
 892         else:
 893             raise ValueError("unknown index format %d" % version)
 894
 895     def get_stored_checksum(self):
 896         """Return the expected checksum stored in this pack."""
 897         self._file.seek(self._get_size()-20)
 898         return self._file.read(20)
 899
 900     def check(self):
 901         """Check the consistency of this pack."""
 902         actual = self.calculate_checksum()
 903         stored = self.get_stored_checksum()
 904         if actual != stored:
 905             raise ChecksumMismatch(stored, actual)
 906
 907     def get_object_at(self, offset):
 908         """Given an offset in to the packfile return the object that is there.
 909
 910         Using the associated index the location of an object can be looked up,
 911         and then the packfile can be asked directly for that object using this
 912         function.
 913         """
 914         if offset in self._offset_cache:
 915             return self._offset_cache[offset]
 916         assert isinstance(offset, long) or isinstance(offset, int),\
 917                 "offset was %r" % offset
 918         assert offset >= self._header_size
 919         self._file.seek(offset)
 920         return unpack_object(self._file.read)[:2]
 921
 922
 923 class ThinPackData(PackData):
 924     """PackData for thin packs, which require an ObjectStore for resolving."""
 925
 926     def __init__(self, resolve_ext_ref, *args, **kwargs):
 927         super(ThinPackData, self).__init__(*args, **kwargs)
 928         self.resolve_ext_ref = resolve_ext_ref
 929
 930     @classmethod
 931     def from_file(cls, resolve_ext_ref, file, size):
 932         return cls(resolve_ext_ref, str(file), file=file, size=size)
 933
 934     def get_ref(self, sha):
 935         """Resolve a reference looking in both this pack and the store."""
 936         try:
 937             # As part of completing a pack we create a Pack object with a
 938             # ThinPackData and a full PackIndex, so check in the index first if
 939             # possible.
 940             # TODO(dborowitz): reevaluate this when the pack completion code is
 941             # rewritten.
 942             return super(ThinPackData, self).get_ref(sha)
 943         except KeyError:
 944             type, obj = self.resolve_ext_ref(sha)
 945             return None, type, obj
 946
 947     def iterentries(self, progress=None):
 948         """Yield entries summarizing the contents of this pack.
 949
 950         :param progress: Progress function, called with current and
 951             total object count.
 952
 953         This will yield tuples with (sha, offset, crc32)
 954         """
 955         found = {}
 956         postponed = defaultdict(list)
 957
 958         class Postpone(Exception):
 959             """Raised to postpone delta resolving."""
 960
 961             def __init__(self, sha):
 962                 self.sha = sha
 963
 964         def get_ref_text(sha):
 965             assert len(sha) == 20
 966             if sha in found:
 967                 offset = found[sha]
 968                 type, obj = self.get_object_at(offset)
 969                 return offset, type, obj
 970             try:
 971                 return self.get_ref(sha)
 972             except KeyError:
 973                 raise Postpone(sha)
 974
 975         extra = []
 976         todo = chain(self.iterobjects(progress=progress), extra)
 977         for (offset, type, obj, crc32) in todo:
 978             assert isinstance(offset, int)
 979             if obj is None:
 980                 # Inflate postponed delta
 981                 obj, type = self.get_object_at(offset)
 982             assert isinstance(type, int)
 983             assert isinstance(obj, list) or isinstance(obj, tuple)
 984             try:
 985                 type, obj = self.resolve_object(offset, type, obj, get_ref_text)
 986             except Postpone, e:
 987                 # Save memory by not storing the inflated obj in postponed
 988                 postponed[e.sha].append((offset, type, None, crc32))
 989             else:
 990                 sha = obj_sha(type, obj)
 991                 found[sha] = offset
 992                 yield sha, offset, crc32
 993                 extra.extend(postponed.pop(sha, []))
 994         if postponed:
 995             raise KeyError([sha_to_hex(h) for h in postponed.keys()])
 996
 997
 998 class SHA1Reader(object):
 999     """Wrapper around a file-like object that remembers the SHA1 of its data."""
1000
1001     def __init__(self, f):
1002         self.f = f
1003         self.sha1 = make_sha("")
1004
1005     def read(self, num=None):
1006         data = self.f.read(num)
1007         self.sha1.update(data)
1008         return data
1009
1010     def check_sha(self):
1011         stored = self.f.read(20)
1012         if stored != self.sha1.digest():
1013             raise ChecksumMismatch(self.sha1.hexdigest(), sha_to_hex(stored))
1014
1015     def close(self):
1016         return self.f.close()
1017
1018     def tell(self):
1019         return self.f.tell()
1020
1021
1022 class SHA1Writer(object):
1023     """Wrapper around a file-like object that remembers the SHA1 of its data."""
1024
1025     def __init__(self, f):
1026         self.f = f
1027         self.sha1 = make_sha("")
1028
1029     def write(self, data):
1030         self.sha1.update(data)
1031         self.f.write(data)
1032
1033     def write_sha(self):
1034         sha = self.sha1.digest()
1035         assert len(sha) == 20
1036         self.f.write(sha)
1037         return sha
1038
1039     def close(self):
1040         sha = self.write_sha()
1041         self.f.close()
1042         return sha
1043
1044     def tell(self):
1045         return self.f.tell()
1046
1047
1048 def write_pack_object(f, type, object):
1049     """Write pack object to a file.
1050
1051     :param f: File to write to
1052     :param type: Numeric type of the object
1053     :param object: Object to write
1054     :return: Tuple with offset at which the object was written, and crc32
1055     """
1056     offset = f.tell()
1057     packed_data_hdr = ""
1058     if type == OFS_DELTA:
1059         (delta_base_offset, object) = object
1060     elif type == REF_DELTA:
1061         (basename, object) = object
1062     size = len(object)
1063     c = (type << 4) | (size & 15)
1064     size >>= 4
1065     while size:
1066         packed_data_hdr += (chr(c | 0x80))
1067         c = size & 0x7f
1068         size >>= 7
1069     packed_data_hdr += chr(c)
1070     if type == OFS_DELTA:
1071         ret = [delta_base_offset & 0x7f]
1072         delta_base_offset >>= 7
1073         while delta_base_offset:
1074             delta_base_offset -= 1
1075             ret.insert(0, 0x80 | (delta_base_offset & 0x7f))
1076             delta_base_offset >>= 7
1077         packed_data_hdr += "".join([chr(x) for x in ret])
1078     elif type == REF_DELTA:
1079         assert len(basename) == 20
1080         packed_data_hdr += basename
1081     packed_data = packed_data_hdr + zlib.compress(object)
1082     f.write(packed_data)
1083     return (offset, (zlib.crc32(packed_data) & 0xffffffff))
1084
1085
1086 def write_pack(filename, objects, num_objects):
1087     """Write a new pack data file.
1088
1089     :param filename: Path to the new pack file (without .pack extension)
1090     :param objects: Iterable over (object, path) tuples to write
1091     :param num_objects: Number of objects to write
1092     :return: Tuple with checksum of pack file and index file
1093     """
1094     f = GitFile(filename + ".pack", 'wb')
1095     try:
1096         entries, data_sum = write_pack_data(f, objects, num_objects)
1097     finally:
1098         f.close()
1099     entries.sort()
1100     f = GitFile(filename + ".idx", 'wb')
1101     try:
1102         return data_sum, write_pack_index_v2(f, entries, data_sum)
1103     finally:
1104         f.close()
1105
1106
1107 def write_pack_header(f, num_objects):
1108     """Write a pack header for the given number of objects."""
1109     f.write('PACK')                          # Pack header
1110     f.write(struct.pack('>L', 2))            # Pack version
1111     f.write(struct.pack('>L', num_objects))  # Number of objects in pack
1112
1113
1114 def write_pack_data(f, objects, num_objects, window=10):
1115     """Write a new pack data file.
1116
1117     :param f: File to write to
1118     :param objects: Iterable over (object, path) tuples to write
1119     :param num_objects: Number of objects to write
1120     :param window: Sliding window size for searching for deltas; currently
1121                    unimplemented
1122     :return: List with (name, offset, crc32 checksum) entries, pack checksum
1123     """
1124     recency = list(objects)
1125     # FIXME: Somehow limit delta depth
1126     # FIXME: Make thin-pack optional (its not used when cloning a pack)
1127     # Build a list of objects ordered by the magic Linus heuristic
1128     # This helps us find good objects to diff against us
1129     magic = []
1130     for obj, path in recency:
1131         magic.append( (obj.type_num, path, 1, -obj.raw_length(), obj) )
1132     magic.sort()
1133     # Build a map of objects and their index in magic - so we can find
1134     # preceeding objects to diff against
1135     offs = {}
1136     for i in range(len(magic)):
1137         offs[magic[i][4]] = i
1138     # Write the pack
1139     entries = []
1140     f = SHA1Writer(f)
1141     write_pack_header(f, num_objects)
1142     for o, path in recency:
1143         sha1 = o.sha().digest()
1144         orig_t = o.type_num
1145         raw = o.as_raw_string()
1146         winner = raw
1147         t = orig_t
1148         #for i in range(offs[o]-window, window):
1149         #    if i < 0 or i >= len(offs): continue
1150         #    b = magic[i][4]
1151         #    if b.type_num != orig_t: continue
1152         #    base = b.as_raw_string()
1153         #    delta = create_delta(base, raw)
1154         #    if len(delta) < len(winner):
1155         #        winner = delta
1156         #        t = 6 if magic[i][2] == 1 else 7
1157         offset, crc32 = write_pack_object(f, t, winner)
1158         entries.append((sha1, offset, crc32))
1159     return entries, f.write_sha()
1160
1161
1162 def write_pack_index_v1(f, entries, pack_checksum):
1163     """Write a new pack index file.
1164
1165     :param f: A file-like object to write to
1166     :param entries: List of tuples with object name (sha), offset_in_pack,
1167         and crc32_checksum.
1168     :param pack_checksum: Checksum of the pack file.
1169     :return: The SHA of the written index file
1170     """
1171     f = SHA1Writer(f)
1172     fan_out_table = defaultdict(lambda: 0)
1173     for (name, offset, entry_checksum) in entries:
1174         fan_out_table[ord(name[0])] += 1
1175     # Fan-out table
1176     for i in range(0x100):
1177         f.write(struct.pack(">L", fan_out_table[i]))
1178         fan_out_table[i+1] += fan_out_table[i]
1179     for (name, offset, entry_checksum) in entries:
1180         f.write(struct.pack(">L20s", offset, name))
1181     assert len(pack_checksum) == 20
1182     f.write(pack_checksum)
1183     return f.write_sha()
1184
1185
1186 def create_delta(base_buf, target_buf):
1187     """Use python difflib to work out how to transform base_buf to target_buf.
1188
1189     :param base_buf: Base buffer
1190     :param target_buf: Target buffer
1191     """
1192     assert isinstance(base_buf, str)
1193     assert isinstance(target_buf, str)
1194     out_buf = ""
1195     # write delta header
1196     def encode_size(size):
1197         ret = ""
1198         c = size & 0x7f
1199         size >>= 7
1200         while size:
1201             ret += chr(c | 0x80)
1202             c = size & 0x7f
1203             size >>= 7
1204         ret += chr(c)
1205         return ret
1206     out_buf += encode_size(len(base_buf))
1207     out_buf += encode_size(len(target_buf))
1208     # write out delta opcodes
1209     seq = difflib.SequenceMatcher(a=base_buf, b=target_buf)
1210     for opcode, i1, i2, j1, j2 in seq.get_opcodes():
1211         # Git patch opcodes don't care about deletes!
1212         #if opcode == "replace" or opcode == "delete":
1213         #    pass
1214         if opcode == "equal":
1215             # If they are equal, unpacker will use data from base_buf
1216             # Write out an opcode that says what range to use
1217             scratch = ""
1218             op = 0x80
1219             o = i1
1220             for i in range(4):
1221                 if o & 0xff << i*8:
1222                     scratch += chr((o >> i*8) & 0xff)
1223                     op |= 1 << i
1224             s = i2 - i1
1225             for i in range(2):
1226                 if s & 0xff << i*8:
1227                     scratch += chr((s >> i*8) & 0xff)
1228                     op |= 1 << (4+i)
1229             out_buf += chr(op)
1230             out_buf += scratch
1231         if opcode == "replace" or opcode == "insert":
1232             # If we are replacing a range or adding one, then we just
1233             # output it to the stream (prefixed by its size)
1234             s = j2 - j1
1235             o = j1
1236             while s > 127:
1237                 out_buf += chr(127)
1238                 out_buf += target_buf[o:o+127]
1239                 s -= 127
1240                 o += 127
1241             out_buf += chr(s)
1242             out_buf += target_buf[o:o+s]
1243     return out_buf
1244
1245
1246 def apply_delta(src_buf, delta):
1247     """Based on the similar function in git's patch-delta.c.
1248
1249     :param src_buf: Source buffer
1250     :param delta: Delta instructions
1251     """
1252     if type(src_buf) != str:
1253         src_buf = "".join(src_buf)
1254     if type(delta) != str:
1255         delta = "".join(delta)
1256     out = []
1257     index = 0
1258     delta_length = len(delta)
1259     def get_delta_header_size(delta, index):
1260         size = 0
1261         i = 0
1262         while delta:
1263             cmd = ord(delta[index])
1264             index += 1
1265             size |= (cmd & ~0x80) << i
1266             i += 7
1267             if not cmd & 0x80:
1268                 break
1269         return size, index
1270     src_size, index = get_delta_header_size(delta, index)
1271     dest_size, index = get_delta_header_size(delta, index)
1272     assert src_size == len(src_buf), "%d vs %d" % (src_size, len(src_buf))
1273     while index < delta_length:
1274         cmd = ord(delta[index])
1275         index += 1
1276         if cmd & 0x80:
1277             cp_off = 0
1278             for i in range(4):
1279                 if cmd & (1 << i):
1280                     x = ord(delta[index])
1281                     index += 1
1282                     cp_off |= x << (i * 8)
1283             cp_size = 0
1284             for i in range(3):
1285                 if cmd & (1 << (4+i)):
1286                     x = ord(delta[index])
1287                     index += 1
1288                     cp_size |= x << (i * 8)
1289             if cp_size == 0:
1290                 cp_size = 0x10000
1291             if (cp_off + cp_size < cp_size or
1292                 cp_off + cp_size > src_size or
1293                 cp_size > dest_size):
1294                 break
1295             out.append(src_buf[cp_off:cp_off+cp_size])
1296         elif cmd != 0:
1297             out.append(delta[index:index+cmd])
1298             index += cmd
1299         else:
1300             raise ApplyDeltaError("Invalid opcode 0")
1301
1302     if index != delta_length:
1303         raise ApplyDeltaError("delta not empty: %r" % delta[index:])
1304
1305     if dest_size != chunks_length(out):
1306         raise ApplyDeltaError("dest size incorrect")
1307
1308     return out
1309
1310
1311 def write_pack_index_v2(f, entries, pack_checksum):
1312     """Write a new pack index file.
1313
1314     :param f: File-like object to write to
1315     :param entries: List of tuples with object name (sha), offset_in_pack, and
1316         crc32_checksum.
1317     :param pack_checksum: Checksum of the pack file.
1318     :return: The SHA of the index file written
1319     """
1320     f = SHA1Writer(f)
1321     f.write('\377tOc') # Magic!
1322     f.write(struct.pack(">L", 2))
1323     fan_out_table = defaultdict(lambda: 0)
1324     for (name, offset, entry_checksum) in entries:
1325         fan_out_table[ord(name[0])] += 1
1326     # Fan-out table
1327     for i in range(0x100):
1328         f.write(struct.pack(">L", fan_out_table[i]))
1329         fan_out_table[i+1] += fan_out_table[i]
1330     for (name, offset, entry_checksum) in entries:
1331         f.write(name)
1332     for (name, offset, entry_checksum) in entries:
1333         f.write(struct.pack(">L", entry_checksum))
1334     for (name, offset, entry_checksum) in entries:
1335         # FIXME: handle if MSBit is set in offset
1336         f.write(struct.pack(">L", offset))
1337     # FIXME: handle table for pack files > 8 Gb
1338     assert len(pack_checksum) == 20
1339     f.write(pack_checksum)
1340     return f.write_sha()
1341
1342
1343 class Pack(object):
1344     """A Git pack object."""
1345
1346     def __init__(self, basename):
1347         self._basename = basename
1348         self._data = None
1349         self._idx = None
1350         self._idx_path = self._basename + ".idx"
1351         self._data_path = self._basename + ".pack"
1352         self._data_load = lambda: PackData(self._data_path)
1353         self._idx_load = lambda: load_pack_index(self._idx_path)
1354
1355     @classmethod
1356     def from_lazy_objects(self, data_fn, idx_fn):
1357         """Create a new pack object from callables to load pack data and
1358         index objects."""
1359         ret = Pack("")
1360         ret._data_load = data_fn
1361         ret._idx_load = idx_fn
1362         return ret
1363
1364     @classmethod
1365     def from_objects(self, data, idx):
1366         """Create a new pack object from pack data and index objects."""
1367         ret = Pack("")
1368         ret._data_load = lambda: data
1369         ret._idx_load = lambda: idx
1370         return ret
1371
1372     def name(self):
1373         """The SHA over the SHAs of the objects in this pack."""
1374         return self.index.objects_sha1()
1375
1376     @property
1377     def data(self):
1378         """The pack data object being used."""
1379         if self._data is None:
1380             self._data = self._data_load()
1381             self._data.pack = self
1382             assert len(self.index) == len(self._data)
1383             idx_stored_checksum = self.index.get_pack_checksum()
1384             data_stored_checksum = self._data.get_stored_checksum()
1385             if idx_stored_checksum != data_stored_checksum:
1386                 raise ChecksumMismatch(sha_to_hex(idx_stored_checksum),
1387                                        sha_to_hex(data_stored_checksum))
1388         return self._data
1389
1390     @property
1391     def index(self):
1392         """The index being used.
1393
1394         :note: This may be an in-memory index
1395         """
1396         if self._idx is None:
1397             self._idx = self._idx_load()
1398         return self._idx
1399
1400     def close(self):
1401         if self._data is not None:
1402             self._data.close()
1403         self.index.close()
1404
1405     def __eq__(self, other):
1406         return type(self) == type(other) and self.index == other.index
1407
1408     def __len__(self):
1409         """Number of entries in this pack."""
1410         return len(self.index)
1411
1412     def __repr__(self):
1413         return "%s(%r)" % (self.__class__.__name__, self._basename)
1414
1415     def __iter__(self):
1416         """Iterate over all the sha1s of the objects in this pack."""
1417         return iter(self.index)
1418
1419     def check(self):
1420         """Check the integrity of this pack.
1421
1422         :raise ChecksumMismatch: if a checksum for the index or data is wrong
1423         """
1424         self.index.check()
1425         self.data.check()
1426         for obj in self.iterobjects():
1427             obj.check()
1428         # TODO: object connectivity checks
1429
1430     def get_stored_checksum(self):
1431         return self.data.get_stored_checksum()
1432
1433     def __contains__(self, sha1):
1434         """Check whether this pack contains a particular SHA1."""
1435         try:
1436             self.index.object_index(sha1)
1437             return True
1438         except KeyError:
1439             return False
1440
1441     def get_raw(self, sha1):
1442         offset = self.index.object_index(sha1)
1443         obj_type, obj = self.data.get_object_at(offset)
1444         if type(offset) is long:
1445           offset = int(offset)
1446         type_num, chunks = self.data.resolve_object(offset, obj_type, obj)
1447         return type_num, "".join(chunks)
1448
1449     def __getitem__(self, sha1):
1450         """Retrieve the specified SHA1."""
1451         type, uncomp = self.get_raw(sha1)
1452         return ShaFile.from_raw_string(type, uncomp)
1453
1454     def iterobjects(self):
1455         """Iterate over the objects in this pack."""
1456         for offset, type, obj, crc32 in self.data.iterobjects():
1457             assert isinstance(offset, int)
1458             yield ShaFile.from_raw_chunks(
1459               *self.data.resolve_object(offset, type, obj))
1460
1461
1462 try:
1463     from dulwich._pack import apply_delta, bisect_find_sha
1464 except ImportError:
1465     pass