1 # pack.py -- For dealing with packed git objects.
2 # Copyright (C) 2007 James Westby <jw+debian@jameswestby.net>
3 # Copyright (C) 2008-2009 Jelmer Vernooij <jelmer@samba.org>
5 # This program is free software; you can redistribute it and/or
6 # modify it under the terms of the GNU General Public License
7 # as published by the Free Software Foundation; version 2
8 # of the License or (at your option) a later version.
10 # This program is distributed in the hope that it will be useful,
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 # GNU General Public License for more details.
15 # You should have received a copy of the GNU General Public License
16 # along with this program; if not, write to the Free Software
17 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
20 """Classes for dealing with packed git objects.
22 A pack is a compact representation of a bunch of objects, stored
23 using deltas where possible.
25 They have two parts, the pack file, which stores the data, and an index
26 that tells you where the data is.
28 To find an object you look in all of the index files 'til you find a
29 match for the object name. You then use the pointer got from this as
30 a pointer in to the corresponding packfile.
34 from collections import defaultdict
36 from dulwich._compat import defaultdict
39 from cStringIO import (
42 from collections import (
46 from itertools import (
60 from struct import unpack_from
62 from dulwich._compat import unpack_from
67 from dulwich.errors import (
71 from dulwich.file import GitFile
72 from dulwich.lru_cache import (
75 from dulwich._compat import (
80 from dulwich.objects import (
# Whether this Python supports mmap() with a non-zero offset argument
# (the offset keyword appeared in Python 2.6; Python 3 always has it).
supports_mmap_offset = (sys.version_info[0] >= 3 or
        (sys.version_info[0] == 2 and sys.version_info[1] >= 6))


# Pack entry type numbers that denote deltas rather than full objects.
# OFS_DELTA/REF_DELTA are presumably defined earlier in the file (their
# definitions are not visible in this excerpt).
DELTA_TYPES = (OFS_DELTA, REF_DELTA)
def take_msb_bytes(read, crc32=None):
    """Read bytes marked with most significant bit.

    Keeps reading single bytes until one arrives whose MSB is clear,
    i.e. the standard git variable-length integer framing.

    :param read: Read function
    :param crc32: Optional starting CRC32; when not None it is folded over
        every byte consumed and the updated value is returned.
    :return: Tuple of (list of integer byte values read, crc32 or None)
    """
    # As pasted, `ret` and `b` were used without ever being assigned and the
    # function returned nothing; restore the accumulator, the single-byte
    # reads, and the return value.
    ret = []
    while len(ret) == 0 or ret[-1] & 0x80:
        b = read(1)
        if crc32 is not None:
            crc32 = binascii.crc32(b, crc32)
        ret.append(ord(b))
    return ret, crc32
class UnpackedObject(object):
    """Class encapsulating an object unpacked from a pack file.

    These objects should only be created from within unpack_object. Most
    members start out as empty and are filled in at various points by
    read_zlib_chunks, unpack_object, DeltaChainIterator, etc.

    End users of this object should take care that the function they're getting
    this object from is guaranteed to set the members they need.
    """

    # NOTE(review): interior lines of this class appear to have been lost in
    # this paste: the `__slots__ = [` opener (and, judging by __init__ and
    # read_zlib_chunks, a 'crc32' slot), several method headers
    # (`sha`, `sha_file`, `_obj`, `__repr__`) and early/final `return`
    # statements in `__eq__`. Restore from upstream before executing.
        'offset',  # Offset in its pack.
        '_sha',  # Cached binary SHA.
        'obj_type_num',  # Type of this object.
        'obj_chunks',  # Decompressed and delta-resolved chunks.
        'pack_type_num',  # Type of this object in the pack (may be a delta).
        'delta_base',  # Delta base offset or SHA.
        'comp_chunks',  # Compressed object chunks.
        'decomp_chunks',  # Decompressed object chunks.
        'decomp_len',  # Decompressed length of this object.

    # TODO(dborowitz): read_zlib_chunks and unpack_object could very well be
    # methods of this object.
    def __init__(self, pack_type_num, delta_base, decomp_len, crc32):
        self.pack_type_num = pack_type_num
        self.delta_base = delta_base
        self.comp_chunks = None
        self.decomp_chunks = []
        self.decomp_len = decomp_len

        # A delta's resolved type/content are unknown until the chain is
        # applied; a non-delta is its own resolved object.  (An `else:` line
        # appears to be missing before the second group of assignments.)
        if pack_type_num in DELTA_TYPES:
            self.obj_type_num = None
            self.obj_chunks = None
            self.obj_type_num = pack_type_num
            self.obj_chunks = self.decomp_chunks
            self.delta_base = delta_base

        """Return the binary SHA of this object."""
        if self._sha is None:
            self._sha = obj_sha(self.obj_type_num, self.obj_chunks)

        """Return a ShaFile from this object."""
        return ShaFile.from_raw_chunks(self.obj_type_num, self.obj_chunks)

    # Only provided for backwards compatibility with code that expects either
    # chunks or a delta tuple.
        """Return the decompressed chunks, or (delta base, delta chunks)."""
        if self.pack_type_num in DELTA_TYPES:
            return (self.delta_base, self.decomp_chunks)
        return self.decomp_chunks

    def __eq__(self, other):
        if not isinstance(other, UnpackedObject):
        # Slot-by-slot comparison; the early `return False` and the final
        # `return True` are among the lines missing from this paste.
        for slot in self.__slots__:
            if getattr(self, slot) != getattr(other, slot):

    def __ne__(self, other):
        return not (self == other)

        data = ['%s=%r' % (s, getattr(self, s)) for s in self.__slots__]
        return '%s(%s)' % (self.__class__.__name__, ', '.join(data))
def read_zlib_chunks(read_some, unpacked, include_comp=False,
                     buffer_size=_ZLIB_BUFSIZE):
    """Read zlib data from a buffer.

    This function requires that the buffer have additional data following the
    compressed data, which is guaranteed to be the case for git pack files.

    :param read_some: Read function that returns at least one byte, but may
        return less than the requested size.
    :param unpacked: An UnpackedObject to write result data to. If its crc32
        attr is not None, the CRC32 of the compressed bytes will be computed
        using this starting CRC32.
        After this function, will have the following attrs set:
        * comp_chunks (if include_comp is True)
    :param include_comp: If True, include compressed data in the result.
    :param buffer_size: Size of the read buffer.
    :return: Leftover unused data from the decompression.
    :raise zlib.error: if a decompression error occurred.
    """
    # NOTE(review): several interior lines are missing from this paste (the
    # `comp_chunks = []`/`decomp_len = 0` initializers, the `while True:`
    # loop header, the `if unused:` branch, `break`, the crc32 masking and
    # the final `return unused`); restore from upstream before executing.
    if unpacked.decomp_len <= -1:
        raise ValueError('non-negative zlib data stream size expected')
    decomp_obj = zlib.decompressobj()
    decomp_chunks = unpacked.decomp_chunks
    crc32 = unpacked.crc32

    add = read_some(buffer_size)
        raise zlib.error('EOF before end of zlib stream')
    comp_chunks.append(add)
    decomp = decomp_obj.decompress(add)
    decomp_len += len(decomp)
    decomp_chunks.append(decomp)
    # Anything left over after the zlib stream ends is "unused" pack data
    # that belongs to the next object.
    unused = decomp_obj.unused_data
    if crc32 is not None:
        crc32 = binascii.crc32(add[:-left], crc32)
    comp_chunks[-1] = add[:-left]
    elif crc32 is not None:
        crc32 = binascii.crc32(add, crc32)
    if crc32 is not None:

    # Guard against a corrupt size header in the pack entry.
    if decomp_len != unpacked.decomp_len:
        raise zlib.error('decompressed data does not match expected size')

    unpacked.crc32 = crc32
    unpacked.comp_chunks = comp_chunks
def iter_sha1(iter):
    """Return the hexdigest of the SHA1 over a set of names.

    :param iter: Iterator over string objects
    :return: 40-byte hex sha1 digest
    """
    # As pasted, the `def` line and the accumulation loop were lost, leaving
    # an orphan docstring and a bare `return sha1.hexdigest()`; restore them.
    # (The parameter is named `iter` upstream, shadowing the builtin; kept
    # for interface compatibility.)
    sha1 = make_sha()
    for name in iter:
        sha1.update(name)
    return sha1.hexdigest()
def load_pack_index(path):
    """Load an index file by path.

    :param path: Path to the index file
    :return: A PackIndex loaded from the given path
    """
    # The docstring documented a non-existent `filename` parameter (fixed
    # above), and the GitFile handle was never closed: close it once the
    # index has loaded its contents (the PackIndex keeps its own mapping).
    f = GitFile(path, 'rb')
    try:
        return load_pack_index_file(path, f)
    finally:
        f.close()
def _load_file_contents(f, size=None):
    """Return a (contents, size) pair for file-like object *f*, preferring a
    read-only mmap when a real file descriptor is available.
    """
    # NOTE(review): interior lines are missing from this paste: the
    # `fd = f.fileno()` call, the `if size is None:` guard, the mmap-failure
    # fallback to `f.read()`, and the non-fileno path; as shown, `fd` and the
    # final `contents` are unbound.  Restore from upstream before executing.
    fileno = getattr(f, 'fileno', None)
    # Attempt to use mmap if possible
    if fileno is not None:
        size = os.fstat(fd).st_size
        contents = mmap.mmap(fd, size, access=mmap.ACCESS_READ)
        return contents, size
    return contents, size
def load_pack_index_file(path, f):
    """Load an index file from a file-like object.

    :param path: Path for the index file
    :param f: File-like object
    :return: A PackIndex loaded from the given file
    """
    # NOTE(review): the `if version == 2:` guard around the PackIndex2
    # return (and the tail of its argument list, `size=size)`) is missing
    # from this paste; restore from upstream before executing.
    contents, size = _load_file_contents(f)
    # Version-2 index files start with the magic '\377tOc'; a v1 file has no
    # magic and starts directly with the fan-out table.
    if contents[:4] == '\377tOc':
        version = struct.unpack('>L', contents[4:8])[0]
        return PackIndex2(path, file=f, contents=contents,
        raise KeyError('Unknown pack index format %d' % version)
    return PackIndex1(path, file=f, contents=contents, size=size)
def bisect_find_sha(start, end, sha, unpack_name):
    """Find a SHA in a data blob with sorted SHAs.

    :param start: Start index of range to search
    :param end: End index of range to search
    :param sha: Sha to find
    :param unpack_name: Callback to retrieve SHA by index
    :return: Index of the SHA, or None if it wasn't found
    """
    # NOTE(review): the binary-search loop (midpoint computation, range
    # narrowing on the comparison result, and the `return` statements) is
    # missing from this paste; only the midpoint probe survives, with `i`
    # unbound.  Also note `cmp` is Python-2-only.
    file_sha = unpack_name(i)
    x = cmp(file_sha, sha)
class PackIndex(object):
    """An index in to a packfile.

    Given a sha id of an object a pack index can tell you the location in the
    packfile of that object if it has it.
    """

    # NOTE(review): several lines are missing from this paste: the `__len__`,
    # `__iter__` and `_itersha` method headers, the `return False`/`return
    # True` lines of `__eq__`, and parts of two docstrings.  Restore from
    # upstream before executing.

    def __eq__(self, other):
        if not isinstance(other, PackIndex):
        # Two indexes are equal when they list the same names in the same
        # order; crc32 is deliberately ignored (hence the `_` slots).
        for (name1, _, _), (name2, _, _) in izip(self.iterentries(),
                                                 other.iterentries()):

    def __ne__(self, other):
        return not self.__eq__(other)

        """Return the number of entries in this pack index."""
        raise NotImplementedError(self.__len__)

        """Iterate over the SHAs in this pack."""
        return imap(sha_to_hex, self._itersha())

    def iterentries(self):
        """Iterate over the entries in this pack index.

        :return: iterator over tuples with object name, offset in packfile and
        """
        raise NotImplementedError(self.iterentries)

    def get_pack_checksum(self):
        """Return the SHA1 checksum stored for the corresponding packfile.

        :return: 20-byte binary digest
        """
        raise NotImplementedError(self.get_pack_checksum)

    def object_index(self, sha):
        """Return the index in to the corresponding packfile for the object.

        Given the name of an object it will return the offset that object
        lives at within the corresponding pack file. If the pack file doesn't
        have the object then None will be returned.
        """
        # Presumably a length check distinguishing hex from binary SHAs is
        # among the missing lines here -- as shown, a binary SHA would be
        # passed to hex_to_sha unconditionally.
        sha = hex_to_sha(sha)
        return self._object_index(sha)

    def _object_index(self, sha):
        :param sha: A *binary* SHA string. (20 characters long)_
        raise NotImplementedError(self._object_index)

    def objects_sha1(self):
        """Return the hex SHA1 over all the shas of all objects in this pack.

        :note: This is used for the filename of the pack.
        """
        return iter_sha1(self._itersha())

        """Yield all the SHA1's of the objects in the index, sorted."""
        raise NotImplementedError(self._itersha)
class MemoryPackIndex(PackIndex):
    """Pack index that is stored entirely in memory."""

    def __init__(self, entries, pack_checksum=None):
        """Create a new MemoryPackIndex.

        :param entries: Sequence of name, idx, crc32 (sorted)
        :param pack_checksum: Optional pack checksum
        """
        # Only the offset is needed for lookups; crc32 stays available via
        # _entries for iterentries().
        self._by_sha = {}
        for name, idx, crc32 in entries:
            self._by_sha[name] = idx
        self._entries = entries
        self._pack_checksum = pack_checksum

    def get_pack_checksum(self):
        """Return the SHA1 checksum of the corresponding pack (or None)."""
        return self._pack_checksum

    def __len__(self):
        """Return the number of entries in this pack index."""
        return len(self._entries)

    def _object_index(self, sha):
        """Return the pack offset for a *binary* SHA.

        _by_sha maps name -> offset (a plain integer), so return the value
        directly; the previous `self._by_sha[sha][0]` indexed into that
        integer and raised TypeError on every lookup.
        """
        return self._by_sha[sha]

    def _itersha(self):
        """Yield the binary SHAs of the objects in this index."""
        return iter(self._by_sha)

    def iterentries(self):
        """Iterate over (name, offset, crc32) tuples."""
        return iter(self._entries)
class FilePackIndex(PackIndex):
    """Pack index that is based on a file.

    To do the loop it opens the file, and indexes first 256 4 byte groups
    with the first byte of the sha id. The value in the four byte group indexed
    is the end of the group that shares the same starting byte. Subtract one
    from the starting byte and index again to find the start of the group.
    The values are sorted by sha id within the group, so do the math to find
    the start and end offset and then bisect in to find if the value is present.
    """

    # NOTE(review): interior lines are missing throughout this class in this
    # paste: the `if file is None:`/`else:` pairs in __init__, the `close`,
    # `__len__`, `_itersha` and `check` method headers, `ret = []`/`return
    # ret` in _read_fan_out_table, and the fan-out bucketing in
    # _object_index.  Restore from upstream before executing.

    def __init__(self, filename, file=None, contents=None, size=None):
        """Create a pack index object.

        Provide it with the name of the index file to consider, and it will map
        it whenever required.
        """
        self._filename = filename
        # Take the size now, so it can be checked each time we map the file to
        # ensure that it hasn't changed.
        self._file = GitFile(filename, 'rb')
        self._contents, self._size = _load_file_contents(self._file, size)
        self._contents, self._size = (contents, size)

    def __eq__(self, other):
        # Quick optimization:
        if (isinstance(other, FilePackIndex) and
            self._fan_out_table != other._fan_out_table):
        # Fall back to the slower entry-by-entry comparison.
        return super(FilePackIndex, self).__eq__(other)

        if getattr(self._contents, "close", None) is not None:
            self._contents.close()

        """Return the number of entries in this pack index."""
        return self._fan_out_table[-1]

    def _unpack_entry(self, i):
        """Unpack the i-th entry in the index file.

        :return: Tuple with object name (SHA), offset in pack file and CRC32
        """
        raise NotImplementedError(self._unpack_entry)

    def _unpack_name(self, i):
        """Unpack the i-th name from the index file."""
        raise NotImplementedError(self._unpack_name)

    def _unpack_offset(self, i):
        """Unpack the i-th object offset from the index file."""
        raise NotImplementedError(self._unpack_offset)

    def _unpack_crc32_checksum(self, i):
        """Unpack the crc32 checksum for the i-th object from the index file."""
        raise NotImplementedError(self._unpack_crc32_checksum)

        for i in range(len(self)):
            yield self._unpack_name(i)

    def iterentries(self):
        """Iterate over the entries in this pack index.

        :return: iterator over tuples with object name, offset in packfile and
        """
        for i in range(len(self)):
            yield self._unpack_entry(i)

    def _read_fan_out_table(self, start_offset):
        # Each of the 0x100 fan-out slots is a big-endian 32-bit count of
        # objects whose SHA starts with a byte <= the slot index.
        for i in range(0x100):
            fanout_entry = self._contents[start_offset+i*4:start_offset+(i+1)*4]
            ret.append(struct.unpack('>L', fanout_entry)[0])

        """Check that the stored checksum matches the actual checksum."""
        actual = self.calculate_checksum()
        stored = self.get_stored_checksum()
            raise ChecksumMismatch(stored, actual)

    def calculate_checksum(self):
        """Calculate the SHA1 checksum over this pack index.

        :return: This is a 20-byte binary digest
        """
        return make_sha(self._contents[:-20]).digest()

    def get_pack_checksum(self):
        """Return the SHA1 checksum stored for the corresponding packfile.

        :return: 20-byte binary digest
        """
        return str(self._contents[-40:-20])

    def get_stored_checksum(self):
        """Return the SHA1 checksum stored for this index.

        :return: 20-byte binary digest
        """
        return str(self._contents[-20:])

    def _object_index(self, sha):
        :param sha: A *binary* SHA string. (20 characters long)_
        assert len(sha) == 20
        start = self._fan_out_table[idx-1]
        end = self._fan_out_table[idx]
        i = bisect_find_sha(start, end, sha, self._unpack_name)
        return self._unpack_offset(i)
class PackIndex1(FilePackIndex):
    """Version 1 Pack Index file.

    Layout: 256-entry fan-out table, then fixed 24-byte records of
    (4-byte big-endian offset, 20-byte binary SHA).
    """

    def __init__(self, filename, file=None, contents=None, size=None):
        super(PackIndex1, self).__init__(filename, file, contents, size)
        # Restored: the version attribute assignment was lost in this paste.
        self.version = 1
        self._fan_out_table = self._read_fan_out_table(0)

    def _unpack_entry(self, i):
        """Return (name, offset, crc32) for entry *i*; v1 stores no CRC32."""
        (offset, name) = unpack_from('>L20s', self._contents,
                                     (0x100 * 4) + (i * 24))
        return (name, offset, None)

    def _unpack_name(self, i):
        # Skip the 4-byte offset that precedes each 20-byte name.
        offset = (0x100 * 4) + (i * 24) + 4
        return self._contents[offset:offset+20]

    def _unpack_offset(self, i):
        offset = (0x100 * 4) + (i * 24)
        return unpack_from('>L', self._contents, offset)[0]

    def _unpack_crc32_checksum(self, i):
        # Not stored in v1 index files.  (As pasted, the method body had
        # only this comment and no statement; restore the explicit return.)
        return None
class PackIndex2(FilePackIndex):
    """Version 2 Pack Index file.

    Layout: magic + version, fan-out table, name table, CRC32 table,
    4-byte offset table, then an 8-byte large-offset table for packs
    bigger than 2 GB.
    """

    def __init__(self, filename, file=None, contents=None, size=None):
        super(PackIndex2, self).__init__(filename, file, contents, size)
        if self._contents[:4] != '\377tOc':
            raise AssertionError('Not a v2 pack index file')
        (self.version, ) = unpack_from('>L', self._contents, 4)
        if self.version != 2:
            raise AssertionError('Version was %d' % self.version)
        self._fan_out_table = self._read_fan_out_table(8)
        self._name_table_offset = 8 + 0x100 * 4
        self._crc32_table_offset = self._name_table_offset + 20 * len(self)
        # Restored: the `4 * len(self)` continuation lines of the two table
        # offsets were lost in this paste.
        self._pack_offset_table_offset = (self._crc32_table_offset +
                                          4 * len(self))
        self._pack_offset_largetable_offset = (
            self._pack_offset_table_offset + 4 * len(self))

    def _unpack_entry(self, i):
        return (self._unpack_name(i), self._unpack_offset(i),
                self._unpack_crc32_checksum(i))

    def _unpack_name(self, i):
        offset = self._name_table_offset + i * 20
        return self._contents[offset:offset+20]

    def _unpack_offset(self, i):
        offset = self._pack_offset_table_offset + i * 4
        offset = unpack_from('>L', self._contents, offset)[0]
        # An entry with the MSB set is an index into the 64-bit large-offset
        # table rather than a real offset.  (Restored the `if` and the final
        # return, lost in this paste; the Python-2-only `8L` literal is now
        # a plain `8`, which has the same value on both Python 2 and 3.)
        if offset & (2**31):
            offset = (self._pack_offset_largetable_offset +
                      (offset & (2**31 - 1)) * 8)
            offset = unpack_from('>Q', self._contents, offset)[0]
        return offset

    def _unpack_crc32_checksum(self, i):
        return unpack_from('>L', self._contents,
                           self._crc32_table_offset + i * 4)[0]
def read_pack_header(read):
    """Read the header of a pack file.

    :param read: Read function
    :return: Tuple of (pack version, number of objects). If no data is available
        to read, returns (None, None).
    """
    # Restored: as pasted, `header` was used without ever being read, and
    # the documented (None, None) empty-stream behavior had no code path.
    header = read(12)
    if not header:
        return None, None
    if header[:4] != 'PACK':
        raise AssertionError('Invalid pack header %r' % header)
    (version,) = unpack_from('>L', header, 4)
    if version not in (2, 3):
        raise AssertionError('Version was %d' % version)
    (num_objects,) = unpack_from('>L', header, 8)
    return (version, num_objects)
def chunks_length(chunks):
    """Return the total number of bytes in a sequence of chunks."""
    # Use a generator expression instead of the Python-2-only itertools.imap
    # (imported via a compatibility shim); this works identically on
    # Python 2 and 3.
    return sum(len(chunk) for chunk in chunks)
def unpack_object(read_all, read_some=None, compute_crc32=False,
                  include_comp=False, zlib_bufsize=_ZLIB_BUFSIZE):
    """Unpack a Git object.

    :param read_all: Read function that blocks until the number of requested
    :param read_some: Read function that returns at least one byte, but may not
        return the number of bytes requested.
    :param compute_crc32: If True, compute the CRC32 of the compressed data. If
        False, the returned CRC32 will be None.
    :param include_comp: If True, include compressed data in the result.
    :param zlib_bufsize: An optional buffer size for zlib operations.
    :return: A tuple of (unpacked, unused), where unused is the unused data
        leftover from decompression, and unpacked is an UnpackedObject with
        the following attrs set:
        * obj_chunks (for non-delta types)
        * delta_base (for delta types)
        * comp_chunks (if include_comp is True)
        * crc32 (if compute_crc32 is True)
    """
    # NOTE(review): a few interior lines are missing from this paste: the
    # `read_some = read_all` fallback, the crc32 initialisation, the
    # OFS_DELTA trailing-MSB sanity check, and the non-delta
    # `delta_base = None` branch.  Restore from upstream before executing.
    if read_some is None:
    # First MSB-terminated run: low nibble of byte 0 plus 7 bits per
    # continuation byte encode the uncompressed size; bits 4-6 the type.
    bytes, crc32 = take_msb_bytes(read_all, crc32=crc32)
    type_num = (bytes[0] >> 4) & 0x07
    size = bytes[0] & 0x0f
    for i, byte in enumerate(bytes[1:]):
        size += (byte & 0x7f) << ((i * 7) + 4)

    raw_base = len(bytes)
    if type_num == OFS_DELTA:
        bytes, crc32 = take_msb_bytes(read_all, crc32=crc32)
        raw_base += len(bytes)
        # Offset encoding adds 1 before each 7-bit shift, so consecutive
        # encodings are dense; the result is subtracted from this object's
        # own offset to locate the delta base.
        delta_base_offset = bytes[0] & 0x7f
        for byte in bytes[1:]:
            delta_base_offset += 1
            delta_base_offset <<= 7
            delta_base_offset += (byte & 0x7f)
        delta_base = delta_base_offset
    elif type_num == REF_DELTA:
        # REF_DELTA names its base by raw 20-byte SHA.
        delta_base = read_all(20)
            crc32 = binascii.crc32(delta_base, crc32)

    unpacked = UnpackedObject(type_num, delta_base, size, crc32)
    unused = read_zlib_chunks(read_some, unpacked, buffer_size=zlib_bufsize,
                              include_comp=include_comp)
    return unpacked, unused
def _compute_object_size(value):
    """Compute the size of a unresolved object for use with LRUSizeCache.

    :param value: Tuple of (pack type number, object) as stored in the
        offset cache; for delta types the object is a (base, chunks) pair.
    :return: Total byte length of the object's chunks.
    """
    # Unpack inside the body rather than using the Python-2-only
    # tuple-parameter syntax `def f((num, obj)):`, which is a syntax error
    # on Python 3; callers still pass a single tuple positionally.
    (num, obj) = value
    if num in DELTA_TYPES:
        return chunks_length(obj[1])
    return chunks_length(obj)
class PackStreamReader(object):
    """Class to read a pack stream.

    The pack is read from a ReceivableProtocol using read() or recv() as
    appropriate.
    """

    # NOTE(review): interior lines are missing throughout this class in this
    # paste: `else:` branches, `self._offset = 0`, the `_buf_len` and
    # `offset`/`__len__` headers, the 20-byte-trailer bookkeeping branch in
    # `_read`, and the buffer-rewind/yield lines of `read_objects`.  Restore
    # from upstream before executing.

    def __init__(self, read_all, read_some=None, zlib_bufsize=_ZLIB_BUFSIZE):
        self.read_all = read_all
        if read_some is None:
            self.read_some = read_all
            self.read_some = read_some
        # Running SHA-1 of everything read except the 20-byte pack trailer.
        self.sha = make_sha()
        self._rbuf = StringIO()
        # trailer is a deque to avoid memory allocation on small reads
        self._trailer = deque()
        self._zlib_bufsize = zlib_bufsize

    def _read(self, read, size):
        """Read up to size bytes using the given callback.

        As a side effect, update the verifier's hash (excluding the last 20
        :param read: The read callback to read from.
        :param size: The maximum number of bytes to read; the particular
            behavior is callback-specific.
        """
        # maintain a trailer of the last 20 bytes we've read
        tn = len(self._trailer)
        to_pop = max(n + tn - 20, 0)
        for _ in xrange(to_pop):
            self.sha.update(self._trailer.popleft())
        self._trailer.extend(data[-to_add:])

        # hash everything but the trailer
        self.sha.update(data[:-to_add])

        buf.seek(0, SEEK_END)
        return self._offset - self._buf_len()

    def read(self, size):
        """Read, blocking until size bytes are read."""
        buf_len = self._buf_len()
            return self._rbuf.read(size)
        # Drain the buffer, then block for the remainder.
        buf_data = self._rbuf.read()
        self._rbuf = StringIO()
        return buf_data + self._read(self.read_all, size - buf_len)

    def recv(self, size):
        """Read up to size bytes, blocking until one byte is read."""
        buf_len = self._buf_len()
        data = self._rbuf.read(size)
        self._rbuf = StringIO()
        return self._read(self.read_some, size)

        return self._num_objects

    def read_objects(self, compute_crc32=False):
        """Read the objects in this pack file.

        :param compute_crc32: If True, compute the CRC32 of the compressed
            data. If False, the returned CRC32 will be None.
        :return: Iterator over UnpackedObjects with the following members set:
            obj_chunks (for non-delta types)
            delta_base (for delta types)
            crc32 (if compute_crc32 is True)
        :raise ChecksumMismatch: if the checksum of the pack contents does not
            match the checksum in the pack trailer.
        :raise zlib.error: if an error occurred during zlib decompression.
        :raise IOError: if an error occurred writing to the output file.
        """
        pack_version, self._num_objects = read_pack_header(self.read)
        if pack_version is None:
        for i in xrange(self._num_objects):
            unpacked, unused = unpack_object(
                self.read, read_some=self.recv, compute_crc32=compute_crc32,
                zlib_bufsize=self._zlib_bufsize)
            unpacked.offset = offset

            # prepend any unused data to current read buffer
            buf.write(self._rbuf.read())

        if self._buf_len() < 20:
            # If the read buffer is full, then the last read() got the whole
            # trailer off the wire. If not, it means there is still some of the
            # trailer to read. We need to read() all 20 bytes; N come from the
            # read buffer and (20 - N) come from the wire.
        pack_sha = ''.join(self._trailer)
        if pack_sha != self.sha.digest():
            raise ChecksumMismatch(sha_to_hex(pack_sha), self.sha.hexdigest())
class PackStreamCopier(PackStreamReader):
    """Class to verify a pack stream as it is being read.

    The pack is read from a ReceivableProtocol using read() or recv() as
    appropriate and written out to the given file-like object.
    """

    # NOTE(review): the `return data` of `_read`, the `verify` method header,
    # and the `if self._delta_iter:`/`else:` pair around the two loops below
    # are missing from this paste; restore from upstream before executing.

    def __init__(self, read_all, read_some, outfile, delta_iter=None):
        """Initialize the copier.

        :param read_all: Read function that blocks until the number of requested
        :param read_some: Read function that returns at least one byte, but may
            not return the number of bytes requested.
        :param outfile: File-like object to write output through.
        :param delta_iter: Optional DeltaChainIterator to record deltas as we
        """
        super(PackStreamCopier, self).__init__(read_all, read_some=read_some)
        self.outfile = outfile
        self._delta_iter = delta_iter

    def _read(self, read, size):
        """Read data from the read callback and write it to the file."""
        data = super(PackStreamCopier, self)._read(read, size)
        self.outfile.write(data)

        """Verify a pack stream and write it to the output file.

        See PackStreamReader.iterobjects for a list of exceptions this may
        """
        # With a delta_iter, record each object as it streams past;
        # otherwise just drain the iterator for its checksum side effect.
        for unpacked in self.read_objects():
            self._delta_iter.record(unpacked)
        for _ in self.read_objects():
def obj_sha(type, chunks):
    """Compute the SHA for a numeric type and object chunks.

    :param type: Numeric object type.
    :param chunks: Iterable of body chunks.
    :return: 20-byte binary SHA1 digest over the git object header plus body.
    """
    # Restored: as pasted, `sha` was used without being created, the chunk
    # loop was gone, and nothing was returned.
    sha = make_sha()
    sha.update(object_header(type, chunks_length(chunks)))
    for chunk in chunks:
        sha.update(chunk)
    return sha.digest()
def compute_file_sha(f, start_ofs=0, end_ofs=0, buffer_size=1<<16):
    """Hash a portion of a file into a new SHA.

    :param f: A file-like object to read from that supports seek().
    :param start_ofs: The offset in the file to start reading at.
    :param end_ofs: The offset in the file to end reading at, relative to the
        end of the file (so 0 or negative).
    :param buffer_size: A buffer size for reading.
    :return: A new SHA object updated with data read from the file.
    """
    # Restored: as pasted, `sha` and the while-loop around the buffered
    # reads were lost, leaving `todo` and `data` dangling.
    sha = make_sha()
    f.seek(0, SEEK_END)
    todo = f.tell() + end_ofs - start_ofs
    f.seek(start_ofs)
    while todo:
        data = f.read(min(todo, buffer_size))
        sha.update(data)
        todo -= len(data)
    return sha
class PackData(object):
    """The data contained in a packfile.

    Pack files can be accessed both sequentially for exploding a pack, and
    directly with the help of an index to retrieve a specific object.

    The objects within are either complete or a delta aginst another.

    The header is variable length. If the MSB of each byte is set then it
    indicates that the subsequent byte is still part of the header.
    For the first byte the next MS bits are the type, which tells you the type
    of object, and whether it is a delta. The LS byte is the lowest bits of the
    size. For each subsequent byte the LS 7 bits are the next MS bits of the
    size, i.e. the last byte of the header contains the MS bits of the size.

    For the complete objects the data is stored as zlib deflated data.
    The size in the header is the uncompressed object size, so to uncompress
    you need to just keep feeding data to zlib until you get an object back,
    or it errors on bad data. This is done here by just giving the complete
    buffer from the start of the deflated object on. This is bad, but until I
    get mmap sorted out it will have to do.

    Currently there are no integrity checks done. Also no attempt is made to
    try and detect the delta case, or a request for an object at the wrong
    position. It will all just throw a zlib or KeyError.
    """

    # NOTE(review): interior lines are missing throughout this class in this
    # paste: the `@classmethod` decorators on from_file/from_path, the
    # `if file is None:`/`else:` pair and `self.pack = None` in __init__,
    # several `return`/`try:`/`except`/`else:` lines in _get_size, get_ref,
    # resolve_object, iterobjects, iterentries and create_index, the
    # `__len__` and `check` method headers, and the `yield` statements of
    # the iterators.  Restore from upstream before executing.

    def __init__(self, filename, file=None, size=None):
        """Create a PackData object representing the pack in the given filename.

        The file must exist and stay readable until the object is disposed of. It
        must also stay the same size. It will be mapped whenever needed.

        Currently there is a restriction on the size of the pack as the python
        mmap implementation is flawed.
        """
        self._filename = filename
        self._header_size = 12
        self._file = GitFile(self._filename, 'rb')
        (version, self._num_objects) = read_pack_header(self._file.read)
        # Cache resolved objects, bounded by total chunk size (20 MB).
        self._offset_cache = LRUSizeCache(1024*1024*20,
            compute_size=_compute_object_size)

    def from_file(cls, file, size):
        return cls(str(file), file=file, size=size)

    def from_path(cls, path):
        return cls(filename=path)

    def _get_size(self):
        # Lazily stat the file; the cached value is returned on later calls.
        if self._size is not None:
        self._size = os.path.getsize(self._filename)
        if self._size < self._header_size:
            errmsg = ('%s is too small for a packfile (%d < %d)' %
                      (self._filename, self._size, self._header_size))
            raise AssertionError(errmsg)

        """Returns the number of objects in this pack."""
        return self._num_objects

    def calculate_checksum(self):
        """Calculate the checksum for this pack.

        :return: 20-byte binary SHA1 digest
        """
        # Hash everything except the 20-byte trailer, which stores this value.
        return compute_file_sha(self._file, end_ofs=-20).digest()

    def get_ref(self, sha):
        """Get the object for a ref SHA, only looking in this pack."""
        # TODO: cache these results
        if self.pack is None:
        offset = self.pack.index.object_index(sha)
        type, obj = self.get_object_at(offset)
        elif self.pack is not None and self.pack.resolve_ext_ref:
            type, obj = self.pack.resolve_ext_ref(sha)
        return offset, type, obj

    def resolve_object(self, offset, type, obj, get_ref=None):
        """Resolve an object, possibly resolving deltas when necessary.

        :return: Tuple with object type and contents.
        """
        # Non-delta objects presumably return (type, obj) directly via a
        # line missing from this paste.
        if type not in DELTA_TYPES:
            get_ref = self.get_ref
        if type == OFS_DELTA:
            (delta_offset, delta) = obj
            # TODO: clean up asserts and replace with nicer error messages
            assert isinstance(offset, int) or isinstance(offset, long)
            assert isinstance(delta_offset, int) or isinstance(offset, long)
            base_offset = offset-delta_offset
            type, base_obj = self.get_object_at(base_offset)
            assert isinstance(type, int)
        elif type == REF_DELTA:
            (basename, delta) = obj
            assert isinstance(basename, str) and len(basename) == 20
            base_offset, type, base_obj = get_ref(basename)
            assert isinstance(type, int)
        # Recursively resolve the base, then apply this delta on top.
        type, base_chunks = self.resolve_object(base_offset, type, base_obj)
        chunks = apply_delta(base_chunks, delta)
        # TODO(dborowitz): This can result in poor performance if large base
        # objects are separated from deltas in the pack. We should reorganize
        # so that we apply deltas to all objects in a chain one after the other
        # to optimize cache performance.
        if offset is not None:
            self._offset_cache[offset] = type, chunks

    def iterobjects(self, progress=None, compute_crc32=True):
        self._file.seek(self._header_size)
        for i in xrange(1, self._num_objects + 1):
            offset = self._file.tell()
            unpacked, unused = unpack_object(
                self._file.read, compute_crc32=compute_crc32)
            if progress is not None:
                progress(i, self._num_objects)
            yield (offset, unpacked.pack_type_num, unpacked._obj(),
            self._file.seek(-len(unused), SEEK_CUR)  # Back up over unused data.

    def _iter_unpacked(self):
        # TODO(dborowitz): Merge this with iterobjects, if we can change its
        self._file.seek(self._header_size)
        for _ in xrange(self._num_objects):
            offset = self._file.tell()
            unpacked, unused = unpack_object(
                self._file.read, compute_crc32=False)
            unpacked.offset = offset
            self._file.seek(-len(unused), SEEK_CUR)  # Back up over unused data.

    def iterentries(self, progress=None):
        """Yield entries summarizing the contents of this pack.

        :param progress: Progress function, called with current and total
        :return: iterator of tuples with (sha, offset, crc32)
        """
        num_objects = self._num_objects
            self.pack.resolve_ext_ref if self.pack is not None else None)
        indexer = PackIndexer.for_pack_data(
            self, resolve_ext_ref=resolve_ext_ref)
        for i, result in enumerate(indexer):
            if progress is not None:
                progress(i, num_objects)

    def sorted_entries(self, progress=None):
        """Return entries in this pack, sorted by SHA.

        :param progress: Progress function, called with current and total
        :return: List of tuples with (sha, offset, crc32)
        """
        ret = list(self.iterentries(progress=progress))

    def create_index_v1(self, filename, progress=None):
        """Create a version 1 file for this data file.

        :param filename: Index filename.
        :param progress: Progress report function
        :return: Checksum of index file
        """
        entries = self.sorted_entries(progress=progress)
        f = GitFile(filename, 'wb')
        return write_pack_index_v1(f, entries, self.calculate_checksum())

    def create_index_v2(self, filename, progress=None):
        """Create a version 2 index file for this data file.

        :param filename: Index filename.
        :param progress: Progress report function
        :return: Checksum of index file
        """
        entries = self.sorted_entries(progress=progress)
        f = GitFile(filename, 'wb')
        return write_pack_index_v2(f, entries, self.calculate_checksum())

    def create_index(self, filename, progress=None,
        """Create an index file for this data file.

        :param filename: Index filename.
        :param progress: Progress report function
        :return: Checksum of index file
        """
            return self.create_index_v1(filename, progress)
            return self.create_index_v2(filename, progress)
            raise ValueError('unknown index format %d' % version)

    def get_stored_checksum(self):
        """Return the expected checksum stored in this pack."""
        self._file.seek(-20, SEEK_END)
        return self._file.read(20)

        """Check the consistency of this pack."""
        actual = self.calculate_checksum()
        stored = self.get_stored_checksum()
        if actual != stored:
            raise ChecksumMismatch(stored, actual)

    def get_object_at(self, offset):
        """Given an offset in to the packfile return the object that is there.

        Using the associated index the location of an object can be looked up,
        and then the packfile can be asked directly for that object using this
        """
        if offset in self._offset_cache:
            return self._offset_cache[offset]
        assert isinstance(offset, long) or isinstance(offset, int),\
            'offset was %r' % offset
        assert offset >= self._header_size
        self._file.seek(offset)
        unpacked, _ = unpack_object(self._file.read)
        return (unpacked.pack_type_num, unpacked._obj())
class DeltaChainIterator(object):
    """Abstract iterator over pack data based on delta chains.

    Each object in the pack is guaranteed to be inflated exactly once,
    regardless of how many objects reference it as a delta base. As a result,
    memory usage is proportional to the length of the longest delta chain.

    Subclasses can override _result to define the result type of the iterator.
    By default, results are UnpackedObjects with the following members set:

    * offset
    * obj_type_num
    * obj_chunks
    * pack_type_num
    * delta_base (for delta types)
    * comp_chunks (if _include_comp is True)
    * decomp_chunks
    * decomp_len
    * crc32 (if _compute_crc32 is True)
    """

    _compute_crc32 = False
    _include_comp = False

    def __init__(self, file_obj, resolve_ext_ref=None):
        self._file = file_obj
        self._resolve_ext_ref = resolve_ext_ref
        # Delta objects waiting for their base, keyed by base offset/sha.
        self._pending_ofs = defaultdict(list)
        self._pending_ref = defaultdict(list)
        # Non-delta objects: roots of the delta chains.
        self._full_ofs = []
        self._ext_refs = []

    @classmethod
    def for_pack_data(cls, pack_data, resolve_ext_ref=None):
        """Create an iterator pre-populated from a PackData object."""
        walker = cls(None, resolve_ext_ref=resolve_ext_ref)
        walker.set_pack_data(pack_data)
        for unpacked in pack_data._iter_unpacked():
            walker.record(unpacked)
        return walker

    def record(self, unpacked):
        """Record one unpacked object, filing deltas under their base."""
        type_num = unpacked.pack_type_num
        offset = unpacked.offset
        if type_num == OFS_DELTA:
            base_offset = offset - unpacked.delta_base
            self._pending_ofs[base_offset].append(offset)
        elif type_num == REF_DELTA:
            self._pending_ref[unpacked.delta_base].append(offset)
        else:
            self._full_ofs.append((offset, type_num))

    def set_pack_data(self, pack_data):
        self._file = pack_data._file

    def _walk_all_chains(self):
        for offset, type_num in self._full_ofs:
            for result in self._follow_chain(offset, type_num, None):
                yield result
        for result in self._walk_ref_chains():
            yield result
        assert not self._pending_ofs

    def _ensure_no_pending(self):
        if self._pending_ref:
            raise KeyError([sha_to_hex(s) for s in self._pending_ref])

    def _walk_ref_chains(self):
        if not self._resolve_ext_ref:
            self._ensure_no_pending()
            return
        for base_sha, pending in sorted(self._pending_ref.iteritems()):
            try:
                type_num, chunks = self._resolve_ext_ref(base_sha)
            except KeyError:
                # Not an external ref, but may depend on one. Either it will get
                # popped via a _follow_chain call, or we will raise an error
                # below.
                continue
            self._ext_refs.append(base_sha)
            self._pending_ref.pop(base_sha)
            for new_offset in pending:
                for result in self._follow_chain(new_offset, type_num, chunks):
                    yield result
        self._ensure_no_pending()

    def _result(self, unpacked):
        return unpacked

    def _resolve_object(self, offset, obj_type_num, base_chunks):
        self._file.seek(offset)
        unpacked, _ = unpack_object(
          self._file.read, include_comp=self._include_comp,
          compute_crc32=self._compute_crc32)
        unpacked.offset = offset
        if base_chunks is None:
            assert unpacked.pack_type_num == obj_type_num
        else:
            assert unpacked.pack_type_num in DELTA_TYPES
            unpacked.obj_type_num = obj_type_num
            unpacked.obj_chunks = apply_delta(base_chunks,
                                              unpacked.decomp_chunks)
        return unpacked

    def _follow_chain(self, offset, obj_type_num, base_chunks):
        # Unlike PackData.get_object_at, there is no need to cache offsets as
        # this approach by design inflates each object exactly once.
        unpacked = self._resolve_object(offset, obj_type_num, base_chunks)
        yield self._result(unpacked)
        pending = chain(self._pending_ofs.pop(unpacked.offset, []),
                        self._pending_ref.pop(unpacked.sha(), []))
        for new_offset in pending:
            for new_result in self._follow_chain(
              new_offset, unpacked.obj_type_num, unpacked.obj_chunks):
                yield new_result

    def __iter__(self):
        return self._walk_all_chains()

    def ext_refs(self):
        return self._ext_refs
class PackIndexer(DeltaChainIterator):
    """Delta chain iterator that yields index entries."""

    _compute_crc32 = True

    def _result(self, unpacked):
        # An index entry is (binary sha, pack offset, crc32 of the record).
        return (unpacked.sha(), unpacked.offset, unpacked.crc32)
class PackInflater(DeltaChainIterator):
    """Delta chain iterator that yields ShaFile objects."""

    def _result(self, unpacked):
        # Materialize the fully-resolved object as a ShaFile.
        return unpacked.sha_file()
class SHA1Reader(object):
    """Wrapper around a file-like object that remembers the SHA1 of its data."""

    def __init__(self, f):
        self.f = f
        self.sha1 = make_sha('')

    def read(self, num=None):
        """Read from the wrapped file, folding the bytes into the digest."""
        data = self.f.read(num)
        self.sha1.update(data)
        return data

    def check_sha(self):
        """Compare the next 20 bytes against the digest of everything read.

        :raise ChecksumMismatch: if the stored trailer does not match.
        """
        stored = self.f.read(20)
        if stored != self.sha1.digest():
            raise ChecksumMismatch(self.sha1.hexdigest(), sha_to_hex(stored))

    def close(self):
        return self.f.close()

    def tell(self):
        return self.f.tell()
class SHA1Writer(object):
    """Wrapper around a file-like object that remembers the SHA1 of its data."""

    def __init__(self, f):
        self.f = f
        self.length = 0  # number of bytes written so far, including trailer
        self.sha1 = make_sha('')

    def write(self, data):
        """Write data, folding it into the running digest."""
        self.sha1.update(data)
        self.f.write(data)
        self.length += len(data)

    def write_sha(self):
        """Append the 20-byte SHA-1 trailer and return it."""
        sha = self.sha1.digest()
        assert len(sha) == 20
        self.f.write(sha)
        self.length += len(sha)
        return sha

    def close(self):
        """Write the trailer, close the underlying file, return the SHA."""
        sha = self.write_sha()
        self.f.close()
        return sha

    def offset(self):
        # Tracked length rather than f.tell(), so it also works for
        # non-seekable streams.
        return self.length

    def tell(self):
        return self.f.tell()
def pack_object_header(type_num, delta_base, size):
    """Create a pack object header for the given object info.

    :param type_num: Numeric type of the object.
    :param delta_base: Delta base offset or ref, or None for whole objects.
    :param size: Uncompressed object size.
    :return: A header for a packed object.
    """
    header = ''
    # First byte: type in bits 4-6, low 4 bits of the size; the MSB marks
    # continuation bytes, each carrying 7 further size bits.
    c = (type_num << 4) | (size & 15)
    size >>= 4
    while size:
        header += (chr(c | 0x80))
        c = size & 0x7f
        size >>= 7
    header += chr(c)
    if type_num == OFS_DELTA:
        # Offset deltas encode the (positive) base offset big-endian,
        # 7 bits per byte, with the "minus one" bias on continuation bytes.
        ret = [delta_base & 0x7f]
        delta_base >>= 7
        while delta_base:
            delta_base -= 1
            ret.insert(0, 0x80 | (delta_base & 0x7f))
            delta_base >>= 7
        header += ''.join([chr(x) for x in ret])
    elif type_num == REF_DELTA:
        # Ref deltas name their base by its raw 20-byte SHA-1.
        assert len(delta_base) == 20
        header += delta_base
    return header
def write_pack_object(f, type, object, sha=None):
    """Write pack object to a file.

    :param f: File to write to
    :param type: Numeric type of the object
    :param object: Object to write; for delta types, a (delta base, data)
        tuple
    :param sha: Optional SHA object to update with the written bytes
    :return: CRC32 checksum of the written object record
    """
    if type in DELTA_TYPES:
        delta_base, object = object
    else:
        delta_base = None
    header = pack_object_header(type, delta_base, len(object))
    comp_data = zlib.compress(object)
    crc32 = 0
    for data in (header, comp_data):
        f.write(data)
        if sha is not None:
            sha.update(data)
        crc32 = binascii.crc32(data, crc32)
    # Mask to an unsigned 32-bit value; binascii.crc32 may return negatives.
    return crc32 & 0xffffffff
def write_pack(filename, objects, num_objects=None):
    """Write a new pack data file.

    :param filename: Path to the new pack file (without .pack extension)
    :param objects: Iterable of (object, path) tuples to write.
        Should provide __len__
    :param num_objects: Deprecated; ignored except for the warning
    :return: Tuple with checksum of pack file and index file
    """
    if num_objects is not None:
        warnings.warn('num_objects argument to write_pack is deprecated',
                      DeprecationWarning)
    f = GitFile(filename + '.pack', 'wb')
    try:
        entries, data_sum = write_pack_objects(f, objects,
            num_objects=num_objects)
    finally:
        f.close()
    # Index entries must be sorted by sha.
    entries = [(k, v[0], v[1]) for (k, v) in entries.iteritems()]
    entries.sort()
    f = GitFile(filename + '.idx', 'wb')
    try:
        return data_sum, write_pack_index_v2(f, entries, data_sum)
    finally:
        f.close()
def write_pack_header(f, num_objects):
    """Write a pack header for the given number of objects."""
    # Magic, then format version 2, then the object count — big-endian.
    f.write('PACK')
    f.write(struct.pack('>L', 2))
    f.write(struct.pack('>L', num_objects))
def deltify_pack_objects(objects, window=10):
    """Generate deltas for pack objects.

    :param objects: Objects to deltify
    :param window: Window size
    :return: Iterator over type_num, object id, delta_base, content
        delta_base is None for full text entries
    """
    # Build a list of objects ordered by the magic Linus heuristic
    # This helps us find good objects to diff against us
    magic = []
    for obj, path in objects:
        magic.append((obj.type_num, path, -obj.raw_length(), obj))
    magic.sort()

    possible_bases = deque()

    for type_num, path, neg_length, o in magic:
        raw = o.as_raw_string()
        # Start from the full text; any shorter delta wins.
        winner = raw
        winner_base = None
        for base in possible_bases:
            if base.type_num != type_num:
                continue
            delta = create_delta(base.as_raw_string(), raw)
            if len(delta) < len(winner):
                winner_base = base.sha().digest()
                winner = delta
        yield type_num, o.sha().digest(), winner_base, winner
        possible_bases.appendleft(o)
        while len(possible_bases) > window:
            possible_bases.pop()
def write_pack_objects(f, objects, window=10, num_objects=None):
    """Write a new pack data file.

    :param f: File to write to
    :param objects: Iterable of (object, path) tuples to write.
        Should provide __len__
    :param window: Sliding window size for searching for deltas; currently
        unimplemented
    :param num_objects: Number of objects (do not use, deprecated)
    :return: Dict mapping id -> (offset, crc32 checksum), pack checksum
    """
    if num_objects is None:
        num_objects = len(objects)
    # FIXME: pack_contents = deltify_pack_objects(objects, window)
    pack_contents = (
        (o.type_num, o.sha().digest(), None, o.as_raw_string())
        for (o, path) in objects)
    return write_pack_data(f, num_objects, pack_contents)
def write_pack_data(f, num_records, records):
    """Write a new pack data file.

    :param f: File to write to
    :param num_records: Number of records
    :param records: Iterator over type_num, object_id, delta_base, raw
    :return: Dict mapping id -> (offset, crc32 checksum), pack checksum
    """
    # Write the pack
    entries = {}
    f = SHA1Writer(f)
    write_pack_header(f, num_records)
    for type_num, object_id, delta_base, raw in records:
        offset = f.offset()
        if delta_base is not None:
            try:
                base_offset, base_crc32 = entries[delta_base]
            except KeyError:
                # Base not written yet (or external): fall back to a ref
                # delta, naming the base by sha.
                type_num = REF_DELTA
                raw = (delta_base, raw)
            else:
                # OFS_DELTA stores the base as a RELATIVE backwards offset,
                # not the base's absolute offset.
                type_num = OFS_DELTA
                raw = (offset - base_offset, raw)
        crc32 = write_pack_object(f, type_num, raw)
        entries[object_id] = (offset, crc32)
    return entries, f.write_sha()
def write_pack_index_v1(f, entries, pack_checksum):
    """Write a new pack index file.

    :param f: A file-like object to write to
    :param entries: List of tuples with object name (sha), offset_in_pack,
        and crc32_checksum.
    :param pack_checksum: Checksum of the pack file.
    :return: The SHA of the written index file
    """
    f = SHA1Writer(f)
    fan_out_table = defaultdict(lambda: 0)
    for (name, offset, entry_checksum) in entries:
        fan_out_table[ord(name[0])] += 1
    # Fan-out table: cumulative counts of shas by first byte.
    for i in range(0x100):
        f.write(struct.pack('>L', fan_out_table[i]))
        fan_out_table[i+1] += fan_out_table[i]
    for (name, offset, entry_checksum) in entries:
        if not (offset <= 0xffffffff):
            raise TypeError("pack format 1 only supports offsets < 2Gb")
        f.write(struct.pack('>L20s', offset, name))
    assert len(pack_checksum) == 20
    f.write(pack_checksum)
    return f.write_sha()
def create_delta(base_buf, target_buf):
    """Use python difflib to work out how to transform base_buf to target_buf.

    :param base_buf: Base buffer
    :param target_buf: Target buffer
    :return: Delta string in git's delta format
    """
    assert isinstance(base_buf, str)
    assert isinstance(target_buf, str)
    out_buf = ''
    # write delta header
    def encode_size(size):
        # Little-endian base-128 varint, MSB marks continuation.
        ret = ''
        c = size & 0x7f
        size >>= 7
        while size:
            ret += chr(c | 0x80)
            c = size & 0x7f
            size >>= 7
        ret += chr(c)
        return ret
    out_buf += encode_size(len(base_buf))
    out_buf += encode_size(len(target_buf))
    # write out delta opcodes
    seq = difflib.SequenceMatcher(a=base_buf, b=target_buf)
    for opcode, i1, i2, j1, j2 in seq.get_opcodes():
        # Git patch opcodes don't care about deletes!
        #if opcode == 'replace' or opcode == 'delete':
        #    pass
        if opcode == 'equal':
            # If they are equal, unpacker will use data from base_buf
            # Write out an opcode that says what range to use
            scratch = ''
            op = 0x80
            o = i1
            for i in range(4):
                # Emit only the non-zero offset bytes; flag them in the op.
                if o & 0xff << i*8:
                    scratch += chr((o >> i*8) & 0xff)
                    op |= 1 << i
            s = i2 - i1
            for i in range(2):
                if s & 0xff << i*8:
                    scratch += chr((s >> i*8) & 0xff)
                    op |= 1 << (4+i)
            out_buf += chr(op)
            out_buf += scratch
        if opcode == 'replace' or opcode == 'insert':
            # If we are replacing a range or adding one, then we just
            # output it to the stream (prefixed by its size)
            s = j2 - j1
            o = j1
            while s > 127:
                out_buf += chr(127)
                out_buf += target_buf[o:o+127]
                s -= 127
                o += 127
            out_buf += chr(s)
            out_buf += target_buf[o:o+s]
    return out_buf
def apply_delta(src_buf, delta):
    """Based on the similar function in git's patch-delta.c.

    :param src_buf: Source buffer
    :param delta: Delta instructions
    :return: List of chunks making up the reconstructed target
    """
    if type(src_buf) != str:
        src_buf = ''.join(src_buf)
    if type(delta) != str:
        delta = ''.join(delta)
    out = []
    index = 0
    delta_length = len(delta)
    def get_delta_header_size(delta, index):
        # Decode a little-endian base-128 varint starting at index.
        size = 0
        i = 0
        while delta:
            cmd = ord(delta[index])
            index += 1
            size |= (cmd & ~0x80) << i
            i += 7
            if not cmd & 0x80:
                break
        return size, index
    src_size, index = get_delta_header_size(delta, index)
    dest_size, index = get_delta_header_size(delta, index)
    assert src_size == len(src_buf), '%d vs %d' % (src_size, len(src_buf))
    while index < delta_length:
        cmd = ord(delta[index])
        index += 1
        if cmd & 0x80:
            # Copy opcode: bits 0-3 flag offset bytes, bits 4-6 size bytes.
            cp_off = 0
            for i in range(4):
                if cmd & (1 << i):
                    x = ord(delta[index])
                    index += 1
                    cp_off |= x << (i * 8)
            cp_size = 0
            for i in range(3):
                if cmd & (1 << (4+i)):
                    x = ord(delta[index])
                    index += 1
                    cp_size |= x << (i * 8)
            if cp_size == 0:
                # A size of zero means 64K, per the pack format.
                cp_size = 0x10000
            if (cp_off + cp_size < cp_size or
                cp_off + cp_size > src_size or
                cp_size > dest_size):
                break
            out.append(src_buf[cp_off:cp_off+cp_size])
        elif cmd != 0:
            # Insert opcode: next cmd bytes are literal data.
            out.append(delta[index:index+cmd])
            index += cmd
        else:
            raise ApplyDeltaError('Invalid opcode 0')

    if index != delta_length:
        raise ApplyDeltaError('delta not empty: %r' % delta[index:])

    if dest_size != chunks_length(out):
        raise ApplyDeltaError('dest size incorrect')

    return out
def write_pack_index_v2(f, entries, pack_checksum):
    """Write a new pack index file.

    :param f: File-like object to write to
    :param entries: List of tuples with object name (sha), offset_in_pack, and
        crc32_checksum.
    :param pack_checksum: Checksum of the pack file.
    :return: The SHA of the index file written
    """
    f = SHA1Writer(f)
    f.write('\377tOc') # Magic!
    f.write(struct.pack('>L', 2))
    fan_out_table = defaultdict(lambda: 0)
    for (name, offset, entry_checksum) in entries:
        fan_out_table[ord(name[0])] += 1
    largetable = []
    # Fan-out table: cumulative counts of shas by first byte.
    for i in range(0x100):
        f.write(struct.pack('>L', fan_out_table[i]))
        fan_out_table[i+1] += fan_out_table[i]
    for (name, offset, entry_checksum) in entries:
        f.write(name)
    for (name, offset, entry_checksum) in entries:
        f.write(struct.pack('>L', entry_checksum))
    for (name, offset, entry_checksum) in entries:
        if offset < 2**31:
            f.write(struct.pack('>L', offset))
        else:
            # Large offsets are indirected through the 64-bit table; the
            # high bit marks the entry as an index into that table.
            f.write(struct.pack('>L', 2**31 + len(largetable)))
            largetable.append(offset)
    for offset in largetable:
        f.write(struct.pack('>Q', offset))
    assert len(pack_checksum) == 20
    f.write(pack_checksum)
    return f.write_sha()
1750 """A Git pack object."""
1752 def __init__(self, basename, resolve_ext_ref=None):
1753 self._basename = basename
1756 self._idx_path = self._basename + '.idx'
1757 self._data_path = self._basename + '.pack'
1758 self._data_load = lambda: PackData(self._data_path)
1759 self._idx_load = lambda: load_pack_index(self._idx_path)
1760 self.resolve_ext_ref = resolve_ext_ref
1763 def from_lazy_objects(self, data_fn, idx_fn):
1764 """Create a new pack object from callables to load pack data and
1767 ret._data_load = data_fn
1768 ret._idx_load = idx_fn
1772 def from_objects(self, data, idx):
1773 """Create a new pack object from pack data and index objects."""
1775 ret._data_load = lambda: data
1776 ret._idx_load = lambda: idx
1780 """The SHA over the SHAs of the objects in this pack."""
1781 return self.index.objects_sha1()
1785 """The pack data object being used."""
1786 if self._data is None:
1787 self._data = self._data_load()
1788 self._data.pack = self
1789 self.check_length_and_checksum()
1794 """The index being used.
1796 :note: This may be an in-memory index
1798 if self._idx is None:
1799 self._idx = self._idx_load()
1803 if self._data is not None:
1807 def __eq__(self, other):
1808 return type(self) == type(other) and self.index == other.index
1811 """Number of entries in this pack."""
1812 return len(self.index)
1815 return '%s(%r)' % (self.__class__.__name__, self._basename)
1818 """Iterate over all the sha1s of the objects in this pack."""
1819 return iter(self.index)
1821 def check_length_and_checksum(self):
1822 """Sanity check the length and checksum of the pack index and data."""
1823 assert len(self.index) == len(self.data)
1824 idx_stored_checksum = self.index.get_pack_checksum()
1825 data_stored_checksum = self.data.get_stored_checksum()
1826 if idx_stored_checksum != data_stored_checksum:
1827 raise ChecksumMismatch(sha_to_hex(idx_stored_checksum),
1828 sha_to_hex(data_stored_checksum))
1831 """Check the integrity of this pack.
1833 :raise ChecksumMismatch: if a checksum for the index or data is wrong
1837 for obj in self.iterobjects():
1839 # TODO: object connectivity checks
1841 def get_stored_checksum(self):
1842 return self.data.get_stored_checksum()
1844 def __contains__(self, sha1):
1845 """Check whether this pack contains a particular SHA1."""
1847 self.index.object_index(sha1)
1852 def get_raw(self, sha1):
1853 offset = self.index.object_index(sha1)
1854 obj_type, obj = self.data.get_object_at(offset)
1855 type_num, chunks = self.data.resolve_object(offset, obj_type, obj)
1856 return type_num, ''.join(chunks)
1858 def __getitem__(self, sha1):
1859 """Retrieve the specified SHA1."""
1860 type, uncomp = self.get_raw(sha1)
1861 return ShaFile.from_raw_string(type, uncomp)
1863 def iterobjects(self):
1864 """Iterate over the objects in this pack."""
1865 return iter(PackInflater.for_pack_data(
1866 self.data, resolve_ext_ref=self.resolve_ext_ref))
1868 def pack_tuples(self):
1869 """Provide an iterable for use with write_pack_objects.
1871 :return: Object that can iterate over (object, path) tuples
1872 and provides __len__
1874 class PackTupleIterable(object):
1876 def __init__(self, pack):
1880 return len(self.pack)
1883 return ((o, None) for o in self.pack.iterobjects())
1885 return PackTupleIterable(self)
1887 def keep(self, msg=None):
1888 """Add a .keep file for the pack, preventing git from garbage collecting it.
1890 :param msg: A message written inside the .keep file; can be used later to
1891 determine whether or not a .keep file is obsolete.
1892 :return: The path of the .keep file, as a string.
1894 keepfile_name = '%s.keep' % self._basename
1895 keepfile = GitFile(keepfile_name, 'wb')
1899 keepfile.write('\n')
1902 return keepfile_name
1906 from dulwich._pack import apply_delta, bisect_find_sha