# pack.py -- For dealing with packed git objects.
# Copyright (C) 2007 James Westby <jw+debian@jameswestby.net>
# Copyright (C) 2008 Jelmer Vernooij <jelmer@samba.org>
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; version 2
# of the License or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
20 """Classes for dealing with packed git objects.
22 A pack is a compact representation of a bunch of objects, stored
23 using deltas where possible.
25 They have two parts, the pack file, which stores the data, and an index
26 that tells you where the data is.
28 To find an object you look in all of the index files 'til you find a
29 match for the object name. You then use the pointer got from this as
30 a pointer in to the corresponding packfile.
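
A minimal usage sketch (the pack basename and sha below are hypothetical):

    p = Pack("pack-deadbeef")   # wraps pack-deadbeef.pack / pack-deadbeef.idx
    if sha in p:                # index lookup
        obj = p[sha]            # read and reconstruct the object from the pack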
try:
    from collections import defaultdict
except ImportError:
    from misc import defaultdict
import difflib
from itertools import imap, izip
import mmap, os, struct, sys, zlib
try:
    from struct import unpack_from
except ImportError:
    from dulwich.misc import unpack_from
from dulwich.errors import (ApplyDeltaError, ChecksumMismatch)
from dulwich.lru_cache import (LRUSizeCache)
from dulwich.objects import (ShaFile, hex_to_sha, sha_to_hex)
from dulwich.misc import make_sha
supports_mmap_offset = (sys.version_info[0] >= 3 or
        (sys.version_info[0] == 2 and sys.version_info[1] >= 6))
def take_msb_bytes(map, offset):
    ret = []
    while len(ret) == 0 or ret[-1] & 0x80:
        ret.append(ord(map[offset]))
        offset += 1
    return ret
def read_zlib(data, offset, dec_size):
    obj = zlib.decompressobj()
    x = ""
    fed = 0
    while obj.unused_data == "":
        base = offset + fed
        add = data[base:base+1024]
        fed += len(add)
        x += obj.decompress(add)
    assert len(x) == dec_size
    comp_len = fed - len(obj.unused_data)
    return x, comp_len
93 """Return the hexdigest of the SHA1 over a set of names."""
97 return sha1.hexdigest()
MAX_MMAP_SIZE = 1024 * 1024 * 1024
def simple_mmap(f, offset, size, access=mmap.ACCESS_READ):
    """Simple wrapper for mmap() which always supports the offset parameter.

    :param f: File object.
    :param offset: Offset in the file, from the beginning of the file.
    :param size: Size of the mmap'ed area
    :param access: Access mechanism.
    :return: MMAP'd area.
    if offset+size > MAX_MMAP_SIZE and not supports_mmap_offset:
        raise AssertionError("%d bytes is larger than the %d byte mmap limit, "
            "and this version of Python does not support the offset argument "
            "to mmap()." % (offset+size, MAX_MMAP_SIZE))
    if supports_mmap_offset:
        return mmap.mmap(f.fileno(), size, access=access, offset=offset), 0
    mem = mmap.mmap(f.fileno(), size+offset, access=access)
def load_pack_index(filename):
    f = open(filename, 'rb')
    if f.read(4) == '\377tOc':
        version = struct.unpack(">L", f.read(4))[0]
        if version == 2:
            return PackIndex2(filename, file=f)
        else:
            raise KeyError("Unknown pack index format %d" % version)
    else:
        return PackIndex1(filename, file=f)
class PackIndex(object):
    """An index into a packfile.

    Given a sha id of an object a pack index can tell you the location in the
    packfile of that object if it has it.

    To do the lookup it opens the file, and indexes the first 256 4-byte groups
    by the first byte of the sha id. The value in each four-byte group is the
    end of the group of entries that share that starting byte. Subtract one
    from the starting byte and index again to find the start of the group.
    The values are sorted by sha id within the group, so do the math to find
    the start and end offset and then bisect to find if the value is present.
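
    For example, a sketch of the lookup (illustrative, not the actual
    implementation): for a binary sha, read the fan-out entries around its
    first byte and bisect the name table between them:

        idx = ord(sha[0])
        start = 0 if idx == 0 else fan_out_table[idx - 1]
        end = fan_out_table[idx]
        # binary-search entries [start, end) of the sorted name table for sha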
    def __init__(self, filename, file=None):
        """Create a pack index object.

        Provide it with the name of the index file to consider, and it will map
        it whenever required.
        self._filename = filename
        # Take the size now, so it can be checked each time we map the file to
        # ensure that it hasn't changed.
        self._size = os.path.getsize(filename)
        self._file = open(filename, 'rb')
        self._contents, map_offset = simple_mmap(self._file, 0, self._size)
        assert map_offset == 0
    def __eq__(self, other):
        if not isinstance(other, PackIndex):
        if self._fan_out_table != other._fan_out_table:
        for (name1, _, _), (name2, _, _) in izip(self.iterentries(), other.iterentries()):
182 """Return the number of entries in this pack index."""
183 return self._fan_out_table[-1]
    def _unpack_entry(self, i):
        """Unpack the i-th entry in the index file.

        :return: Tuple with object name (SHA), offset in pack file and
            CRC32 checksum (if known)."""
        raise NotImplementedError(self._unpack_entry)

    def _unpack_name(self, i):
        """Unpack the i-th name from the index file."""
        raise NotImplementedError(self._unpack_name)

    def _unpack_offset(self, i):
        """Unpack the i-th object offset from the index file."""
        raise NotImplementedError(self._unpack_offset)

    def _unpack_crc32_checksum(self, i):
        """Unpack the crc32 checksum for the i-th object from the index file."""
        raise NotImplementedError(self._unpack_crc32_checksum)
        return imap(sha_to_hex, self._itersha())

        for i in range(len(self)):
            yield self._unpack_name(i)
    def objects_sha1(self):
        """Return the hex SHA1 over all the shas of all objects in this pack.

        :note: This is used for the filename of the pack.
        return iter_sha1(self._itersha())
    def iterentries(self):
        """Iterate over the entries in this pack index.

        Will yield tuples with object name, offset in packfile and crc32 checksum.
        for i in range(len(self)):
            yield self._unpack_entry(i)
    def _read_fan_out_table(self, start_offset):
        ret = []
        for i in range(0x100):
            ret.append(struct.unpack(">L",
                self._contents[start_offset+i*4:start_offset+(i+1)*4])[0])
        return ret
233 """Check that the stored checksum matches the actual checksum."""
234 return self.calculate_checksum() == self.get_stored_checksum()
    def calculate_checksum(self):
        f = open(self._filename, 'r')
        return make_sha(self._contents[:-20]).digest()
    def get_pack_checksum(self):
        """Return the SHA1 checksum stored for the corresponding packfile."""
        return str(self._contents[-40:-20])

    def get_stored_checksum(self):
        """Return the SHA1 checksum stored for this index."""
        return str(self._contents[-20:])
    def object_index(self, sha):
        """Return the index into the corresponding packfile for the object.

        Given the name of an object it will return the offset that object lives
        at within the corresponding pack file. If the pack file doesn't have the
        object then None will be returned.
        sha = hex_to_sha(sha)
        return self._object_index(sha)
    def _object_index(self, sha):
        :param sha: A *binary* SHA string. (20 characters long)
        assert len(sha) == 20
        start = self._fan_out_table[idx-1]
        end = self._fan_out_table[idx]
        file_sha = self._unpack_name(i)
        return self._unpack_offset(i)
class PackIndex1(PackIndex):
    """Version 1 Pack Index."""

    def __init__(self, filename, file=None):
        PackIndex.__init__(self, filename, file)
        self._fan_out_table = self._read_fan_out_table(0)
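        # A v1 index has no header: the 256-entry fan-out table starts at
        # offset 0, followed by one 24-byte record per object (a 4-byte
        # big-endian pack offset and the 20-byte binary sha), sorted by sha.
        # CRC32 checksums are not stored in v1 indexes.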
    def _unpack_entry(self, i):
        (offset, name) = unpack_from(">L20s", self._contents,
            (0x100 * 4) + (i * 24))
        return (name, offset, None)

    def _unpack_name(self, i):
        offset = (0x100 * 4) + (i * 24) + 4
        return self._contents[offset:offset+20]

    def _unpack_offset(self, i):
        offset = (0x100 * 4) + (i * 24)
        return unpack_from(">L", self._contents, offset)[0]
    def _unpack_crc32_checksum(self, i):
        # Not stored in v1 index files
        return None
class PackIndex2(PackIndex):
    """Version 2 Pack Index."""

    def __init__(self, filename, file=None):
        PackIndex.__init__(self, filename, file)
        assert self._contents[:4] == '\377tOc', "Not a v2 pack index file"
        (self.version, ) = unpack_from(">L", self._contents, 4)
        assert self.version == 2, "Version was %d" % self.version
        self._fan_out_table = self._read_fan_out_table(8)
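        # After the 8-byte header and the 256-entry fan-out table, a v2 index
        # stores three parallel tables with one entry per object: 20-byte sha1
        # names, 4-byte CRC32 checksums and 4-byte pack offsets.  (The 64-bit
        # offset table used for very large packs is not handled here.)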
        self._name_table_offset = 8 + 0x100 * 4
        self._crc32_table_offset = self._name_table_offset + 20 * len(self)
        self._pack_offset_table_offset = self._crc32_table_offset + 4 * len(self)
    def _unpack_entry(self, i):
        return (self._unpack_name(i), self._unpack_offset(i),
                self._unpack_crc32_checksum(i))

    def _unpack_name(self, i):
        offset = self._name_table_offset + i * 20
        return self._contents[offset:offset+20]

    def _unpack_offset(self, i):
        offset = self._pack_offset_table_offset + i * 4
        return unpack_from(">L", self._contents, offset)[0]

    def _unpack_crc32_checksum(self, i):
        return unpack_from(">L", self._contents,
            self._crc32_table_offset + i * 4)[0]
def read_pack_header(f):
    header = f.read(12)
    assert header[:4] == "PACK"
    (version,) = unpack_from(">L", header, 4)
    assert version in (2, 3), "Version was %d" % version
    (num_objects,) = unpack_from(">L", header, 8)
    return (version, num_objects)
def read_pack_tail(f):
    return (f.read(20),)
def unpack_object(map, offset=0):
    bytes = take_msb_bytes(map, offset)
    type = (bytes[0] >> 4) & 0x07
    size = bytes[0] & 0x0f
    for i, byte in enumerate(bytes[1:]):
        size += (byte & 0x7f) << ((i * 7) + 4)
    raw_base = len(bytes)
    if type == 6: # offset delta
        bytes = take_msb_bytes(map, raw_base + offset)
        assert not (bytes[-1] & 0x80)
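        # The ofs-delta base offset is stored most-significant 7-bit group
        # first; each continuation byte after the first implicitly adds one
        # before the shift, so multi-byte encodings never overlap shorter ones.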
        delta_base_offset = bytes[0] & 0x7f
        for byte in bytes[1:]:
            delta_base_offset += 1
            delta_base_offset <<= 7
            delta_base_offset += (byte & 0x7f)
        raw_base += len(bytes)
        uncomp, comp_len = read_zlib(map, offset + raw_base, size)
        assert size == len(uncomp)
        return type, (delta_base_offset, uncomp), comp_len+raw_base
    elif type == 7: # ref delta
        basename = map[offset+raw_base:offset+raw_base+20]
        uncomp, comp_len = read_zlib(map, offset+raw_base+20, size)
        assert size == len(uncomp)
        return type, (basename, uncomp), comp_len+raw_base+20
    else:
        uncomp, comp_len = read_zlib(map, offset+raw_base, size)
        assert len(uncomp) == size
        return type, uncomp, comp_len+raw_base
def compute_object_size((num, obj)):
    assert isinstance(obj, str)
class PackData(object):
    """The data contained in a packfile.

    Pack files can be accessed both sequentially for exploding a pack, and
    directly with the help of an index to retrieve a specific object.

    The objects within are either complete or a delta against another.

    The header is variable length. If the MSB of each byte is set then it
    indicates that the subsequent byte is still part of the header.
    For the first byte the next three MS bits are the type, which tells you
    the type of object, and whether it is a delta. The LS nibble holds the
    lowest bits of the size. For each subsequent byte the LS 7 bits are the
    next MS bits of the size, i.e. the last byte of the header contains the
    MS bits of the size.

    For the complete objects the data is stored as zlib deflated data.
    The size in the header is the uncompressed object size, so to uncompress
    you need to just keep feeding data to zlib until you get an object back,
    or it errors on bad data. This is done here by just giving the complete
    buffer from the start of the deflated object on. This is bad, but until I
    get mmap sorted out it will have to do.

    Currently there are no integrity checks done. Also no attempt is made to
    try and detect the delta case, or a request for an object at the wrong
    position. It will all just throw a zlib or KeyError.
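
    As a worked example of the header encoding (illustrative only): the two
    bytes 0x95 0x0b decode to type (0x95 >> 4) & 0x07 = 1 (a commit) and size
    (0x95 & 0x0f) + (0x0b << 4) = 5 + 176 = 181 uncompressed bytes; the MSB of
    0x95 is set so the header continues, and the MSB of 0x0b is clear so it
    ends there.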
    def __init__(self, filename):
        """Create a PackData object that represents the pack in the given filename.

        The file must exist and stay readable until the object is disposed of. It
        must also stay the same size. It will be mapped whenever needed.

        Currently there is a restriction on the size of the pack as the Python
        mmap implementation is flawed.
        self._filename = filename
        assert os.path.exists(filename), "%s is not a packfile" % filename
        self._size = os.path.getsize(filename)
        self._header_size = 12
        assert self._size >= self._header_size, (
            "%s is too small for a packfile (%d < %d)" %
            (filename, self._size, self._header_size))
        self._read_header()
        self._offset_cache = LRUSizeCache(1024*1024*100,
            compute_size=compute_object_size)
    def _read_header(self):
        f = open(self._filename, 'rb')
        (version, self._num_objects) = read_pack_header(f)
        f.seek(self._size-20)
        (self._stored_checksum,) = read_pack_tail(f)
450 """Returns the number of objects in this pack."""
451 return self._num_objects
    def calculate_checksum(self):
        """Calculate the checksum for this pack."""
        f = open(self._filename, 'rb')
        map, map_offset = simple_mmap(f, 0, self._size - 20)
        return make_sha(map[map_offset:self._size-20]).digest()
    def resolve_object(self, offset, type, obj, get_ref, get_offset=None):
        """Resolve an object, possibly resolving deltas when necessary.

        :return: Tuple with object type and contents.
        if type not in (6, 7): # Not a delta
            return type, obj

        if get_offset is None:
            get_offset = self.get_object_at

        if type == 6: # offset delta
            (delta_offset, delta) = obj
            assert isinstance(delta_offset, int)
            assert isinstance(delta, str)
            base_offset = offset-delta_offset
            type, base_obj = get_offset(base_offset)
            assert isinstance(type, int)
        elif type == 7: # ref delta
            (basename, delta) = obj
            assert isinstance(basename, str) and len(basename) == 20
            assert isinstance(delta, str)
            type, base_obj = get_ref(basename)
            assert isinstance(type, int)
            # Can't be an ofs delta, as we wouldn't know the base offset
            base_offset = None
        type, base_text = self.resolve_object(base_offset, type, base_obj, get_ref)
        if base_offset is not None:
            self._offset_cache[base_offset] = type, base_text
        ret = (type, apply_delta(base_text, delta))
        return ret
    def iterobjects(self):
        offset = self._header_size
        f = open(self._filename, 'rb')
        map, _ = simple_mmap(f, 0, self._size)
        (type, obj, total_size) = unpack_object(map, offset)
        crc32 = zlib.crc32(map[offset:offset+total_size]) & 0xffffffff
        yield offset, type, obj, crc32
    def iterentries(self, ext_resolve_ref=None):
        found = {}
        postponed = defaultdict(list)
        class Postpone(Exception):
            """Raised to postpone delta resolving."""

        def get_ref_text(sha):
            if sha in found:
                return found[sha]
            if ext_resolve_ref:
                return ext_resolve_ref(sha)
            raise Postpone, (sha, )

        todo = list(self.iterobjects())
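        # Work through the objects in pack order.  A ref delta whose base has
        # not been seen yet raises Postpone; it is parked in `postponed` and
        # re-queued as soon as an object with that sha has been resolved.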
        while todo:
            (offset, type, obj, crc32) = todo.pop(0)
            assert isinstance(offset, int)
            assert isinstance(type, int)
            assert isinstance(obj, tuple) or isinstance(obj, str)
            try:
                type, obj = self.resolve_object(offset, type, obj, get_ref_text)
            except Postpone, (sha, ):
                postponed[sha].append((offset, type, obj))
            else:
                shafile = ShaFile.from_raw_string(type, obj)
                sha = shafile.sha().digest()
                found[sha] = (type, obj)
                yield sha, offset, crc32
                todo += postponed.get(sha, [])
        if postponed:
            raise KeyError([sha_to_hex(h) for h in postponed.keys()])
    def sorted_entries(self, resolve_ext_ref=None):
        ret = list(self.iterentries(resolve_ext_ref))
        ret.sort()
        return ret
    def create_index_v1(self, filename, resolve_ext_ref=None):
        entries = self.sorted_entries(resolve_ext_ref)
        write_pack_index_v1(filename, entries, self.calculate_checksum())

    def create_index_v2(self, filename, resolve_ext_ref=None):
        entries = self.sorted_entries(resolve_ext_ref)
        write_pack_index_v2(filename, entries, self.calculate_checksum())
    def get_stored_checksum(self):
        return self._stored_checksum

        return (self.calculate_checksum() == self.get_stored_checksum())
    def get_object_at(self, offset):
        """Given an offset into the packfile return the object that is there.

        Using the associated index the location of an object can be looked up,
        and then the packfile can be asked directly for that object using this
        method.
        if offset in self._offset_cache:
            return self._offset_cache[offset]
        assert isinstance(offset, long) or isinstance(offset, int),\
                "offset was %r" % offset
        assert offset >= self._header_size
        f = open(self._filename, 'rb')
        map, map_offset = simple_mmap(f, offset, self._size-offset)
        ret = unpack_object(map, map_offset)[:2]
        return ret
class SHA1Writer(object):

    def __init__(self, f):
        self.sha1 = make_sha("")

    def write(self, data):
        self.sha1.update(data)

        sha = self.sha1.digest()
        assert len(sha) == 20

        sha = self.write_sha()
def write_pack_object(f, type, object):
    """Write pack object to a file.

    :param f: File to write to
    :param type: Numeric type of the object
    :param object: Object to write
    :return: Tuple with offset at which the object was written, and crc32
        checksum.
    if type == 6: # offset delta
        (delta_base_offset, object) = object
    elif type == 7: # ref delta
        (basename, object) = object
    size = len(object)
    packed_data_hdr = ""
    c = (type << 4) | (size & 15)
    size >>= 4
    while size:
        packed_data_hdr += (chr(c | 0x80))
        c = size & 0x7f
        size >>= 7
    packed_data_hdr += chr(c)
    if type == 6: # offset delta
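        # Encoding mirrors the decoder in unpack_object: the base offset is
        # emitted most-significant 7-bit group first, every byte except the
        # last has the high bit set, and one is subtracted at each level
        # (the decoder adds it back).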
        ret = [delta_base_offset & 0x7f]
        delta_base_offset >>= 7
        while delta_base_offset:
            delta_base_offset -= 1
            ret.insert(0, 0x80 | (delta_base_offset & 0x7f))
            delta_base_offset >>= 7
        packed_data_hdr += "".join([chr(x) for x in ret])
    elif type == 7: # ref delta
        assert len(basename) == 20
        packed_data_hdr += basename
    packed_data = packed_data_hdr + zlib.compress(object)
    f.write(packed_data)
    return (f.tell(), (zlib.crc32(packed_data) & 0xffffffff))
def write_pack(filename, objects, num_objects):
    f = open(filename + ".pack", 'wb')
    entries, data_sum = write_pack_data(f, objects, num_objects)
    write_pack_index_v2(filename + ".idx", entries, data_sum)
def write_pack_data(f, objects, num_objects, window=10):
    """Write a new pack file.

    :param f: File to write the pack data to.
    :param objects: List of objects to write (tuples with object and path)
    :return: List with (name, offset, crc32 checksum) entries, pack checksum
    recency = list(objects)
    # FIXME: Somehow limit delta depth
    # FIXME: Make thin-pack optional (it's not used when cloning a pack)
    # Build a list of objects ordered by the magic Linus heuristic
    # This helps us find good objects to diff against us
    magic = []
    for obj, path in recency:
        magic.append( (obj.type, path, 1, -len(obj.as_raw_string()[1]), obj) )
    magic.sort()
    # Build a map of objects and their index in magic - so we can find
    # preceding objects to diff against
    offs = {}
    for i in range(len(magic)):
        offs[magic[i][4]] = i
    entries = []
    f = SHA1Writer(f)
    f.write("PACK")               # Pack header
    f.write(struct.pack(">L", 2)) # Pack version
    f.write(struct.pack(">L", num_objects)) # Number of objects in pack
    for o, path in recency:
        sha1 = o.sha().digest()
        orig_t, raw = o.as_raw_string()
        winner = raw
        t = orig_t
        #for i in range(offs[o]-window, window):
        #    if i < 0 or i >= len(offs): continue
        #    if b.type != orig_t: continue
        #    _, base = b.as_raw_string()
        #    delta = create_delta(base, raw)
        #    if len(delta) < len(winner):
        #        t = 6 if magic[i][2] == 1 else 7
        offset, crc32 = write_pack_object(f, t, winner)
        entries.append((sha1, offset, crc32))
    return entries, f.write_sha()
def write_pack_index_v1(filename, entries, pack_checksum):
    """Write a new pack index file.

    :param filename: The filename of the new pack index file.
    :param entries: List of tuples with object name (sha), offset_in_pack, and
        crc32_checksum.
    :param pack_checksum: Checksum of the pack file.
    f = open(filename, 'wb')
    fan_out_table = defaultdict(lambda: 0)
    for (name, offset, entry_checksum) in entries:
        fan_out_table[ord(name[0])] += 1
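    # Write the fan-out table cumulatively: the value written for entry i is
    # the number of objects whose first sha byte is <= i.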
    for i in range(0x100):
        f.write(struct.pack(">L", fan_out_table[i]))
        fan_out_table[i+1] += fan_out_table[i]
    for (name, offset, entry_checksum) in entries:
        f.write(struct.pack(">L20s", offset, name))
    assert len(pack_checksum) == 20
    f.write(pack_checksum)
def create_delta(base_buf, target_buf):
    """Use python difflib to work out how to transform base_buf to target_buf"""
    assert isinstance(base_buf, str)
    assert isinstance(target_buf, str)
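    # A git delta stream starts with the variable-length encoded sizes of the
    # base and target buffers, followed by copy opcodes (reuse a range of
    # base_buf) and insert opcodes (literal bytes taken from target_buf).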
    def encode_size(size):

    out_buf += encode_size(len(base_buf))
    out_buf += encode_size(len(target_buf))
    # write out delta opcodes
    seq = difflib.SequenceMatcher(a=base_buf, b=target_buf)
    for opcode, i1, i2, j1, j2 in seq.get_opcodes():
        # Git patch opcodes don't care about deletes!
        #if opcode == "replace" or opcode == "delete":
        if opcode == "equal":
            # If they are equal, unpacker will use data from base_buf
            # Write out an opcode that says what range to use
                scratch += chr(o >> i)
                scratch += chr(s >> i)
        if opcode == "replace" or opcode == "insert":
            # If we are replacing a range or adding one, then we just
            # output it to the stream (prefixed by its size)
                out_buf += target_buf[o:o+127]
            out_buf += target_buf[o:o+s]
def apply_delta(src_buf, delta):
    """Based on the similar function in git's patch-delta.c.

    :param src_buf: Source buffer
    :param delta: Delta instructions
    assert isinstance(src_buf, str), "was %r" % (src_buf,)
    assert isinstance(delta, str)
    out = []
    index = 0
    delta_length = len(delta)
    def get_delta_header_size(delta, index):
            cmd = ord(delta[index])
            size |= (cmd & ~0x80) << i
        return size, index
    src_size, index = get_delta_header_size(delta, index)
    dest_size, index = get_delta_header_size(delta, index)
    assert src_size == len(src_buf), "%d vs %d" % (src_size, len(src_buf))
    while index < delta_length:
        cmd = ord(delta[index])
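        # A command byte with the high bit set is a copy opcode: its low seven
        # bits flag which offset and size bytes follow.  A non-zero byte below
        # 0x80 is an insert of that many literal bytes; zero is invalid.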
                    x = ord(delta[index])
                    cp_off |= x << (i * 8)
                if cmd & (1 << (4+i)):
                    x = ord(delta[index])
                    cp_size |= x << (i * 8)
            if (cp_off + cp_size < cp_size or
                cp_off + cp_size > src_size or
                cp_size > dest_size):
            out.append(src_buf[cp_off:cp_off+cp_size])
            out.append(delta[index:index+cmd])
            raise ApplyDeltaError("Invalid opcode 0")

    if index != delta_length:
        raise ApplyDeltaError("delta not empty: %r" % delta[index:])

    if dest_size != len(out):
        raise ApplyDeltaError("dest size incorrect")
def write_pack_index_v2(filename, entries, pack_checksum):
    """Write a new pack index file.

    :param filename: The filename of the new pack index file.
    :param entries: List of tuples with object name (sha), offset_in_pack, and
        crc32_checksum.
    :param pack_checksum: Checksum of the pack file.
    f = open(filename, 'wb')
    f.write('\377tOc') # Magic!
    f.write(struct.pack(">L", 2))
    fan_out_table = defaultdict(lambda: 0)
    for (name, offset, entry_checksum) in entries:
        fan_out_table[ord(name[0])] += 1
    for i in range(0x100):
        f.write(struct.pack(">L", fan_out_table[i]))
        fan_out_table[i+1] += fan_out_table[i]
    for (name, offset, entry_checksum) in entries:
        f.write(name)
    for (name, offset, entry_checksum) in entries:
        f.write(struct.pack(">L", entry_checksum))
    for (name, offset, entry_checksum) in entries:
        # FIXME: handle if MSBit is set in offset
        f.write(struct.pack(">L", offset))
    # FIXME: handle table for pack files > 8 Gb
    assert len(pack_checksum) == 20
    f.write(pack_checksum)
class Pack(object):

    def __init__(self, basename):
        self._basename = basename
        self._data_path = self._basename + ".pack"
        self._idx_path = self._basename + ".idx"
        self._data = None
        self._idx = None
885 """The SHA over the SHAs of the objects in this pack."""
886 return self.idx.objects_sha1()
        if self._data is None:
            self._data = PackData(self._data_path)
            assert len(self.idx) == len(self._data)
            idx_stored_checksum = self.idx.get_pack_checksum()
            data_stored_checksum = self._data.get_stored_checksum()
            if idx_stored_checksum != data_stored_checksum:
                raise ChecksumMismatch(sha_to_hex(idx_stored_checksum),
                                       sha_to_hex(data_stored_checksum))
        if self._idx is None:
            self._idx = load_pack_index(self._idx_path)

        if self._data is not None:
    def __eq__(self, other):
        return type(self) == type(other) and self.idx == other.idx

    def __len__(self):
        """Number of entries in this pack."""

    def __repr__(self):
        return "Pack(%r)" % self._basename

    def __iter__(self):
        """Iterate over all the sha1s of the objects in this pack."""
        return iter(self.idx)
        if not self.idx.check():
        if not self.data.check():

    def get_stored_checksum(self):
        return self.data.get_stored_checksum()
    def __contains__(self, sha1):
        """Check whether this pack contains a particular SHA1."""
        return (self.idx.object_index(sha1) is not None)
    def get_raw(self, sha1, resolve_ref=None):
        offset = self.idx.object_index(sha1)
        type, obj = self.data.get_object_at(offset)
        if isinstance(offset, long):
            offset = int(offset)
        if resolve_ref is None:
            resolve_ref = self.get_raw
        assert isinstance(offset, int)
        return self.data.resolve_object(offset, type, obj, resolve_ref)
    def __getitem__(self, sha1):
        """Retrieve the specified SHA1."""
        type, uncomp = self.get_raw(sha1)
        return ShaFile.from_raw_string(type, uncomp)
    def iterobjects(self, get_raw=None):
        if get_raw is None:
            get_raw = self.get_raw
        for offset, type, obj, crc32 in self.data.iterobjects():
            assert isinstance(offset, int)
            yield ShaFile.from_raw_string(
                *self.data.resolve_object(offset, type, obj, get_raw))
def load_packs(path):
    if not os.path.exists(path):
        return
    for name in os.listdir(path):
        if name.startswith("pack-") and name.endswith(".pack"):
            yield Pack(os.path.join(path, name[:-len(".pack")]))
try:
    from dulwich._pack import apply_delta
except ImportError:
    pass