X-Git-Url: http://git.samba.org/samba.git/?p=jelmer%2Fdulwich-libgit2.git;a=blobdiff_plain;f=dulwich%2Fpack.py;h=323dc9a00d9fa2fb219d4c68c7b0a0942eb3c9bc;hp=772e18c037d7ffdf0aaa34ed4799822632912d29;hb=2dbedff35566546f6ce5ae1d3e5ac836d663ac46;hpb=284a81d9313c326fb973aa9dabeb81febd7f3ad9

diff --git a/dulwich/pack.py b/dulwich/pack.py
index 772e18c..323dc9a 100644
--- a/dulwich/pack.py
+++ b/dulwich/pack.py
@@ -1,6 +1,6 @@
 # pack.py -- For dealing wih packed git objects.
 # Copyright (C) 2007 James Westby
-# Copryight (C) 2008 Jelmer Vernooij
+# Copryight (C) 2008-2009 Jelmer Vernooij
 #
 # This program is free software; you can redistribute it and/or
 # modify it under the terms of the GNU General Public License
@@ -35,7 +35,12 @@ try:
 except ImportError:
     from misc import defaultdict
 
-from itertools import imap, izip
+import difflib
+from itertools import (
+    chain,
+    imap,
+    izip,
+    )
 import mmap
 import os
 import struct
@@ -45,12 +50,12 @@ except ImportError:
     from dulwich.misc import unpack_from
 import sys
 import zlib
-import difflib
 
 from dulwich.errors import (
     ApplyDeltaError,
     ChecksumMismatch,
     )
+from dulwich.file import GitFile
 from dulwich.lru_cache import (
     LRUSizeCache,
     )
@@ -59,72 +64,120 @@ from dulwich.objects import (
     hex_to_sha,
     sha_to_hex,
     )
-from dulwich.misc import make_sha
+from dulwich.misc import (
+    make_sha,
+    )
 
 supports_mmap_offset = (sys.version_info[0] >= 3 or
         (sys.version_info[0] == 2 and sys.version_info[1] >= 6))
 
 
-def take_msb_bytes(map, offset):
+def take_msb_bytes(read):
+    """Read bytes marked with most significant bit.
+
+    :param read: Read function
+    """
     ret = []
     while len(ret) == 0 or ret[-1] & 0x80:
-        ret.append(ord(map[offset]))
-        offset += 1
+        ret.append(ord(read(1)))
     return ret
 
 
-def read_zlib(data, offset, dec_size):
+def read_zlib_chunks(read, buffer_size=4096):
+    """Read chunks of zlib data from a buffer.
+
+    :param read: Read function
+    :return: Tuple with list of chunks, length of
+        compressed data length and unused read data
+    """
     obj = zlib.decompressobj()
-    x = ""
+    ret = []
     fed = 0
     while obj.unused_data == "":
-        base = offset+fed
-        add = data[base:base+1024]
-        if len(add) < 1024:
+        add = read(buffer_size)
+        if len(add) < buffer_size:
             add += "Z"
         fed += len(add)
-        x += obj.decompress(add)
-    assert len(x) == dec_size
+        ret.append(obj.decompress(add))
     comp_len = fed-len(obj.unused_data)
-    return x, comp_len
+    return ret, comp_len, obj.unused_data
+
+
+def read_zlib(read, dec_size):
+    """Read zlib-compressed data from a buffer.
+
+    :param read: Read function
+    :param dec_size: Size of the decompressed buffer
+    :return: Uncompressed buffer, compressed buffer length and unused read
+        data.
+    """
+    ret, comp_len, unused = read_zlib_chunks(read)
+    x = "".join(ret)
+    assert len(x) == dec_size
+    return x, comp_len, unused
+
 
 def iter_sha1(iter):
-    """Return the hexdigest of the SHA1 over a set of names."""
+    """Return the hexdigest of the SHA1 over a set of names.
+
+    :param iter: Iterator over string objects
+    :return: 40-byte hex sha1 digest
+    """
     sha1 = make_sha()
     for name in iter:
         sha1.update(name)
     return sha1.hexdigest()
 
 
-def simple_mmap(f, offset, size, access=mmap.ACCESS_READ):
-    """Simple wrapper for mmap() which always supports the offset parameter.
+def load_pack_index(path):
+    """Load an index file by path.
 
-    :param f: File object.
-    :param offset: Offset in the file, from the beginning of the file.
-    :param size: Size of the mmap'ed area
-    :param access: Access mechanism.
-    :return: MMAP'd area.
+    :param filename: Path to the index file
     """
-    if supports_mmap_offset:
-        return mmap.mmap(f.fileno(), size, access=access, offset=offset), 0
-    else:
-        mem = mmap.mmap(f.fileno(), size+offset, access=access)
-        return mem, offset
+    f = GitFile(path, 'rb')
+    return load_pack_index_file(path, f)
 
 
-def load_pack_index(filename):
-    f = open(filename, 'r')
+def load_pack_index_file(path, f):
+    """Load an index file from a file-like object.
+
+    :param path: Path for the index file
+    :param f: File-like object
+    """
     if f.read(4) == '\377tOc':
         version = struct.unpack(">L", f.read(4))[0]
         if version == 2:
             f.seek(0)
-            return PackIndex2(filename, file=f)
+            return PackIndex2(path, file=f)
         else:
             raise KeyError("Unknown pack index format %d" % version)
     else:
         f.seek(0)
-        return PackIndex1(filename, file=f)
+        return PackIndex1(path, file=f)
+
+
+def bisect_find_sha(start, end, sha, unpack_name):
+    """Find a SHA in a data blob with sorted SHAs.
+
+    :param start: Start index of range to search
+    :param end: End index of range to search
+    :param sha: Sha to find
+    :param unpack_name: Callback to retrieve SHA by index
+    :return: Index of the SHA, or None if it wasn't found
+    """
+    assert start <= end
+    while start <= end:
+        i = (start + end)/2
+        file_sha = unpack_name(i)
+        x = cmp(file_sha, sha)
+        if x < 0:
+            start = i + 1
+        elif x > 0:
+            end = i - 1
+        else:
+            return i
+    return None
 
 
 class PackIndex(object):
@@ -141,7 +194,7 @@ class PackIndex(object):
     the start and end offset and then bisect in to find if the value is
     present.
     """
-    def __init__(self, filename, file=None):
+    def __init__(self, filename, file=None, size=None):
         """Create a pack index object.
 
         Provide it with the name of the index file to consider, and it will map
@@ -150,13 +203,23 @@ class PackIndex(object):
         self._filename = filename
         # Take the size now, so it can be checked each time we map the file to
         # ensure that it hasn't changed.
-        self._size = os.path.getsize(filename)
         if file is None:
-            self._file = open(filename, 'r')
+            self._file = GitFile(filename, 'rb')
         else:
             self._file = file
-        self._contents, map_offset = simple_mmap(self._file, 0, self._size)
-        assert map_offset == 0
+        fileno = getattr(self._file, 'fileno', None)
+        if fileno is not None:
+            fd = self._file.fileno()
+            if size is None:
+                self._size = os.fstat(fd).st_size
+            else:
+                self._size = size
+            self._contents = mmap.mmap(fd, self._size,
+                access=mmap.ACCESS_READ)
+        else:
+            self._file.seek(0)
+            self._contents = self._file.read()
+            self._size = len(self._contents)
 
     def __eq__(self, other):
         if not isinstance(other, PackIndex):
@@ -169,6 +232,9 @@ class PackIndex(object):
             if name1 != name2:
                 return False
         return True
+
+    def __ne__(self, other):
+        return not self.__eq__(other)
 
     def close(self):
         self._file.close()
@@ -197,6 +263,7 @@ class PackIndex(object):
         raise NotImplementedError(self._unpack_crc32_checksum)
 
     def __iter__(self):
+        """Iterate over the SHAs in this pack."""
         return imap(sha_to_hex, self._itersha())
 
     def _itersha(self):
@@ -226,21 +293,28 @@ class PackIndex(object):
 
     def check(self):
         """Check that the stored checksum matches the actual checksum."""
+        # TODO: Check pack contents, too
         return self.calculate_checksum() == self.get_stored_checksum()
 
     def calculate_checksum(self):
-        f = open(self._filename, 'r')
-        try:
-            return make_sha(self._contents[:-20]).digest()
-        finally:
-            f.close()
+        """Calculate the SHA1 checksum over this pack index.
+
+        :return: This is a 20-byte binary digest
+        """
+        return make_sha(self._contents[:-20]).digest()
 
     def get_pack_checksum(self):
-        """Return the SHA1 checksum stored for the corresponding packfile."""
+        """Return the SHA1 checksum stored for the corresponding packfile.
+
+        :return: 20-byte binary digest
+        """
         return str(self._contents[-40:-20])
 
     def get_stored_checksum(self):
-        """Return the SHA1 checksum stored for this index."""
+        """Return the SHA1 checksum stored for this index.
+
+        :return: 20-byte binary digest
+        """
         return str(self._contents[-20:])
 
     def object_index(self, sha):
@@ -266,24 +340,18 @@ class PackIndex(object):
         else:
             start = self._fan_out_table[idx-1]
             end = self._fan_out_table[idx]
-        assert start <= end
-        while start <= end:
-            i = (start + end)/2
-            file_sha = self._unpack_name(i)
-            if file_sha < sha:
-                start = i + 1
-            elif file_sha > sha:
-                end = i - 1
-            else:
-                return self._unpack_offset(i)
-        return None
+        i = bisect_find_sha(start, end, sha, self._unpack_name)
+        if i is None:
+            raise KeyError(sha)
+        return self._unpack_offset(i)
+
 
 class PackIndex1(PackIndex):
     """Version 1 Pack Index."""
 
-    def __init__(self, filename, file=None):
-        PackIndex.__init__(self, filename, file)
+    def __init__(self, filename, file=None, size=None):
+        PackIndex.__init__(self, filename, file, size)
         self.version = 1
         self._fan_out_table = self._read_fan_out_table(0)
 
@@ -308,8 +376,8 @@ class PackIndex1(PackIndex):
 class PackIndex2(PackIndex):
     """Version 2 Pack Index."""
 
-    def __init__(self, filename, file=None):
-        PackIndex.__init__(self, filename, file)
+    def __init__(self, filename, file=None, size=None):
+        PackIndex.__init__(self, filename, file, size)
         assert self._contents[:4] == '\377tOc', "Not a v2 pack index file"
         (self.version, ) = unpack_from(">L", self._contents, 4)
         assert self.version == 2, "Version was %d" % self.version
@@ -337,6 +405,10 @@ class PackIndex2(PackIndex):
 
 
 def read_pack_header(f):
+    """Read the header of a pack file.
+
+    :param f: File-like object to read from
+    """
     header = f.read(12)
     assert header[:4] == "PACK"
     (version,) = unpack_from(">L", header, 4)
@@ -345,41 +417,45 @@ def read_pack_header(f):
     return (version, num_objects)
 
 
-def read_pack_tail(f):
-    return (f.read(20),)
-
+def unpack_object(read):
+    """Unpack a Git object.
 
-def unpack_object(map, offset=0):
-    bytes = take_msb_bytes(map, offset)
+    :return: tuple with type, uncompressed data, compressed size and
+        tail data
+    """
+    bytes = take_msb_bytes(read)
     type = (bytes[0] >> 4) & 0x07
     size = bytes[0] & 0x0f
     for i, byte in enumerate(bytes[1:]):
         size += (byte & 0x7f) << ((i * 7) + 4)
     raw_base = len(bytes)
     if type == 6: # offset delta
-        bytes = take_msb_bytes(map, raw_base + offset)
+        bytes = take_msb_bytes(read)
+        raw_base += len(bytes)
         assert not (bytes[-1] & 0x80)
         delta_base_offset = bytes[0] & 0x7f
         for byte in bytes[1:]:
             delta_base_offset += 1
             delta_base_offset <<= 7
             delta_base_offset += (byte & 0x7f)
-        raw_base+=len(bytes)
-        uncomp, comp_len = read_zlib(map, offset + raw_base, size)
+        uncomp, comp_len, unused = read_zlib(read, size)
         assert size == len(uncomp)
-        return type, (delta_base_offset, uncomp), comp_len+raw_base
+        return type, (delta_base_offset, uncomp), comp_len+raw_base, unused
     elif type == 7: # ref delta
-        basename = map[offset+raw_base:offset+raw_base+20]
-        uncomp, comp_len = read_zlib(map, offset+raw_base+20, size)
+        basename = read(20)
+        raw_base += 20
+        uncomp, comp_len, unused = read_zlib(read, size)
         assert size == len(uncomp)
-        return type, (basename, uncomp), comp_len+raw_base+20
+        return type, (basename, uncomp), comp_len+raw_base, unused
     else:
-        uncomp, comp_len = read_zlib(map, offset+raw_base, size)
+        uncomp, comp_len, unused = read_zlib(read, size)
         assert len(uncomp) == size
-        return type, uncomp, comp_len+raw_base
+        return type, uncomp, comp_len+raw_base, unused
 
 
-def compute_object_size((num, obj)):
+def _compute_object_size((num, obj)):
+    """Compute the size of a unresolved object for use with LRUSizeCache.
+    """
     if num in (6, 7):
         return len(obj[1])
     assert isinstance(obj, str)
@@ -413,7 +489,7 @@ class PackData(object):
 
     It will all just throw a zlib or KeyError.
     """
 
-    def __init__(self, filename):
+    def __init__(self, filename, file=None, size=None):
         """Create a PackData object that represents the pack in the given filename.
 
         The file must exist and stay readable until the object is disposed of. It
@@ -423,36 +499,51 @@ class PackData(object):
         mmap implementation is flawed.
""" self._filename = filename - assert os.path.exists(filename), "%s is not a packfile" % filename - self._size = os.path.getsize(filename) + self._size = size self._header_size = 12 - assert self._size >= self._header_size, "%s is too small for a packfile (%d < %d)" % (filename, self._size, self._header_size) - self._read_header() - self._offset_cache = LRUSizeCache(1024*1024*100, - compute_size=compute_object_size) - - def _read_header(self): - f = open(self._filename, 'rb') - try: - (version, self._num_objects) = \ - read_pack_header(f) - f.seek(self._size-20) - (self._stored_checksum,) = read_pack_tail(f) - finally: - f.close() + if file is None: + self._file = GitFile(self._filename, 'rb') + else: + self._file = file + (version, self._num_objects) = read_pack_header(self._file) + self._offset_cache = LRUSizeCache(1024*1024*20, + compute_size=_compute_object_size) + + @classmethod + def from_file(cls, file, size): + return cls(str(file), file=file, size=size) + + @classmethod + def from_path(cls, path): + return cls(filename=path) + + def close(self): + self._file.close() + + def _get_size(self): + if self._size is not None: + return self._size + self._size = os.path.getsize(self._filename) + assert self._size >= self._header_size, "%s is too small for a packfile (%d < %d)" % (self._filename, self._size, self._header_size) + return self._size def __len__(self): """Returns the number of objects in this pack.""" return self._num_objects def calculate_checksum(self): - """Calculate the checksum for this pack.""" - f = open(self._filename, 'rb') - try: - map, map_offset = simple_mmap(f, 0, self._size - 20) - return make_sha(map[map_offset:self._size-20]).digest() - finally: - f.close() + """Calculate the checksum for this pack. + + :return: 20-byte binary SHA1 digest + """ + s = make_sha() + self._file.seek(0) + todo = self._get_size() - 20 + while todo > 0: + x = self._file.read(min(todo, 1<<16)) + s.update(x) + todo -= len(x) + return s.digest() def resolve_object(self, offset, type, obj, get_ref, get_offset=None): """Resolve an object, possibly resolving deltas when necessary. @@ -487,36 +578,65 @@ class PackData(object): ret = (type, apply_delta(base_text, delta)) return ret - def iterobjects(self): - offset = self._header_size - f = open(self._filename, 'rb') - num = len(self) - map, _ = simple_mmap(f, 0, self._size) - for i in range(num): - (type, obj, total_size) = unpack_object(map, offset) - crc32 = zlib.crc32(map[offset:offset+total_size]) & 0xffffffff - yield offset, type, obj, crc32 - offset += total_size - f.close() + def iterobjects(self, progress=None): + + class ObjectIterator(object): + + def __init__(self, pack): + self.i = 0 + self.offset = pack._header_size + self.num = len(pack) + self.map = pack._file + + def __iter__(self): + return self + + def __len__(self): + return self.num + + def next(self): + if self.i == self.num: + raise StopIteration + self.map.seek(self.offset) + (type, obj, total_size, unused) = unpack_object(self.map.read) + self.map.seek(self.offset) + crc32 = zlib.crc32(self.map.read(total_size)) & 0xffffffff + ret = (self.offset, type, obj, crc32) + self.offset += total_size + if progress: + progress(self.i, self.num) + self.i+=1 + return ret + return ObjectIterator(self) - def iterentries(self, ext_resolve_ref=None): + def iterentries(self, ext_resolve_ref=None, progress=None): + """Yield entries summarizing the contents of this pack. 
+
+        :param ext_resolve_ref: Optional function to resolve base
+            objects (in case this is a thin pack)
+        :param progress: Progress function, called with current and
+            total object count.
+
+        This will yield tuples with (sha, offset, crc32)
+        """
         found = {}
         postponed = defaultdict(list)
         class Postpone(Exception):
             """Raised to postpone delta resolving."""
 
         def get_ref_text(sha):
+            assert len(sha) == 20
             if sha in found:
-                return found[sha]
+                return self.get_object_at(found[sha])
             if ext_resolve_ref:
                 try:
                     return ext_resolve_ref(sha)
                 except KeyError:
                     pass
             raise Postpone, (sha, )
-        todo = list(self.iterobjects())
-        while todo:
-            (offset, type, obj, crc32) = todo.pop(0)
+        extra = []
+        todo = chain(self.iterobjects(progress=progress), extra)
+        for (offset, type, obj, crc32) in todo:
             assert isinstance(offset, int)
             assert isinstance(type, int)
             assert isinstance(obj, tuple) or isinstance(obj, str)
@@ -527,29 +647,70 @@ class PackData(object):
             else:
                 shafile = ShaFile.from_raw_string(type, obj)
                 sha = shafile.sha().digest()
-                found[sha] = (type, obj)
+                found[sha] = offset
                 yield sha, offset, crc32
-                todo += postponed.get(sha, [])
+                extra.extend(postponed.get(sha, []))
         if postponed:
             raise KeyError([sha_to_hex(h) for h in postponed.keys()])
 
-    def sorted_entries(self, resolve_ext_ref=None):
-        ret = list(self.iterentries(resolve_ext_ref))
+    def sorted_entries(self, resolve_ext_ref=None, progress=None):
+        """Return entries in this pack, sorted by SHA.
+
+        :param ext_resolve_ref: Optional function to resolve base
+            objects (in case this is a thin pack)
+        :param progress: Progress function, called with current and
+            total object count.
+        :return: List of tuples with (sha, offset, crc32)
+        """
+        ret = list(self.iterentries(resolve_ext_ref, progress=progress))
         ret.sort()
         return ret
 
-    def create_index_v1(self, filename, resolve_ext_ref=None):
-        entries = self.sorted_entries(resolve_ext_ref)
+    def create_index_v1(self, filename, resolve_ext_ref=None, progress=None):
+        """Create a version 1 file for this data file.
+
+        :param filename: Index filename.
+        :param resolve_ext_ref: Function to use for resolving externally referenced
+            SHA1s (for thin packs)
+        :param progress: Progress report function
+        """
+        entries = self.sorted_entries(resolve_ext_ref, progress=progress)
         write_pack_index_v1(filename, entries, self.calculate_checksum())
 
-    def create_index_v2(self, filename, resolve_ext_ref=None):
-        entries = self.sorted_entries(resolve_ext_ref)
+    def create_index_v2(self, filename, resolve_ext_ref=None, progress=None):
+        """Create a version 2 index file for this data file.
+
+        :param filename: Index filename.
+        :param resolve_ext_ref: Function to use for resolving externally referenced
+            SHA1s (for thin packs)
+        :param progress: Progress report function
+        """
+        entries = self.sorted_entries(resolve_ext_ref, progress=progress)
        write_pack_index_v2(filename, entries, self.calculate_checksum())
+
+    def create_index(self, filename, resolve_ext_ref=None, progress=None,
+                     version=2):
+        """Create an index file for this data file.
+
+        :param filename: Index filename.
+        :param resolve_ext_ref: Function to use for resolving externally referenced
+            SHA1s (for thin packs)
+        :param progress: Progress report function
+        """
+        if version == 1:
+            self.create_index_v1(filename, resolve_ext_ref, progress)
+        elif version == 2:
+            self.create_index_v2(filename, resolve_ext_ref, progress)
+        else:
+            raise ValueError("unknown index format %d" % version)
 
     def get_stored_checksum(self):
-        return self._stored_checksum
+        """Return the expected checksum stored in this pack."""
+        self._file.seek(self._get_size()-20)
+        return self._file.read(20)
 
     def check(self):
+        """Check the consistency of this pack."""
         return (self.calculate_checksum() == self.get_stored_checksum())
 
     def get_object_at(self, offset):
@@ -564,16 +725,38 @@ class PackData(object):
         assert isinstance(offset, long) or isinstance(offset, int),\
                 "offset was %r" % offset
         assert offset >= self._header_size
-        f = open(self._filename, 'rb')
-        try:
-            map, map_offset = simple_mmap(f, offset, self._size-offset)
-            ret = unpack_object(map, map_offset)[:2]
-            return ret
-        finally:
-            f.close()
+        self._file.seek(offset)
+        return unpack_object(self._file.read)[:2]
+
+
+class SHA1Reader(object):
+    """Wrapper around a file-like object that remembers the SHA1 of
+    the data read from it."""
+
+    def __init__(self, f):
+        self.f = f
+        self.sha1 = make_sha("")
+
+    def read(self, num=None):
+        data = self.f.read(num)
+        self.sha1.update(data)
+        return data
+
+    def check_sha(self):
+        stored = self.f.read(20)
+        if stored != self.sha1.digest():
+            raise ChecksumMismatch(self.sha1.hexdigest(), sha_to_hex(stored))
+
+    def close(self):
+        return self.f.close()
+
+    def tell(self):
+        return self.f.tell()
 
 
 class SHA1Writer(object):
+    """Wrapper around a file-like object that remembers the SHA1 of
+    the data written to it."""
 
     def __init__(self, f):
         self.f = f
@@ -605,11 +788,11 @@ def write_pack_object(f, type, object):
     :param o: Object to write
     :return: Tuple with offset at which the object was written, and crc32
     """
-    ret = f.tell()
+    offset = f.tell()
     packed_data_hdr = ""
-    if type == 6: # ref delta
+    if type == 6: # offset delta
         (delta_base_offset, object) = object
-    elif type == 7: # offset delta
+    elif type == 7: # ref delta
         (basename, object) = object
     size = len(object)
     c = (type << 4) | (size & 15)
@@ -632,11 +815,17 @@ def write_pack_object(f, type, object):
         packed_data_hdr += basename
     packed_data = packed_data_hdr + zlib.compress(object)
     f.write(packed_data)
-    return (f.tell(), (zlib.crc32(packed_data) & 0xffffffff))
+    return (offset, (zlib.crc32(packed_data) & 0xffffffff))
 
 
 def write_pack(filename, objects, num_objects):
-    f = open(filename + ".pack", 'w')
+    """Write a new pack data file.
+
+    :param filename: Path to the new pack file (without .pack extension)
+    :param objects: Iterable over (object, path) tuples to write
+    :param num_objects: Number of objects to write
+    """
+    f = GitFile(filename + ".pack", 'wb')
     try:
         entries, data_sum = write_pack_data(f, objects, num_objects)
     finally:
@@ -659,7 +848,7 @@ def write_pack_data(f, objects, num_objects, window=10):
     # This helps us find good objects to diff against us
     magic = []
     for obj, path in recency:
-        magic.append( (obj.type, path, 1, -len(obj.as_raw_string()[1]), obj) )
+        magic.append( (obj.type, path, 1, -obj.raw_length(), obj) )
     magic.sort()
     # Build a map of objects and their index in magic - so we can find preceeding objects
     # to diff against
@@ -674,14 +863,15 @@ def write_pack_data(f, objects, num_objects, window=10):
     f.write(struct.pack(">L", num_objects)) # Number of objects in pack
     for o, path in recency:
         sha1 = o.sha().digest()
-        orig_t, raw = o.as_raw_string()
+        orig_t = o.type
+        raw = o.as_raw_string()
         winner = raw
         t = orig_t
         #for i in range(offs[o]-window, window):
         #    if i < 0 or i >= len(offs): continue
         #    b = magic[i][4]
         #    if b.type != orig_t: continue
-        #    _, base = b.as_raw_string()
+        #    base = b.as_raw_string()
         #    delta = create_delta(base, raw)
         #    if len(delta) < len(winner):
         #        winner = delta
@@ -699,7 +889,7 @@ def write_pack_index_v1(filename, entries, pack_checksum):
         crc32_checksum.
     :param pack_checksum: Checksum of the pack file.
     """
-    f = open(filename, 'w')
+    f = GitFile(filename, 'wb')
    f = SHA1Writer(f)
     fan_out_table = defaultdict(lambda: 0)
     for (name, offset, entry_checksum) in entries:
@@ -716,7 +906,11 @@ def write_pack_index_v1(filename, entries, pack_checksum):
 
 
 def create_delta(base_buf, target_buf):
-    """Use python difflib to work out how to transform base_buf to target_buf"""
+    """Use python difflib to work out how to transform base_buf to target_buf.
+
+    :param base_buf: Base buffer
+    :param target_buf: Target buffer
+    """
     assert isinstance(base_buf, str)
     assert isinstance(target_buf, str)
     out_buf = ""
@@ -747,12 +941,12 @@ def create_delta(base_buf, target_buf):
             o = i1
             for i in range(4):
                 if o & 0xff << i*8:
-                    scratch += chr(o >> i)
+                    scratch += chr((o >> i*8) & 0xff)
                     op |= 1 << i
             s = i2 - i1
             for i in range(2):
                 if s & 0xff << i*8:
-                    scratch += chr(s >> i)
+                    scratch += chr((s >> i*8) & 0xff)
                    op |= 1 << (4+i)
             out_buf += chr(op)
             out_buf += scratch
@@ -843,7 +1037,7 @@ def write_pack_index_v2(filename, entries, pack_checksum):
         crc32_checksum.
     :param pack_checksum: Checksum of the pack file.
     """
-    f = open(filename, 'w')
+    f = GitFile(filename, 'wb')
     f = SHA1Writer(f)
     f.write('\377tOc') # Magic!
     f.write(struct.pack(">L", 2))
@@ -868,6 +1062,7 @@
 
 
 class Pack(object):
+    """A Git pack object."""
 
     def __init__(self, basename):
         self._basename = basename
@@ -876,16 +1071,25 @@
         self._data = None
         self._idx = None
 
+    @classmethod
+    def from_objects(self, data, idx):
+        """Create a new pack object from pack data and index objects."""
+        ret = Pack("")
+        ret._data = data
+        ret._idx = idx
+        return ret
+
     def name(self):
         """The SHA over the SHAs of the objects in this pack."""
-        return self.idx.objects_sha1()
+        return self.index.objects_sha1()
 
     @property
     def data(self):
+        """The pack data object being used."""
         if self._data is None:
             self._data = PackData(self._data_path)
-            assert len(self.idx) == len(self._data)
-            idx_stored_checksum = self.idx.get_pack_checksum()
+            assert len(self.index) == len(self._data)
+            idx_stored_checksum = self.index.get_pack_checksum()
             data_stored_checksum = self._data.get_stored_checksum()
             if idx_stored_checksum != data_stored_checksum:
                 raise ChecksumMismatch(sha_to_hex(idx_stored_checksum),
@@ -893,7 +1097,11 @@
         return self._data
 
     @property
-    def idx(self):
+    def index(self):
+        """The index being used.
+
+        :note: This may be an in-memory index
+        """
         if self._idx is None:
             self._idx = load_pack_index(self._idx_path)
         return self._idx
@@ -901,24 +1109,25 @@
     def close(self):
         if self._data is not None:
             self._data.close()
-        self.idx.close()
+        self.index.close()
 
     def __eq__(self, other):
-        return type(self) == type(other) and self.idx == other.idx
+        return type(self) == type(other) and self.index == other.index
 
     def __len__(self):
         """Number of entries in this pack."""
-        return len(self.idx)
+        return len(self.index)
 
     def __repr__(self):
-        return "Pack(%r)" % self._basename
+        return "%s(%r)" % (self.__class__.__name__, self._basename)
 
     def __iter__(self):
         """Iterate over all the sha1s of the objects in this pack."""
-        return iter(self.idx)
+        return iter(self.index)
 
     def check(self):
-        if not self.idx.check():
+        """Check the integrity of this pack."""
+        if not self.index.check():
             return False
         if not self.data.check():
             return False
@@ -929,19 +1138,20 @@
 
     def __contains__(self, sha1):
         """Check whether this pack contains a particular SHA1."""
-        return (self.idx.object_index(sha1) is not None)
+        try:
+            self.index.object_index(sha1)
+            return True
+        except KeyError:
+            return False
 
     def get_raw(self, sha1, resolve_ref=None):
-        offset = self.idx.object_index(sha1)
-        if offset is None:
-            raise KeyError(sha1)
-        type, obj = self.data.get_object_at(offset)
-        if isinstance(offset, long):
+        offset = self.index.object_index(sha1)
+        obj_type, obj = self.data.get_object_at(offset)
+        if type(offset) is long:
            offset = int(offset)
         if resolve_ref is None:
             resolve_ref = self.get_raw
-        assert isinstance(offset, int)
-        return self.data.resolve_object(offset, type, obj, resolve_ref)
 
     def __getitem__(self, sha1):
         """Retrieve the specified SHA1."""
@@ -949,6 +1159,7 @@
         return ShaFile.from_raw_string(type, uncomp)
 
     def iterobjects(self, get_raw=None):
+        """Iterate over the objects in this pack."""
         if get_raw is None:
             get_raw = self.get_raw
         for offset, type, obj, crc32 in self.data.iterobjects():
@@ -957,15 +1168,7 @@
                 *self.data.resolve_object(offset, type, obj, get_raw))
 
 
-def load_packs(path):
-    if not os.path.exists(path):
-        return
-    for name in os.listdir(path):
-        if name.startswith("pack-") and name.endswith(".pack"):
-            yield Pack(os.path.join(path, name[:-len(".pack")]))
-
-
 try:
-    from dulwich._pack import apply_delta
+    from dulwich._pack import apply_delta, bisect_find_sha
 except ImportError:
     pass