Use python-fastimport.
[jelmer/dulwich-libgit2.git] / dulwich / fastexport.py
1 # fastexport.py -- Fast export/import functionality
2 # Copyright (C) 2010 Jelmer Vernooij <jelmer@samba.org>
3 #
4 # This program is free software; you can redistribute it and/or
5 # modify it under the terms of the GNU General Public License
6 # as published by the Free Software Foundation; version 2
7 # of the License or (at your option) any later version of
8 # the License.
9 #
10 # This program is distributed in the hope that it will be useful,
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13 # GNU General Public License for more details.
14 #
15 # You should have received a copy of the GNU General Public License
16 # along with this program; if not, write to the Free Software
17 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
18 # MA  02110-1301, USA.
19
20
21 """Fast export/import functionality."""
22
23 from dulwich.index import (
24     commit_tree,
25     )
26 from dulwich.objects import (
27     Blob,
28     Commit,
29     Tag,
30     format_timezone,
31     parse_timezone,
32     )
33 from fastimport import (
34     commands,
35     errors as fastimport_errors,
36     processor,
37     )
38
39 import stat
40
41
class GitFastExporter(object):
    """Generate a fast-export output stream for Git objects.

    Commands are built with the ``fastimport.commands`` classes and their
    string form is written to ``outf``.
    """

    def __init__(self, outf, store):
        """Create a new exporter.

        :param outf: Object with a ``write`` method that receives the stream
        :param store: Object store; it is indexed by hex SHA and asked for
            ``tree_changes`` when commits are exported
        """
        self.outf = outf
        self.store = store
        # Maps every blob mark handed out by export_blob to the blob's id.
        self.markers = {}
        self._marker_idx = 0

    def _allocate_marker(self):
        """Return the next unused mark as a decimal string ("1", "2", ...)."""
        self._marker_idx += 1
        return str(self._marker_idx)

    def _dump_blob(self, blob, mark):
        """Write a blob command for ``blob`` carrying ``mark`` to the output."""
        cmd = commands.BlobCommand(mark, blob.data)
        self.outf.write(str(cmd) + "\n")

    def export_blob(self, blob):
        """Export a blob and remember which mark it was given.

        :param blob: Blob object to export (its ``id`` and ``data`` are used)
        :return: The mark allocated for the blob
        """
        mark = self._allocate_marker()
        self.markers[mark] = blob.id
        self._dump_blob(blob, mark)
        return mark

    def _dump_commit(self, commit, mark, ref, file_cmds):
        """Write a commit command for ``commit`` to the output.

        The first parent (if any) is passed as the ``from`` commit and all
        remaining parents as merges.
        """
        if commit.parents:
            from_ = commit.parents[0]
            merges = commit.parents[1:]
        else:
            from_ = None
            merges = []
        # NOTE(review): author/committer are passed as the raw dulwich
        # strings; confirm this is the shape CommitCommand expects.
        cmd = commands.CommitCommand(ref, mark,
            commit.author, commit.committer,
            commit.message, from_, merges, file_cmds)
        # NOTE(review): unlike _dump_blob no trailing newline is appended
        # here -- confirm str(cmd) already terminates the command.
        self.outf.write(str(cmd))

    def export_commit(self, commit, ref, base_tree=None):
        """Export a commit, preceded by the blobs it introduces.

        :param commit: Commit object to export
        :param ref: Ref name to record the commit under
        :param base_tree: Tree to diff ``commit.tree`` against; passed
            straight through to ``store.tree_changes``
        :return: The mark allocated for the commit
        """
        file_cmds = []
        changes = self.store.tree_changes(base_tree, commit.tree)
        for ((old_path, new_path), (old_mode, new_mode),
                (old_hexsha, new_hexsha)) in changes:
            if new_path is None:
                file_cmds.append(commands.FileDeleteCommand(old_path))
                continue
            # Reset for every entry: a directory has no blob, and must
            # neither reuse the previous entry's mark nor hit an unbound
            # local when it is the first change seen.
            marker = None
            if not stat.S_ISDIR(new_mode):
                marker = self.export_blob(self.store[new_hexsha])
            if old_path != new_path and old_path is not None:
                file_cmds.append(
                    commands.FileRenameCommand(old_path, new_path))
            content_changed = (old_mode != new_mode or
                               old_hexsha != new_hexsha)
            if content_changed and marker is not None:
                # NOTE(review): argument order kept as in the original
                # call; verify against the installed fastimport version.
                file_cmds.append(
                    commands.FileModifyCommand(new_mode, marker, new_path))
        mark = self._allocate_marker()
        self._dump_commit(commit, mark, ref, file_cmds)
        return mark
93
94
class FastImporter(object):
    """Class for importing fastimport streams.

    Please note that this is mostly a stub implementation at the moment,
    doing the bare minimum.
    """

    def __init__(self, repo):
        """Create an importer that writes into ``repo``.

        :param repo: Repository; its ``object_store``, ``refs`` and
            ``head()`` are used
        """
        self.repo = repo

    def _parse_person(self, line):
        """Split an author/committer payload into its parts.

        :param line: Text of the form ``<name> <timestamp> <timezone>``
        :return: Tuple of (name, integer timestamp, parsed timezone)
        """
        (name, timestr, timezonestr) = line.rsplit(" ", 2)
        return name, int(timestr), parse_timezone(timezonestr)[0]

    def _read_blob(self, stream):
        """Read the body of a blob command and store the blob.

        :param stream: File-like object positioned just after the
            ``blob`` line
        :return: Tuple of (mark or None, id of the stored blob)
        :raise ValueError: If no ``data`` line follows
        """
        line = stream.readline()
        if line.startswith("mark :"):
            mark = line[len("mark :"):-1]
            line = stream.readline()
        else:
            mark = None
        if not line.startswith("data "):
            raise ValueError("Blob without valid data line: %s" % line)
        size = int(line[len("data "):])
        o = Blob()
        o.data = stream.read(size)
        # Consume the remainder of the line that follows the raw data.
        stream.readline()
        self.repo.object_store.add_object(o)
        return mark, o.id

    def _read_commit(self, stream, contents, marks):
        """Read the body of a commit command and store the commit.

        :param stream: File-like object positioned just after the
            ``commit`` line
        :param contents: Dictionary mapping path to (mode, hexsha); it is
            updated in place with every ``M`` line and then used in full
            to build the commit's tree
        :param marks: Dictionary mapping integer marks to ids, used to
            resolve ``:<mark>`` references
        :return: Tuple of (mark or None, id of the stored commit)
        :raise ValueError: If the data line is missing or an unsupported
            file command is encountered
        """
        line = stream.readline()
        if line.startswith("mark :"):
            mark = line[len("mark :"):-1]
            line = stream.readline()
        else:
            mark = None
        o = Commit()
        o.author = None
        o.author_time = None
        while line.startswith("author "):
            (o.author, o.author_time, o.author_timezone) = \
                    self._parse_person(line[len("author "):-1])
            line = stream.readline()
        while line.startswith("committer "):
            (o.committer, o.commit_time, o.commit_timezone) = \
                    self._parse_person(line[len("committer "):-1])
            line = stream.readline()
        # Without an author line the committer details are reused.
        if o.author is None:
            o.author = o.committer
        if o.author_time is None:
            o.author_time = o.commit_time
            o.author_timezone = o.commit_timezone
        if not line.startswith("data "):
            raise ValueError("Commit without valid data line: %s" % line)
        size = int(line[len("data "):])
        o.message = stream.read(size)
        stream.readline()
        line = stream.readline()[:-1]
        # File commands run until the first empty line (or end of stream).
        while line:
            if line.startswith("M "):
                # Limit the split so paths containing spaces stay intact.
                (kind, modestr, val, path) = line.split(" ", 3)
                if val.startswith(":"):
                    val = marks[int(val[1:])]
                contents[path] = (int(modestr, 8), val)
            else:
                raise ValueError(line)
            line = stream.readline()[:-1]
        # The current HEAD, when there is one, becomes the only parent.
        try:
            o.parents = (self.repo.head(),)
        except KeyError:
            o.parents = ()
        o.tree = commit_tree(self.repo.object_store,
            ((path, hexsha, mode) for (path, (mode, hexsha)) in
                contents.items()))
        self.repo.object_store.add_object(o)
        return mark, o.id

    def import_stream(self, stream):
        """Import from a file-like object.

        Only ``blob`` and ``commit`` commands are understood; blank lines
        and lines starting with ``#`` are skipped. Every imported commit
        updates both its ref and ``HEAD``.

        :param stream: File-like object to read a fastimport stream from.
        :return: Dictionary with marks
        :raise ValueError: If an unknown command is encountered
        """
        contents = {}
        marks = {}
        while True:
            line = stream.readline()
            if not line:
                break
            line = line[:-1]
            if line == "" or line[0] == "#":
                continue
            if line.startswith("blob"):
                mark, hexsha = self._read_blob(stream)
                if mark is not None:
                    marks[int(mark)] = hexsha
            elif line.startswith("commit "):
                # The newline is already gone, so take the rest of the line
                # as-is; slicing again would drop the ref's last character.
                ref = line[len("commit "):]
                mark, hexsha = self._read_commit(stream, contents, marks)
                if mark is not None:
                    marks[int(mark)] = hexsha
                self.repo.refs["HEAD"] = self.repo.refs[ref] = hexsha
            else:
                raise ValueError("invalid command '%s'" % line)
        return marks
201
202
class GitImportProcessor(processor.ImportProcessor):
    """An import processor that imports into a Git repository using Dulwich.

    """

    def __init__(self, repo, params=None, verbose=False, outf=None):
        """Create a processor that writes into ``repo``.

        :param repo: Repository to import into
        :param params: Passed through to ``ImportProcessor.__init__``
        :param verbose: Passed through to ``ImportProcessor.__init__``
        :param outf: Accepted for interface compatibility; not used here
        """
        processor.ImportProcessor.__init__(self, params, verbose)
        self.repo = repo
        # Id of the most recent commit (or reset target); it becomes the
        # first parent of the next commit.
        self.last_commit = None

    def blob_handler(self, cmd):
        """Process a BlobCommand."""
        self.repo.object_store.add_object(Blob.from_string(cmd.data))

    def checkpoint_handler(self, cmd):
        """Process a CheckpointCommand."""
        pass

    def commit_handler(self, cmd):
        """Process a CommitCommand."""
        commit = Commit()
        commit.author = cmd.author
        commit.committer = cmd.committer
        commit.message = cmd.message
        commit.parents = []
        if self.last_commit is not None:
            commit.parents.append(self.last_commit)
        commit.parents += cmd.merges
        # NOTE(review): this handler neither sets a tree on the commit nor
        # adds the commit to the object store; only the ref is updated.
        self.repo[cmd.ref] = commit.id
        self.last_commit = commit.id

    def progress_handler(self, cmd):
        """Process a ProgressCommand."""
        pass

    def reset_handler(self, cmd):
        """Process a ResetCommand.

        The reset target becomes the parent of the next commit and, when
        one is given, the command's ref is pointed at it.
        """
        self.last_commit = cmd.from_
        # NOTE(review): from_ is stored as given; a ":<mark>" reference is
        # not resolved here -- confirm whether callers rely on marks.
        if cmd.from_ is not None:
            self.repo.refs[cmd.ref] = cmd.from_

    def tag_handler(self, cmd):
        """Process a TagCommand."""
        tag = Tag()
        tag.tagger = cmd.tagger
        tag.message = cmd.message
        tag.name = cmd.tag
        # Objects go through the object store, as in blob_handler.
        self.repo.object_store.add_object(tag)
        self.repo.refs["refs/tags/" + tag.name] = tag.id

    def feature_handler(self, cmd):
        """Process a FeatureCommand.

        No features are supported, so this always raises.

        :raise fastimport.errors.UnknownFeature: Always
        """
        raise fastimport_errors.UnknownFeature(cmd.feature_name)