Add check() methods to object classes for consistency checking.
[jelmer/dulwich-libgit2.git] / dulwich / tests / test_objects.py
1 # test_objects.py -- tests for objects.py
2 # Copyright (C) 2007 James Westby <jw+debian@jameswestby.net>
3
4 # This program is free software; you can redistribute it and/or
5 # modify it under the terms of the GNU General Public License
6 # as published by the Free Software Foundation; version 2
7 # of the License or (at your option) any later version of 
8 # the License.
9
10 # This program is distributed in the hope that it will be useful,
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13 # GNU General Public License for more details.
14
15 # You should have received a copy of the GNU General Public License
16 # along with this program; if not, write to the Free Software
17 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
18 # MA  02110-1301, USA.
19
20
21 """Tests for git base objects."""
22
23 # TODO: Round-trip parse-serialize-parse and serialize-parse-serialize tests.
24
25
26 import datetime
27 import os
28 import stat
29 import unittest
30
31 from dulwich.errors import (
32     ObjectFormatException,
33     )
34 from dulwich.objects import (
35     Blob,
36     Tree,
37     Commit,
38     Tag,
39     format_timezone,
40     hex_to_sha,
41     check_hexsha,
42     check_identity,
43     parse_timezone,
44     parse_tree,
45     _parse_tree_py,
46     )
47 from dulwich.tests import (
48     TestSkipped,
49     )
50
# Hex SHA-1 ids of the fixture objects used by the tests in this module.
# Blob whose content is 'test 1\n' (see test_decompress_simple_blob).
a_sha = '6f670c0fb53f9463760b7295fbb814e965fb20c8'
# Blob whose content is 'test 2\n' (see test_create_blob_from_string).
b_sha = '2969be3e8ee1c0222396a5611407e4769f14e54b'
# Blob whose content is 'test 3\n' (see test_parse_legacy_blob).
c_sha = '954a536f7819d40e6f637f849ee187dd10066349'
# Tree fixture whose first two entries are named 'a' and 'b'.
tree_sha = '70c190eb48fa8bbb50ddc692a17b44cb781af7f6'
# Tag fixture named 'signed' (see test_read_tag_from_file).
tag_sha = '71033db03a03c6a36721efcf1968dd8f8e0cf023'
56
class BlobReadTests(unittest.TestCase):
    """Load objects from the test data directory and inspect their fields."""

    def get_sha_file(self, obj, base, sha):
        """Load object ``sha`` of class ``obj`` from the data/<base> directory."""
        data_dir = os.path.join(os.path.dirname(__file__), 'data')
        return obj.from_file(os.path.join(data_dir, base, sha))

    def get_blob(self, sha):
        """Return the blob named sha from the test data dir"""
        return self.get_sha_file(Blob, 'blobs', sha)

    def get_tree(self, sha):
        """Return the tree named sha from the test data dir"""
        return self.get_sha_file(Tree, 'trees', sha)

    def get_tag(self, sha):
        """Return the tag named sha from the test data dir"""
        return self.get_sha_file(Tag, 'tags', sha)

    def commit(self, sha):
        """Return the commit named sha from the test data dir"""
        return self.get_sha_file(Commit, 'commits', sha)

    def _assert_westby_commit(self, c, commit_time):
        """Check the identity and time fields shared by the fixture commits."""
        identity = 'James Westby <jw+debian@jameswestby.net>'
        self.assertEqual(c.author, identity)
        self.assertEqual(c.committer, identity)
        self.assertEqual(c.commit_time, commit_time)
        self.assertEqual(c.commit_timezone, 0)
        self.assertEqual(c.author_timezone, 0)

    def test_decompress_simple_blob(self):
        # A blob read from file exposes its content and hashes to its name.
        blob = self.get_blob(a_sha)
        self.assertEqual(blob.data, 'test 1\n')
        self.assertEqual(blob.sha().hexdigest(), a_sha)

    def test_hash(self):
        # Hashing the object must agree with hashing its id.
        blob = self.get_blob(a_sha)
        id_hash = hash(blob.id)
        self.assertEqual(id_hash, hash(blob))

    def test_parse_empty_blob_object(self):
        # The empty blob has empty data and the expected id.
        empty_sha = 'e69de29bb2d1d6434b8b29ae775ad8c2e48c5391'
        blob = self.get_blob(empty_sha)
        self.assertEqual(blob.data, '')
        self.assertEqual(blob.id, empty_sha)
        self.assertEqual(blob.sha().hexdigest(), empty_sha)

    def test_create_blob_from_string(self):
        # A blob built from a string keeps the data and gets b_sha as digest.
        content = 'test 2\n'
        blob = Blob.from_string(content)
        self.assertEqual(blob.data, content)
        self.assertEqual(blob.sha().hexdigest(), b_sha)

    def test_chunks(self):
        # A blob built from one string reports that string as its only chunk.
        content = 'test 5\n'
        blob = Blob.from_string(content)
        self.assertEqual([content], blob.chunked)

    def test_set_chunks(self):
        # Assigning chunks is reflected by both data and as_raw_string().
        blob = Blob()
        blob.chunked = ['te', 'st', ' 5\n']
        self.assertEqual('test 5\n', blob.data)
        blob.chunked = ['te', 'st', ' 6\n']
        self.assertEqual('test 6\n', blob.as_raw_string())

    def test_parse_legacy_blob(self):
        # The c_sha fixture must read back as 'test 3\n'.
        content = 'test 3\n'
        blob = self.get_blob(c_sha)
        self.assertEqual(blob.data, content)
        self.assertEqual(blob.sha().hexdigest(), c_sha)

    def test_eq(self):
        # Two separate loads of the same blob compare equal.
        first = self.get_blob(a_sha)
        second = self.get_blob(a_sha)
        self.assertEqual(first, second)

    def test_read_tree_from_file(self):
        # The fixture tree lists 'a' and 'b' with mode 33188.
        tree = self.get_tree(tree_sha)
        entry_a = (33188, 'a', a_sha)
        entry_b = (33188, 'b', b_sha)
        self.assertEqual(tree.entries()[0], entry_a)
        self.assertEqual(tree.entries()[1], entry_b)

    def test_read_tag_from_file(self):
        # Every field of the signed fixture tag is read back.
        tag = self.get_tag(tag_sha)
        target = (Commit, '51b668fd5bf7061b7d6fa525f88803e6cfadaa51')
        signed_message = (
            'This is a signed tag\n'
            '-----BEGIN PGP SIGNATURE-----\n'
            'Version: GnuPG v1.4.9 (GNU/Linux)\n'
            '\n'
            'iEYEABECAAYFAkliqx8ACgkQqSMmLy9u/kcx5ACfakZ9NnPl02tOyYP6pkBoEkU1\n'
            '5EcAn0UFgokaSvS371Ym/4W9iJj6vh3h\n'
            '=ql7y\n'
            '-----END PGP SIGNATURE-----\n')
        self.assertEqual(tag.object, target)
        self.assertEqual(tag.name, 'signed')
        self.assertEqual(tag.tagger, 'Ali Sabil <ali.sabil@gmail.com>')
        self.assertEqual(tag.tag_time, 1231203091)
        self.assertEqual(tag.message, signed_message)

    def test_read_commit_from_file(self):
        # Commit with a single parent.
        c = self.commit('60dacdc733de308bb77bb76ce0fb0f9b44c9769e')
        self.assertEqual(c.tree, tree_sha)
        self.assertEqual(c.parents,
                         ['0d89f20333fbb1d2f3a94da77f4981373d8f4310'])
        self._assert_westby_commit(c, 1174759230)
        self.assertEqual(c.message, 'Test commit\n')

    def test_read_commit_no_parents(self):
        # Root commit: the parent list is empty.
        c = self.commit('0d89f20333fbb1d2f3a94da77f4981373d8f4310')
        self.assertEqual(c.tree, '90182552c4a85a45ec2a835cadc3451bebdfe870')
        self.assertEqual(c.parents, [])
        self._assert_westby_commit(c, 1174758034)
        self.assertEqual(c.message, 'Test commit\n')

    def test_read_commit_two_parents(self):
        # Merge commit: both parents are listed in order.
        c = self.commit('5dac377bdded4c9aeb8dff595f0faeebcc8498cc')
        expected_parents = [
            'ab64bbdcc51b170d21588e5c5d391ee5c0c96dfd',
            '4cffe90e0a41ad3f5190079d7c8f036bde29cbe6',
            ]
        self.assertEqual(c.tree, 'd80c186a03f423a81b39df39dc87fd269736ca86')
        self.assertEqual(c.parents, expected_parents)
        self._assert_westby_commit(c, 1174773719)
        self.assertEqual(c.message, 'Merge ../b\n')
178
179
180 class ShaFileCheckTests(unittest.TestCase):
181
182     def assertCheckFails(self, obj, data):
183         obj.set_raw_string(data)
184         self.assertRaises(ObjectFormatException, obj.check)
185
186     def assertCheckSucceeds(self, obj, data):
187         obj.set_raw_string(data)
188         try:
189             obj.check()
190         except ObjectFormatException, e:
191             raise
192             self.fail(e)
193
194
class CommitSerializationTests(unittest.TestCase):
    """Tests for turning Commit objects into their raw text form."""

    def make_base(self):
        """Build the two-parent merge commit that every test starts from."""
        identity = 'James Westby <jw+debian@jameswestby.net>'
        timestamp = 1174773719
        commit = Commit()
        commit.tree = 'd80c186a03f423a81b39df39dc87fd269736ca86'
        commit.parents = [
            'ab64bbdcc51b170d21588e5c5d391ee5c0c96dfd',
            '4cffe90e0a41ad3f5190079d7c8f036bde29cbe6',
            ]
        commit.author = identity
        commit.committer = identity
        commit.commit_time = timestamp
        commit.author_time = timestamp
        commit.commit_timezone = 0
        commit.author_timezone = 0
        commit.message = 'Merge ../b\n'
        return commit

    def test_encoding(self):
        # An explicit encoding shows up as its own header line.
        commit = self.make_base()
        commit.encoding = "iso8859-1"
        raw = commit.as_raw_string()
        self.assertTrue("encoding iso8859-1\n" in raw)

    def test_short_timestamp(self):
        # A small timestamp survives a serialize/parse round trip.
        original = self.make_base()
        original.commit_time = 30
        reparsed = Commit()
        reparsed.set_raw_string(original.as_raw_string())
        self.assertEquals(30, reparsed.commit_time)

    def test_raw_length(self):
        # raw_length() matches the length of the serialized text.
        commit = self.make_base()
        serialized_length = len(commit.as_raw_string())
        self.assertEquals(serialized_length, commit.raw_length())

    def test_simple(self):
        # The base commit has a known id and a known serialization.
        commit = self.make_base()
        self.assertEquals(commit.id,
                          '5dac377bdded4c9aeb8dff595f0faeebcc8498cc')
        expected = '\n'.join([
            'tree d80c186a03f423a81b39df39dc87fd269736ca86',
            'parent ab64bbdcc51b170d21588e5c5d391ee5c0c96dfd',
            'parent 4cffe90e0a41ad3f5190079d7c8f036bde29cbe6',
            'author James Westby <jw+debian@jameswestby.net> '
            '1174773719 +0000',
            'committer James Westby <jw+debian@jameswestby.net> '
            '1174773719 +0000',
            '',
            'Merge ../b\n',
            ])
        self.assertEquals(expected, commit.as_raw_string())

    def test_timezone(self):
        # A positive offset of five minutes is written as +0005.
        commit = self.make_base()
        commit.commit_timezone = 5 * 60
        raw = commit.as_raw_string()
        self.assertTrue(" +0005\n" in raw)

    def test_neg_timezone(self):
        # A negative offset of one hour is written as -0100.
        commit = self.make_base()
        commit.commit_timezone = -1 * 3600
        raw = commit.as_raw_string()
        self.assertTrue(" -0100\n" in raw)
247
248
# Identity followed by a timestamp and a timezone offset; used as the default
# value of the 'author' and 'committer' lines in
# CommitParseTests.make_commit_text.
default_committer = 'James Westby <jw+debian@jameswestby.net> 1174773719 +0000'
250
class CommitParseTests(ShaFileCheckTests):
    """Tests for parsing raw commit text and for Commit.check()."""

    def make_commit_text(self,
                         tree='d80c186a03f423a81b39df39dc87fd269736ca86',
                         parents=('ab64bbdcc51b170d21588e5c5d391ee5c0c96dfd',
                                  '4cffe90e0a41ad3f5190079d7c8f036bde29cbe6'),
                         author=default_committer,
                         committer=default_committer,
                         encoding=None,
                         message='Merge ../b\n',
                         extra=None):
        """Assemble raw commit text from individual header values.

        Passing None for an argument leaves the corresponding line(s) out,
        which lets tests build deliberately incomplete commits.

        :param tree: Value for the 'tree' line.
        :param parents: Iterable of values, one 'parent' line each.
        :param author: Value for the 'author' line.
        :param committer: Value for the 'committer' line.
        :param encoding: Value for the 'encoding' line.
        :param message: Text placed after the blank separator line.
        :param extra: Dict of additional header lines, emitted sorted by name.
        :return: The assembled lines joined by newlines.
        """
        lines = []
        if tree is not None:
            lines.append('tree %s' % tree)
        if parents is not None:
            lines.extend('parent %s' % p for p in parents)
        if author is not None:
            lines.append('author %s' % author)
        if committer is not None:
            lines.append('committer %s' % committer)
        if encoding is not None:
            lines.append('encoding %s' % encoding)
        if extra is not None:
            for name, value in sorted(extra.items()):
                lines.append('%s %s' % (name, value))
        # Blank line separating the headers from the message.
        lines.append('')
        if message is not None:
            lines.append(message)
        return '\n'.join(lines)

    def test_simple(self):
        # Every field of the default commit text is parsed back.
        c = Commit.from_string(self.make_commit_text())
        self.assertEquals('Merge ../b\n', c.message)
        self.assertEquals('James Westby <jw+debian@jameswestby.net>', c.author)
        self.assertEquals('James Westby <jw+debian@jameswestby.net>',
                          c.committer)
        self.assertEquals('d80c186a03f423a81b39df39dc87fd269736ca86', c.tree)
        self.assertEquals(['ab64bbdcc51b170d21588e5c5d391ee5c0c96dfd',
                           '4cffe90e0a41ad3f5190079d7c8f036bde29cbe6'],
                          c.parents)
        # Compare in UTC so the result does not depend on the timezone of
        # the machine running the tests (1174773719 is 22:01:59 UTC).
        expected_time = datetime.datetime(2007, 3, 24, 22, 1, 59)
        self.assertEquals(expected_time,
                          datetime.datetime.utcfromtimestamp(c.commit_time))
        self.assertEquals(0, c.commit_timezone)
        self.assertEquals(expected_time,
                          datetime.datetime.utcfromtimestamp(c.author_time))
        self.assertEquals(0, c.author_timezone)
        self.assertEquals(None, c.encoding)

    def test_custom(self):
        # Header lines that are not recognised end up in c.extra.
        c = Commit.from_string(self.make_commit_text(
          extra={'extra-field': 'data'}))
        self.assertEquals([('extra-field', 'data')], c.extra)

    def test_encoding(self):
        # The 'encoding' header is exposed as c.encoding.
        c = Commit.from_string(self.make_commit_text(encoding='UTF-8'))
        self.assertEquals('UTF-8', c.encoding)

    def test_check(self):
        # Well-formed commits, including one without parents, pass check().
        self.assertCheckSucceeds(Commit(), self.make_commit_text())
        self.assertCheckSucceeds(Commit(), self.make_commit_text(parents=None))
        self.assertCheckSucceeds(Commit(),
                                 self.make_commit_text(encoding='UTF-8'))

        # Malformed shas, malformed identities and missing author or
        # committer lines are all expected to be rejected.
        self.assertCheckFails(Commit(), self.make_commit_text(tree='xxx'))
        self.assertCheckFails(Commit(), self.make_commit_text(
          parents=[a_sha, 'xxx']))
        bad_committer = "some guy without an email address 1174773719 +0000"
        self.assertCheckFails(Commit(),
                              self.make_commit_text(committer=bad_committer))
        self.assertCheckFails(Commit(),
                              self.make_commit_text(author=bad_committer))
        self.assertCheckFails(Commit(), self.make_commit_text(author=None))
        self.assertCheckFails(Commit(), self.make_commit_text(committer=None))
        self.assertCheckFails(Commit(), self.make_commit_text(
          author=None, committer=None))
327
328
329 class TreeTests(ShaFileCheckTests):
330
331     def test_simple(self):
332         myhexsha = "d80c186a03f423a81b39df39dc87fd269736ca86"
333         x = Tree()
334         x["myname"] = (0100755, myhexsha)
335         self.assertEquals('100755 myname\0' + hex_to_sha(myhexsha),
336                 x.as_raw_string())
337
338     def test_tree_dir_sort(self):
339         x = Tree()
340         x["a.c"] = (0100755, "d80c186a03f423a81b39df39dc87fd269736ca86")
341         x["a"] = (stat.S_IFDIR, "d80c186a03f423a81b39df39dc87fd269736ca86")
342         x["a/c"] = (stat.S_IFDIR, "d80c186a03f423a81b39df39dc87fd269736ca86")
343         self.assertEquals(["a.c", "a", "a/c"], [p[0] for p in x.iteritems()])
344
345     def _do_test_parse_tree(self, parse_tree):
346         o = Tree.from_file(os.path.join(os.path.dirname(__file__), 'data',
347                                         'trees', tree_sha))
348         self.assertEquals([('a', 0100644, a_sha), ('b', 0100644, b_sha)],
349                           list(parse_tree(o.as_raw_string())))
350
351     def test_parse_tree(self):
352         self._do_test_parse_tree(_parse_tree_py)
353
354     def test_parse_tree_extension(self):
355         if parse_tree is _parse_tree_py:
356             raise TestSkipped('parse_tree extension not found')
357         self._do_test_parse_tree(parse_tree)
358
359     def test_check(self):
360         t = Tree()
361         sha = hex_to_sha(a_sha)
362
363         # filenames
364         self.assertCheckSucceeds(t, '100644 .a\0%s' % sha)
365         self.assertCheckFails(t, '100644 \0%s' % sha)
366         self.assertCheckFails(t, '100644 .\0%s' % sha)
367         self.assertCheckFails(t, '100644 a/a\0%s' % sha)
368         self.assertCheckFails(t, '100644 ..\0%s' % sha)
369
370         # modes
371         self.assertCheckSucceeds(t, '100644 a\0%s' % sha)
372         self.assertCheckSucceeds(t, '100755 a\0%s' % sha)
373         self.assertCheckSucceeds(t, '160000 a\0%s' % sha)
374         # TODO more whitelisted modes
375         self.assertCheckFails(t, '123456 a\0%s' % sha)
376         self.assertCheckFails(t, '123abc a\0%s' % sha)
377
378         # shas
379         self.assertCheckFails(t, '100644 a\0%s' % ('x' * 5))
380         self.assertCheckFails(t, '100644 a\0%s' % ('x' * 18 + '\0'))
381         self.assertCheckFails(t, '100644 a\0%s\n100644 b\0%s' % ('x' * 21, sha))
382
383         # ordering
384         sha2 = hex_to_sha(b_sha)
385         self.assertCheckSucceeds(t, '100644 a\0%s\n100644 b\0%s' % (sha, sha))
386         self.assertCheckSucceeds(t, '100644 a\0%s\n100644 b\0%s' % (sha, sha2))
387         self.assertCheckFails(t, '100644 a\0%s\n100755 a\0%s' % (sha, sha2))
388         self.assertCheckFails(t, '100644 b\0%s\n100644 a\0%s' % (sha2, sha))
389
390
class TagSerializeTests(unittest.TestCase):
    """Tests for turning Tag objects into their raw text form."""

    def test_serialize_simple(self):
        # A tag pointing at a blob serializes to the expected header lines
        # followed by a blank line and the message.
        tag = Tag()
        tag.tagger = "Jelmer Vernooij <jelmer@samba.org>"
        tag.name = "0.1"
        tag.message = "Tag 0.1"
        tag.object = (Blob, "d80c186a03f423a81b39df39dc87fd269736ca86")
        tag.tag_time = 423423423
        tag.tag_timezone = 0
        expected = "\n".join([
            "object d80c186a03f423a81b39df39dc87fd269736ca86",
            "type blob",
            "tag 0.1",
            "tagger Jelmer Vernooij <jelmer@samba.org> 423423423 +0000",
            "",
            "Tag 0.1",
            ])
        self.assertEquals(expected, tag.as_raw_string())
407
408
# Identity followed by a timestamp and a timezone offset; default value of the
# 'tagger' line in TagParseTests.make_tag_text.
default_tagger = ('Linus Torvalds <torvalds@woody.linux-foundation.org> '
                  '1183319674 -0700')
# Tag message including a PGP signature block; default message in
# TagParseTests.make_tag_text.
default_message = """Linux 2.6.22-rc7
-----BEGIN PGP SIGNATURE-----
Version: GnuPG v1.4.7 (GNU/Linux)

iD8DBQBGiAaAF3YsRnbiHLsRAitMAKCiLboJkQECM/jpYsY3WPfvUgLXkACgg3ql
OK2XeQOiEeXtT76rV4t2WR4=
=ivrA
-----END PGP SIGNATURE-----
"""
420
421
class TagParseTests(ShaFileCheckTests):
    """Tests for parsing raw tag text and for Tag.check()."""

    def make_tag_text(self,
                      object_sha="a38d6181ff27824c79fc7df825164a212eff6a3f",
                      object_type_name="commit",
                      name="v2.6.22-rc7",
                      tagger=default_tagger,
                      message=default_message):
        """Assemble raw tag text from individual header values.

        Passing None for an argument leaves the corresponding line out,
        which lets tests build deliberately incomplete tags.

        :param object_sha: Value for the 'object' line.
        :param object_type_name: Value for the 'type' line.
        :param name: Value for the 'tag' line.
        :param tagger: Value for the 'tagger' line.
        :param message: Text placed after the blank separator line.
        :return: The assembled lines joined by newlines.
        """
        lines = []
        if object_sha is not None:
            lines.append("object %s" % object_sha)
        if object_type_name is not None:
            lines.append("type %s" % object_type_name)
        if name is not None:
            lines.append("tag %s" % name)
        if tagger is not None:
            lines.append("tagger %s" % tagger)
        # Blank line separating the headers from the message.
        lines.append("")
        if message is not None:
            lines.append(message)
        return "\n".join(lines)

    def test_parse(self):
        # Every field of the default tag text is parsed back.
        x = Tag()
        x.set_raw_string(self.make_tag_text())
        self.assertEquals(
            "Linus Torvalds <torvalds@woody.linux-foundation.org>", x.tagger)
        self.assertEquals("v2.6.22-rc7", x.name)
        object_type, object_sha = x.object
        self.assertEquals("a38d6181ff27824c79fc7df825164a212eff6a3f",
                          object_sha)
        self.assertEquals(Commit, object_type)
        # Compare in UTC so the result does not depend on the timezone of
        # the machine running the tests (1183319674 is 19:54:34 UTC).
        self.assertEquals(datetime.datetime.utcfromtimestamp(x.tag_time),
                          datetime.datetime(2007, 7, 1, 19, 54, 34))
        self.assertEquals(-25200, x.tag_timezone)

    def test_parse_no_tagger(self):
        # A tag without a 'tagger' line parses with tagger set to None.
        x = Tag()
        x.set_raw_string(self.make_tag_text(tagger=None))
        self.assertEquals(None, x.tagger)
        self.assertEquals("v2.6.22-rc7", x.name)

    def test_check(self):
        # The default tag text passes check(); each variation below is
        # expected to be rejected.
        self.assertCheckSucceeds(Tag(), self.make_tag_text())
        self.assertCheckFails(Tag(), self.make_tag_text(object_sha=None))
        self.assertCheckFails(Tag(), self.make_tag_text(object_type_name=None))
        self.assertCheckFails(Tag(), self.make_tag_text(name=None))
        self.assertCheckFails(Tag(), self.make_tag_text(name=''))
        self.assertCheckFails(Tag(), self.make_tag_text(
          object_type_name="foobar"))
        self.assertCheckFails(Tag(), self.make_tag_text(
          tagger="some guy without an email address 1183319674 -0700"))
        self.assertCheckFails(Tag(), self.make_tag_text(
          tagger=("Linus Torvalds <torvalds@woody.linux-foundation.org> "
                  "Sun 7 Jul 2007 12:54:34 +0700")))
        self.assertCheckFails(Tag(), self.make_tag_text(object_sha="xxx"))
476         self.assertCheckFails(Tag(), self.make_tag_text(object_sha="xxx"))
477
478
class CheckTests(unittest.TestCase):
    """Tests for the check_hexsha and check_identity helpers."""

    def test_check_hexsha(self):
        # A well-formed sha is accepted without raising.
        check_hexsha(a_sha, "failed to check good sha")
        rejected = [
            ('1' * 39, 'sha too short'),
            ('1' * 41, 'sha too long'),
            ('x' * 40, 'invalid characters'),
            ]
        for hexsha, reason in rejected:
            self.assertRaises(ObjectFormatException, check_hexsha, hexsha,
                              reason)

    def test_check_identity(self):
        # Identities with and without a name part are accepted.
        for accepted in ("Dave Borowitz <dborowitz@google.com>",
                         "<dborowitz@google.com>"):
            check_identity(accepted, "failed to check good identity")
        rejected = [
            ("Dave Borowitz", "no email"),
            ("Dave Borowitz <dborowitz", "incomplete email"),
            ("dborowitz@google.com>", "incomplete email"),
            ("Dave Borowitz <<dborowitz@google.com>", "typo"),
            ("Dave Borowitz <dborowitz@google.com>>", "typo"),
            ("Dave Borowitz <dborowitz@google.com>xxx", "trailing characters"),
            ]
        for identity, reason in rejected:
            self.assertRaises(ObjectFormatException, check_identity,
                              identity, reason)
506                           "trailing characters")
507
508
class TimezoneTests(unittest.TestCase):
    """Tests converting between '+HHMM' strings and offsets in seconds."""

    def test_parse_timezone_utc(self):
        offset = parse_timezone("+0000")
        self.assertEquals(0, offset)

    def test_generate_timezone_utc(self):
        text = format_timezone(0)
        self.assertEquals("+0000", text)

    def test_parse_timezone_cet(self):
        one_hour = 60 * 60
        self.assertEquals(one_hour, parse_timezone("+0100"))

    def test_format_timezone_cet(self):
        one_hour = 60 * 60
        self.assertEquals("+0100", format_timezone(one_hour))

    def test_format_timezone_pdt(self):
        minus_four_hours = -4 * 60 * 60
        self.assertEquals("-0400", format_timezone(minus_four_hours))

    def test_parse_timezone_pdt(self):
        minus_four_hours = -4 * 60 * 60
        self.assertEquals(minus_four_hours, parse_timezone("-0400"))

    def test_format_timezone_pdt_half(self):
        # Four hours and forty minutes behind, expressed in seconds.
        seconds = -(4 * 60 + 40) * 60
        self.assertEquals("-0440", format_timezone(seconds))

    def test_parse_timezone_pdt_half(self):
        # Four hours and forty minutes behind, expressed in seconds.
        seconds = -(4 * 60 + 40) * 60
        self.assertEquals(seconds, parse_timezone("-0440"))
532     def test_parse_timezone_pdt_half(self):
533         self.assertEquals(((-4 * 60) - 40) * 60, parse_timezone("-0440"))