Avoid stripping newlines in blob data.
[jelmer/gitpython.git] / lib / git / blob.py
1 # blob.py
2 # Copyright (C) 2008 Michael Trier (mtrier@gmail.com) and contributors
3 #
4 # This module is part of GitPython and is released under
5 # the BSD License: http://www.opensource.org/licenses/bsd-license.php
6
7 import mimetypes
8 import os
9 import re
10 import time
11 from actor import Actor
12 from commit import Commit
13
class Blob(object):
    """
    A git blob (the content of one file) belonging to a repository

    Size and content are not read at construction time; they are fetched
    through ``repo.git.cat_file`` on first access and cached on the instance.
    """
    DEFAULT_MIME_TYPE = "text/plain"

    def __init__(self, repo, **kwargs):
        """
        Create an unbaked Blob containing just the specified attributes

        ``repo``
            is the Repo

        ``kwargs``
            is instance variable data; every key is set as an attribute
            (typically ``id``, ``mode`` and ``name``)

        Returns
            GitPython.Blob
        """
        self.id = None
        self.mode = None
        self.name = None
        self._size = None
        self.data_stored = None

        self.repo = repo
        # Applied last so that callers can override any default above.
        for attr, value in kwargs.items():
            setattr(self, attr, value)

    @property
    def size(self):
        """
        The size of this blob in bytes

        Queried with ``git cat-file -s`` on first access, then cached.

        Returns
            int
        """
        if self._size is None:
            raw_size = self.repo.git.cat_file(self.id, **{'s': True})
            self._size = int(raw_size.rstrip())
        return self._size

    @property
    def data(self):
        """
        The binary contents of this blob.

        Read with ``git cat-file -p`` (raw output requested) on first
        access, then cached.

        Returns
            str
        """
        # Compare against None rather than truthiness: an empty blob is a
        # valid cached value and must not trigger another git call.
        if self.data_stored is None:
            self.data_stored = self.repo.git.cat_file(
                self.id, **{'p': True, 'with_raw_output': True})
        return self.data_stored

    @property
    def mime_type(self):
        """
        The mime type of this file (based on the filename)

        Falls back to ``DEFAULT_MIME_TYPE`` when the blob has no name or the
        type cannot be guessed from it.

        Returns
            str
        """
        if self.name:
            guessed = mimetypes.guess_type(self.name)[0]
            if guessed:
                return guessed
        return self.DEFAULT_MIME_TYPE

    @property
    def basename(self):
        """
        The final path component of this blob's name

        Returns
            str
        """
        return os.path.basename(self.name)

    @classmethod
    def blame(cls, repo, commit, file):
        """
        The blame information for the given file at the given commit

        Parses the output of ``repo.git.blame(commit, '--', file, p=True)``.

        ``repo``
            is the Repo

        ``commit``
            is the revision to blame at

        ``file``
            is the path of the file to blame

        Returns
            list: [GitPython.Commit, list: [<line>]]
        """
        data = repo.git.blame(commit, '--', file, **{'p': True})
        commits = {}    # commit id -> Commit, so each commit is built once
        blames = []
        info = None     # fields collected for the line currently being parsed

        for line in data.splitlines():
            parts = re.split(r'\s+', line, 1)
            key = parts[0]
            value = parts[-1]

            if re.search(r'^[0-9A-Fa-f]{40}$', key):
                # Header: "<sha> <orig line> <final line> [<group size>]".
                # The four-field form opens a new group of lines.
                m = re.search(
                    r'^([0-9A-Fa-f]{40}) (\d+) (\d+)(?: (\d+))?$', line)
                if m:
                    sha, _origin_line, _final_line, group_lines = m.groups()
                    info = {'id': sha}
                    if group_lines is not None:
                        blames.append([None, []])
            elif key.startswith(('author', 'committer')):
                if key.endswith('-mail') and len(key) > len('-mail'):
                    info["%s_email" % key[:-len('-mail')]] = value
                elif key.endswith('-time') and len(key) > len('-time'):
                    info["%s_date" % key[:-len('-time')]] = \
                        time.gmtime(int(value))
                elif key in ('author', 'committer'):
                    info[key] = value
            elif key.startswith('filename'):
                info['filename'] = value
            elif key.startswith('summary'):
                info['summary'] = value
            elif line.startswith('\t'):
                # Content line: a tab followed by the text of the line.
                if info:
                    sha = info['id']
                    if sha not in commits:
                        # Full commit details are only present in ``info``
                        # the first time a commit shows up in the output.
                        author = Actor.from_string(
                            info['author'] + ' ' + info['author_email'])
                        committer = Actor.from_string(
                            info['committer'] + ' ' + info['committer_email'])
                        commits[sha] = Commit(
                            repo, **{'id': sha,
                                     'author': author,
                                     'authored_date': info['author_date'],
                                     'committer': committer,
                                     'committed_date': info['committer_date'],
                                     'message': info['summary']})

                    blames[-1][0] = commits[sha]
                    # append, not ``+=``: extending a list with a string
                    # would add one entry per character instead of the line.
                    blames[-1][1].append(line[1:])
                    info = None

        return blames

    def __repr__(self):
        return '<GitPython.Blob "%s">' % self.id