4b2446efecf5ac1f499ce816f14eff5a93f4a1f1
[abartlet/samba.git/.git] / lib / testtools / testtools / tests / test_compat.py
1 # Copyright (c) 2010 testtools developers. See LICENSE for details.
2
3 """Tests for miscellaneous compatibility functions"""
4
5 import linecache
6 import os
7 import sys
8 import tempfile
9 import traceback
10
11 import testtools
12
13 from testtools.compat import (
14     _b,
15     _detect_encoding,
16     _get_source_encoding,
17     _u,
18     str_is_unicode,
19     text_repr,
20     unicode_output_stream,
21     )
22 from testtools.matchers import (
23     MatchesException,
24     Not,
25     Raises,
26     )
27
28
class TestDetectEncoding(testtools.TestCase):
    """Exercise _detect_encoding against PEP 263 style source headers"""

    def _check_encoding(self, expected, lines, possibly_invalid=False):
        """Assert that _detect_encoding(lines) gives the expected name

        Unless possibly_invalid is true, the joined lines are compiled first
        so that a sample which is not valid Python fails loudly.
        """
        if not possibly_invalid:
            source = _b("".join(lines))
            compile(source, "<str>", "exec")
        detected = _detect_encoding(lines)
        message = "Encoding %r expected but got %r from lines %r" % (
            expected, detected, lines)
        self.assertEqual(expected, detected, message)

    def test_examples_from_pep(self):
        """Run through the samples listed in PEP 263

        See 'Examples' section of <http://www.python.org/dev/peps/pep-0263/>
        """
        check = self._check_encoding
        body = "import os, sys\n"
        shebang = "#!/usr/bin/python\n"
        local_shebang = "#!/usr/local/bin/python\n"
        # Interpreter line followed by an Emacs style coding comment:
        check("latin-1", (shebang, "# -*- coding: latin-1 -*-\n", body))
        check("iso-8859-15", (
            shebang, "# -*- coding: iso-8859-15 -*-\n", body))
        check("ascii", (shebang, "# -*- coding: ascii -*-\n", body))
        # Plain text declaration with no interpreter line:
        check("utf-8", (
            "# This Python file uses the following encoding: utf-8\n",
            body))
        # The bare form that some text editors write:
        check("latin-1", (local_shebang, "# coding: latin-1\n", body))
        # No declaration at all, so ASCII is assumed:
        check("ascii", (local_shebang, body))
        # Declarations that must not take effect:
        #   the "coding:" prefix is missing
        check("ascii", (local_shebang, "# latin-1\n", body))
        #   the comment is on line 3 rather than line 1 or 2
        check("ascii", (
            local_shebang, "#\n", "# -*- coding: latin-1 -*-\n", body))
        #   the named encoding is unsupported
        check("ascii",
            (local_shebang, "# -*- coding: utf-42 -*-\n", body),
            possibly_invalid=True)

    def test_bom(self):
        """A leading UTF-8 BOM should count as an encoding declaration"""
        check = self._check_encoding
        bom = "\xef\xbb\xbf"
        check("utf-8", (bom + "import sys\n",))
        check("utf-8", (bom + "# File encoding: UTF-8\n",))
        check("utf-8", (
            bom + '"""Module docstring\n',
            bom + 'That should just be a ZWNB"""\n'))
        # Here the same three bytes only appear on the second line
        check("latin-1", (
            '"""Is this coding: latin-1 or coding: utf-8 instead?\n',
            bom + 'Those should be latin-1 bytes"""\n'))
        check("utf-8", (
            bom + "# Is the coding: utf-8 or coding: euc-jp instead?\n",
            '"""Module docstring say \xe2\x98\x86"""\n'))

    def test_multiple_coding_comments(self):
        """Of several coding declarations only the first should be used"""
        check = self._check_encoding
        first = "# Is the coding: iso-8859-1\n"
        second = "# Or is it coding: iso-8859-2\n"
        check("iso-8859-1", (first, second), possibly_invalid=True)
        check("iso-8859-1", ("#!/usr/bin/python\n", first, second))
        check("iso-8859-1", (
            "# Is the coding: iso-8859-1 or coding: iso-8859-2\n",
            "# Or coding: iso-8859-3 or coding: iso-8859-4\n"),
            possibly_invalid=True)
        check("iso-8859-2", (
            "# Is the coding iso-8859-1 or coding: iso-8859-2\n",
            "# Spot the missing colon above\n"))
127
128
class TestGetSourceEncoding(testtools.TestCase):
    """Test reading and caching the encodings of source files"""

    def setUp(self):
        """Make a temporary directory and choose a source file path in it

        The file itself is not created here, which lets a test exercise the
        missing-file case as well.
        """
        testtools.TestCase.setUp(self)
        tmpdir = tempfile.mkdtemp()
        self.addCleanup(os.rmdir, tmpdir)
        # The file is named after the last component of the test id
        self.filename = os.path.join(
            tmpdir, self.id().rsplit(".", 1)[1] + ".py")
        self._written = False

    def put_source(self, text):
        """Write text to self.filename, replacing any previous content

        The first call also registers cleanups that remove the file and
        drop its entry from linecache.cache.
        """
        f = open(self.filename, "w")
        try:
            f.write(text)
        finally:
            f.close()
            if not self._written:
                self._written = True
                # Registered after the os.rmdir cleanup from setUp; cleanups
                # are expected to run in reverse order, so the directory is
                # empty again by the time it is removed.
                self.addCleanup(os.remove, self.filename)
                self.addCleanup(linecache.cache.pop, self.filename, None)

    def test_nonexistent_file_as_ascii(self):
        """When file can't be found, the encoding should default to ascii"""
        self.assertEqual("ascii", _get_source_encoding(self.filename))

    def test_encoding_is_cached(self):
        """The encoding should stay the same if the cache isn't invalidated"""
        self.put_source(
            "# coding: iso-8859-13\n"
            "import os\n")
        self.assertEqual("iso-8859-13", _get_source_encoding(self.filename))
        # Rewriting the file alone must not change the reported encoding
        self.put_source(
            "# coding: rot-13\n"
            "vzcbeg bf\n")
        self.assertEqual("iso-8859-13", _get_source_encoding(self.filename))

    def test_traceback_rechecks_encoding(self):
        """A traceback function checks the cache and resets the encoding"""
        self.put_source(
            "# coding: iso-8859-8\n"
            "import os\n")
        self.assertEqual("iso-8859-8", _get_source_encoding(self.filename))
        self.put_source(
            "# coding: utf-8\n"
            "import os\n")
        try:
            # Raise from code compiled under our filename so the traceback
            # refers to the rewritten file. The parenthesised form of exec
            # is accepted as both a statement and a function call.
            exec (compile("raise RuntimeError\n", self.filename, "exec"))
        except RuntimeError:
            traceback.extract_tb(sys.exc_info()[2])
        else:
            self.fail("RuntimeError not raised")
        self.assertEqual("utf-8", _get_source_encoding(self.filename))
181
182
class _FakeOutputStream(object):
    """Minimal stand-in for an output stream that records its writes"""

    def __init__(self):
        # Every object handed to write(), oldest first
        self.writelog = list()

    def write(self, obj):
        """Record obj in writelog rather than emitting it anywhere"""
        log = self.writelog
        log.append(obj)
191
192
class TestUnicodeOutputStream(testtools.TestCase):
    """Check that wrapped output streams accept arbitrary unicode text"""

    # Sample text mixing ascii letters with three non-ascii characters
    uni = _u("pa\u026a\u03b8\u0259n")

    def setUp(self):
        """Skip every test in this class when running on IronPython"""
        super(TestUnicodeOutputStream, self).setUp()
        on_ironpython = sys.platform == "cli"
        if on_ironpython:
            self.skip("IronPython shouldn't wrap streams to do encoding")

    def _write_sample(self, stream):
        """Write the sample through a wrapper of stream and return its log"""
        wrapped = unicode_output_stream(stream)
        wrapped.write(self.uni)
        return stream.writelog

    def test_no_encoding_becomes_ascii(self):
        """With no encoding attribute the output is ascii with replacements"""
        written = self._write_sample(_FakeOutputStream())
        self.assertEqual([_b("pa???n")], written)

    def test_encoding_as_none_becomes_ascii(self):
        """An encoding of None also gives ascii with replacements"""
        stream = _FakeOutputStream()
        stream.encoding = None
        written = self._write_sample(stream)
        self.assertEqual([_b("pa???n")], written)

    def test_bogus_encoding_becomes_ascii(self):
        """An unknown encoding name also gives ascii with replacements"""
        stream = _FakeOutputStream()
        stream.encoding = "bogus"
        written = self._write_sample(stream)
        self.assertEqual([_b("pa???n")], written)

    def test_partial_encoding_replace(self):
        """Characters the stream encoding can represent should be kept"""
        stream = _FakeOutputStream()
        stream.encoding = "iso-8859-7"
        written = self._write_sample(stream)
        self.assertEqual([_b("pa?\xe8?n")], written)

    @testtools.skipIf(str_is_unicode, "Tests behaviour when str is not unicode")
    def test_unicode_encodings_wrapped_when_str_is_not_unicode(self):
        """A utf-8 stream is wrapped with the strict error handler"""
        stream = _FakeOutputStream()
        stream.encoding = "utf-8"
        wrapper = unicode_output_stream(stream)
        self.assertEqual(wrapper.errors, "strict")
        wrapper.write(self.uni)
        self.assertEqual(
            [_b("pa\xc9\xaa\xce\xb8\xc9\x99n")], stream.writelog)

    @testtools.skipIf(not str_is_unicode, "Tests behaviour when str is unicode")
    def test_unicode_encodings_not_wrapped_when_str_is_unicode(self):
        """A utf-8 stream is handed back as is when str is unicode"""
        stream = _FakeOutputStream()
        stream.encoding = "utf-8"
        result = unicode_output_stream(stream)
        self.assertIs(result, stream)

    def test_stringio(self):
        """Writing through a wrapped StringIO should leave ascii text in it"""
        try:
            from cStringIO import StringIO
        except ImportError:
            # No cStringIO module, fall back to the io implementation
            from io import StringIO
            newio = True
        else:
            newio = False
        buf = StringIO()
        wrapper = unicode_output_stream(buf)
        if newio:
            self.expectFailure("Python 3 StringIO expects text not bytes",
                self.assertThat, lambda: wrapper.write(self.uni),
                Not(Raises(MatchesException(TypeError))))
        wrapper.write(self.uni)
        self.assertEqual("pa???n", buf.getvalue())
264
265
class TestTextRepr(testtools.TestCase):
    """Check text_repr keeps the basic behaviours expected of repr"""

    # Each entry is (input, expected one-line form, expected multi-line form)
    # with the expected forms given without any b or u literal prefix.
    ascii_examples = (
        # Single character examples
        #  C0 control codes should be escaped except multiline \n
        ("\x00", "'\\x00'", "'''\\\n\\x00'''"),
        ("\b", "'\\x08'", "'''\\\n\\x08'''"),
        ("\t", "'\\t'", "'''\\\n\\t'''"),
        ("\n", "'\\n'", "'''\\\n\n'''"),
        ("\r", "'\\r'", "'''\\\n\\r'''"),
        #  Quotes and backslash should match normal repr behaviour
        ('"', "'\"'", "'''\\\n\"'''"),
        ("'", "\"'\"", "'''\\\n\\''''"),
        ("\\", "'\\\\'", "'''\\\n\\\\'''"),
        #  DEL is also unprintable and should be escaped
        ("\x7F", "'\\x7f'", "'''\\\n\\x7f'''"),

        # Character combinations that need double checking
        ("\r\n", "'\\r\\n'", "'''\\\n\\r\n'''"),
        ("\"'", "'\"\\''", "'''\\\n\"\\''''"),
        ("'\"", "'\\'\"'", "'''\\\n'\"'''"),
        ("\\n", "'\\\\n'", "'''\\\n\\\\n'''"),
        ("\\\n", "'\\\\\\n'", "'''\\\n\\\\\n'''"),
        ("\\' ", "\"\\\\' \"", "'''\\\n\\\\' '''"),
        ("\\'\n", "\"\\\\'\\n\"", "'''\\\n\\\\'\n'''"),
        ("\\'\"", "'\\\\\\'\"'", "'''\\\n\\\\'\"'''"),
        ("\\'''", "\"\\\\'''\"", "'''\\\n\\\\\\'\\'\\''''"),
        )

    # Bytes with the high bit set should always be escaped
    bytes_examples = (
        (_b("\x80"), "'\\x80'", "'''\\\n\\x80'''"),
        (_b("\xA0"), "'\\xa0'", "'''\\\n\\xa0'''"),
        (_b("\xC0"), "'\\xc0'", "'''\\\n\\xc0'''"),
        (_b("\xFF"), "'\\xff'", "'''\\\n\\xff'''"),
        (_b("\xC2\xA7"), "'\\xc2\\xa7'", "'''\\\n\\xc2\\xa7'''"),
        )

    # Unicode doesn't escape printable characters as per the Python 3 model
    unicode_examples = (
        # C1 codes are unprintable
        (_u("\x80"), "'\\x80'", "'''\\\n\\x80'''"),
        (_u("\x9F"), "'\\x9f'", "'''\\\n\\x9f'''"),
        # No-break space is unprintable
        (_u("\xA0"), "'\\xa0'", "'''\\\n\\xa0'''"),
        # Letters latin alphabets are printable
        (_u("\xA1"), _u("'\xa1'"), _u("'''\\\n\xa1'''")),
        (_u("\xFF"), _u("'\xff'"), _u("'''\\\n\xff'''")),
        (_u("\u0100"), _u("'\u0100'"), _u("'''\\\n\u0100'''")),
        # Line and paragraph separators are unprintable
        (_u("\u2028"), "'\\u2028'", "'''\\\n\\u2028'''"),
        (_u("\u2029"), "'\\u2029'", "'''\\\n\\u2029'''"),
        # Unpaired surrogates are unprintable
        (_u("\uD800"), "'\\ud800'", "'''\\\n\\ud800'''"),
        (_u("\uDFFF"), "'\\udfff'", "'''\\\n\\udfff'''"),
        # Unprintable general categories not fully tested: Cc, Cf, Co, Cn, Zs
        )

    # Whatever repr() puts in front of the quotes of an empty bytes or
    # unicode value on this interpreter (everything but the last two chars)
    b_prefix = repr(_b(""))[:-2]
    u_prefix = repr(_u(""))[:-2]

    def _check_repr(self, value, prefix, expected, multiline):
        """Compare text_repr output to prefix + expected, then eval it back"""
        actual = text_repr(value, multiline=multiline)
        # NOTE: the type of actual is not asserted here, only its content
        self.assertEqual(actual, prefix + expected)
        self.assertEqual(eval(actual), value)

    def _default_form(self, s, one, multi):
        """Pick the form expected when no multiline argument is passed"""
        # An empty multi-line form also falls back to the one-line form
        if "\n" in s and multi:
            return multi
        return one

    def test_ascii_examples_oneline_bytes(self):
        for s, one, _ in self.ascii_examples:
            self._check_repr(_b(s), self.b_prefix, one, False)

    def test_ascii_examples_oneline_unicode(self):
        for s, one, _ in self.ascii_examples:
            self._check_repr(_u(s), self.u_prefix, one, False)

    def test_ascii_examples_multiline_bytes(self):
        for s, _, multi in self.ascii_examples:
            self._check_repr(_b(s), self.b_prefix, multi, True)

    def test_ascii_examples_multiline_unicode(self):
        for s, _, multi in self.ascii_examples:
            self._check_repr(_u(s), self.u_prefix, multi, True)

    def test_ascii_examples_defaultline_bytes(self):
        for s, one, multi in self.ascii_examples:
            wanted = self._default_form(s, one, multi)
            self.assertEqual(text_repr(_b(s)), self.b_prefix + wanted)

    def test_ascii_examples_defaultline_unicode(self):
        for s, one, multi in self.ascii_examples:
            wanted = self._default_form(s, one, multi)
            self.assertEqual(text_repr(_u(s)), self.u_prefix + wanted)

    def test_bytes_examples_oneline(self):
        for b, one, _ in self.bytes_examples:
            self._check_repr(b, self.b_prefix, one, False)

    def test_bytes_examples_multiline(self):
        for b, _, multi in self.bytes_examples:
            self._check_repr(b, self.b_prefix, multi, True)

    def test_unicode_examples_oneline(self):
        for u, one, _ in self.unicode_examples:
            self._check_repr(u, self.u_prefix, one, False)

    def test_unicode_examples_multiline(self):
        for u, _, multi in self.unicode_examples:
            self._check_repr(u, self.u_prefix, multi, True)
390
391
def test_suite():
    """Return a suite of the tests found by name in this module"""
    import unittest
    loader = unittest.TestLoader()
    return loader.loadTestsFromName(__name__)