1 # Copyright (c) 2010 testtools developers. See LICENSE for details.
3 """Tests for miscellaneous compatibility functions"""
13 from testtools.compat import (
20 unicode_output_stream,
22 from testtools.matchers import (
29 class TestDetectEncoding(testtools.TestCase):
30 """Test detection of Python source encodings"""
def _check_encoding(self, expected, lines, possibly_invalid=False):
    """Assert that the encoding detected for ``lines`` equals ``expected``.

    Unless ``possibly_invalid`` is true, the joined lines are compiled
    first, so source that is not valid Python raises before the encoding
    is examined.
    """
    if not possibly_invalid:
        source = _b("".join(lines))
        compile(source, "<str>", "exec")
    detected = _detect_encoding(lines)
    message = "Encoding %r expected but got %r from lines %r" % (
        expected, detected, lines)
    self.assertEqual(expected, detected, message)
41 def test_examples_from_pep(self):
42 """Check the examples given in PEP 263 all work as specified
44 See 'Examples' section of <http://www.python.org/dev/peps/pep-0263/>
46 # With interpreter binary and using Emacs style file encoding comment:
47 self._check_encoding("latin-1", (
48 "#!/usr/bin/python\n",
49 "# -*- coding: latin-1 -*-\n",
51 self._check_encoding("iso-8859-15", (
52 "#!/usr/bin/python\n",
53 "# -*- coding: iso-8859-15 -*-\n",
55 self._check_encoding("ascii", (
56 "#!/usr/bin/python\n",
57 "# -*- coding: ascii -*-\n",
59 # Without interpreter line, using plain text:
60 self._check_encoding("utf-8", (
61 "# This Python file uses the following encoding: utf-8\n",
63 # Text editors might have different ways of defining the file's
65 self._check_encoding("latin-1", (
66 "#!/usr/local/bin/python\n",
67 "# coding: latin-1\n",
69 # Without encoding comment, Python's parser will assume ASCII text:
70 self._check_encoding("ascii", (
71 "#!/usr/local/bin/python\n",
73 # Encoding comments which don't work:
74 # Missing "coding:" prefix:
75 self._check_encoding("ascii", (
76 "#!/usr/local/bin/python\n",
79 # Encoding comment not on line 1 or 2:
80 self._check_encoding("ascii", (
81 "#!/usr/local/bin/python\n",
83 "# -*- coding: latin-1 -*-\n",
85 # Unsupported encoding:
86 self._check_encoding("ascii", (
87 "#!/usr/local/bin/python\n",
88 "# -*- coding: utf-42 -*-\n",
90 possibly_invalid=True)
93 """Test the UTF-8 BOM counts as an encoding declaration"""
94 self._check_encoding("utf-8", (
95 "\xef\xbb\xbfimport sys\n",
97 self._check_encoding("utf-8", (
98 "\xef\xbb\xbf# File encoding: UTF-8\n",
100 self._check_encoding("utf-8", (
101 '\xef\xbb\xbf"""Module docstring\n',
102 '\xef\xbb\xbfThat should just be a ZWNB"""\n'))
103 self._check_encoding("latin-1", (
104 '"""Is this coding: latin-1 or coding: utf-8 instead?\n',
105 '\xef\xbb\xbfThose should be latin-1 bytes"""\n'))
106 self._check_encoding("utf-8", (
107 "\xef\xbb\xbf# Is the coding: utf-8 or coding: euc-jp instead?\n",
108 '"""Module docstring say \xe2\x98\x86"""\n'))
def test_multiple_coding_comments(self):
    """Only the first coding declaration encountered should be used"""
    # Each entry: (expected encoding, source lines, extra keyword options
    # forwarded to _check_encoding).
    cases = (
        ("iso-8859-1",
            ("# Is the coding: iso-8859-1\n",
             "# Or is it coding: iso-8859-2\n"),
            {"possibly_invalid": True}),
        ("iso-8859-1",
            ("#!/usr/bin/python\n",
             "# Is the coding: iso-8859-1\n",
             "# Or is it coding: iso-8859-2\n"),
            {}),
        ("iso-8859-1",
            ("# Is the coding: iso-8859-1 or coding: iso-8859-2\n",
             "# Or coding: iso-8859-3 or coding: iso-8859-4\n"),
            {"possibly_invalid": True}),
        ("iso-8859-2",
            ("# Is the coding iso-8859-1 or coding: iso-8859-2\n",
             "# Spot the missing colon above\n"),
            {}),
    )
    for wanted, source_lines, options in cases:
        self._check_encoding(wanted, source_lines, **options)
129 class TestGetSourceEncoding(testtools.TestCase):
130 """Test reading and caching the encodings of source files"""
133 testtools.TestCase.setUp(self)
134 dir = tempfile.mkdtemp()
135 self.addCleanup(os.rmdir, dir)
136 self.filename = os.path.join(dir, self.id().rsplit(".", 1)[1] + ".py")
137 self._written = False
139 def put_source(self, text):
140 f = open(self.filename, "w")
145 if not self._written:
147 self.addCleanup(os.remove, self.filename)
148 self.addCleanup(linecache.cache.pop, self.filename, None)
def test_nonexistent_file_as_ascii(self):
    """When file can't be found, the encoding should default to ascii

    No source is written before the lookup, so ``self.filename`` is
    queried as-is.
    """
    # assertEqual rather than the deprecated assertEquals alias, which
    # newer Python versions no longer provide.
    self.assertEqual("ascii", _get_source_encoding(self.filename))
154 def test_encoding_is_cached(self):
155 """The encoding should stay the same if the cache isn't invalidated"""
157 "# coding: iso-8859-13\n"
159 self.assertEquals("iso-8859-13", _get_source_encoding(self.filename))
163 self.assertEquals("iso-8859-13", _get_source_encoding(self.filename))
165 def test_traceback_rechecks_encoding(self):
166 """A traceback function checks the cache and resets the encoding"""
168 "# coding: iso-8859-8\n"
170 self.assertEquals("iso-8859-8", _get_source_encoding(self.filename))
175 exec (compile("raise RuntimeError\n", self.filename, "exec"))
177 traceback.extract_tb(sys.exc_info()[2])
179 self.fail("RuntimeError not raised")
180 self.assertEquals("utf-8", _get_source_encoding(self.filename))
183 class _FakeOutputStream(object):
184 """A simple file-like object for testing"""
def write(self, obj):
    """Record ``obj`` at the end of ``self.writelog`` instead of emitting it"""
    log = self.writelog
    log.append(obj)
193 class TestUnicodeOutputStream(testtools.TestCase):
194 """Test wrapping output streams so they work with arbitrary unicode"""
196 uni = _u("pa\u026a\u03b8\u0259n")
199 super(TestUnicodeOutputStream, self).setUp()
200 if sys.platform == "cli":
201 self.skip("IronPython shouldn't wrap streams to do encoding")
def test_no_encoding_becomes_ascii(self):
    """Without an encoding attribute the stream receives ascii/replace text"""
    stream = _FakeOutputStream()
    wrapper = unicode_output_stream(stream)
    wrapper.write(self.uni)
    self.assertEqual([_b("pa???n")], stream.writelog)
209 def test_encoding_as_none_becomes_ascii(self):
210 """A stream with encoding value of None gets ascii/replace strings"""
211 sout = _FakeOutputStream()
213 unicode_output_stream(sout).write(self.uni)
214 self.assertEqual([_b("pa???n")], sout.writelog)
def test_bogus_encoding_becomes_ascii(self):
    """An unrecognised encoding name still yields ascii/replace text"""
    stream = _FakeOutputStream()
    stream.encoding = "bogus"
    wrapper = unicode_output_stream(stream)
    wrapper.write(self.uni)
    self.assertEqual([_b("pa???n")], stream.writelog)
def test_partial_encoding_replace(self):
    """Encodable characters are kept; the rest are replaced with '?'"""
    stream = _FakeOutputStream()
    stream.encoding = "iso-8859-7"
    wrapper = unicode_output_stream(stream)
    wrapper.write(self.uni)
    self.assertEqual([_b("pa?\xe8?n")], stream.writelog)
230 @testtools.skipIf(str_is_unicode, "Tests behaviour when str is not unicode")
231 def test_unicode_encodings_wrapped_when_str_is_not_unicode(self):
232 """A unicode encoding is wrapped but needs no error handler"""
233 sout = _FakeOutputStream()
234 sout.encoding = "utf-8"
235 uout = unicode_output_stream(sout)
236 self.assertEqual(uout.errors, "strict")
238 self.assertEqual([_b("pa\xc9\xaa\xce\xb8\xc9\x99n")], sout.writelog)
@testtools.skipIf(not str_is_unicode, "Tests behaviour when str is unicode")
def test_unicode_encodings_not_wrapped_when_str_is_unicode(self):
    # When the native str type is already unicode, a utf-8 stream should
    # come back as the very same object rather than a wrapper
    stream = _FakeOutputStream()
    stream.encoding = "utf-8"
    self.assertIs(unicode_output_stream(stream), stream)
248 def test_stringio(self):
249 """A StringIO object should maybe get an ascii native str type"""
251 from cStringIO import StringIO
254 from io import StringIO
257 soutwrapper = unicode_output_stream(sout)
259 self.expectFailure("Python 3 StringIO expects text not bytes",
260 self.assertThat, lambda: soutwrapper.write(self.uni),
261 Not(Raises(MatchesException(TypeError))))
262 soutwrapper.write(self.uni)
263 self.assertEqual("pa???n", sout.getvalue())
266 class TestTextRepr(testtools.TestCase):
267 """Ensure in extending repr, basic behaviours are not being broken"""
270 # Single character examples
271 # C0 control codes should be escaped except multiline \n
272 ("\x00", "'\\x00'", "'''\\\n\\x00'''"),
273 ("\b", "'\\x08'", "'''\\\n\\x08'''"),
274 ("\t", "'\\t'", "'''\\\n\\t'''"),
275 ("\n", "'\\n'", "'''\\\n\n'''"),
276 ("\r", "'\\r'", "'''\\\n\\r'''"),
277 # Quotes and backslash should match normal repr behaviour
278 ('"', "'\"'", "'''\\\n\"'''"),
279 ("'", "\"'\"", "'''\\\n\\''''"),
280 ("\\", "'\\\\'", "'''\\\n\\\\'''"),
281 # DEL is also unprintable and should be escaped
282 ("\x7F", "'\\x7f'", "'''\\\n\\x7f'''"),
284 # Character combinations that need double checking
285 ("\r\n", "'\\r\\n'", "'''\\\n\\r\n'''"),
286 ("\"'", "'\"\\''", "'''\\\n\"\\''''"),
287 ("'\"", "'\\'\"'", "'''\\\n'\"'''"),
288 ("\\n", "'\\\\n'", "'''\\\n\\\\n'''"),
289 ("\\\n", "'\\\\\\n'", "'''\\\n\\\\\n'''"),
290 ("\\' ", "\"\\\\' \"", "'''\\\n\\\\' '''"),
291 ("\\'\n", "\"\\\\'\\n\"", "'''\\\n\\\\'\n'''"),
292 ("\\'\"", "'\\\\\\'\"'", "'''\\\n\\\\'\"'''"),
293 ("\\'''", "\"\\\\'''\"", "'''\\\n\\\\\\'\\'\\''''"),
296 # Bytes with the high bit set should always be escaped
298 (_b("\x80"), "'\\x80'", "'''\\\n\\x80'''"),
299 (_b("\xA0"), "'\\xa0'", "'''\\\n\\xa0'''"),
300 (_b("\xC0"), "'\\xc0'", "'''\\\n\\xc0'''"),
301 (_b("\xFF"), "'\\xff'", "'''\\\n\\xff'''"),
302 (_b("\xC2\xA7"), "'\\xc2\\xa7'", "'''\\\n\\xc2\\xa7'''"),
305 # Unicode doesn't escape printable characters as per the Python 3 model
307 # C1 codes are unprintable
308 (_u("\x80"), "'\\x80'", "'''\\\n\\x80'''"),
309 (_u("\x9F"), "'\\x9f'", "'''\\\n\\x9f'''"),
310 # No-break space is unprintable
311 (_u("\xA0"), "'\\xa0'", "'''\\\n\\xa0'''"),
# Letters in Latin alphabets are printable
313 (_u("\xA1"), _u("'\xa1'"), _u("'''\\\n\xa1'''")),
314 (_u("\xFF"), _u("'\xff'"), _u("'''\\\n\xff'''")),
315 (_u("\u0100"), _u("'\u0100'"), _u("'''\\\n\u0100'''")),
# Line and paragraph separators are unprintable
317 (_u("\u2028"), "'\\u2028'", "'''\\\n\\u2028'''"),
318 (_u("\u2029"), "'\\u2029'", "'''\\\n\\u2029'''"),
319 # Unpaired surrogates are unprintable
320 (_u("\uD800"), "'\\ud800'", "'''\\\n\\ud800'''"),
321 (_u("\uDFFF"), "'\\udfff'", "'''\\\n\\udfff'''"),
322 # Unprintable general categories not fully tested: Cc, Cf, Co, Cn, Zs
325 b_prefix = repr(_b(""))[:-2]
326 u_prefix = repr(_u(""))[:-2]
328 def test_ascii_examples_oneline_bytes(self):
329 for s, expected, _ in self.ascii_examples:
331 actual = text_repr(b, multiline=False)
332 # Add self.assertIsInstance check?
333 self.assertEqual(actual, self.b_prefix + expected)
334 self.assertEqual(eval(actual), b)
336 def test_ascii_examples_oneline_unicode(self):
337 for s, expected, _ in self.ascii_examples:
339 actual = text_repr(u, multiline=False)
340 self.assertEqual(actual, self.u_prefix + expected)
341 self.assertEqual(eval(actual), u)
343 def test_ascii_examples_multiline_bytes(self):
344 for s, _, expected in self.ascii_examples:
346 actual = text_repr(b, multiline=True)
347 self.assertEqual(actual, self.b_prefix + expected)
348 self.assertEqual(eval(actual), b)
350 def test_ascii_examples_multiline_unicode(self):
351 for s, _, expected in self.ascii_examples:
353 actual = text_repr(u, multiline=True)
354 self.assertEqual(actual, self.u_prefix + expected)
355 self.assertEqual(eval(actual), u)
def test_ascii_examples_defaultline_bytes(self):
    """Without a multiline argument, a newline selects the multiline form.

    Each entry of ``ascii_examples`` is converted with ``_b`` and passed to
    ``text_repr`` using its default arguments; the multiline expectation
    applies when the text contains a newline, the one-line expectation
    otherwise.
    """
    for s, one, multi in self.ascii_examples:
        # A real conditional expression: the ``and/or`` spelling would
        # silently fall back to ``one`` if ``multi`` were ever falsy.
        expected = multi if "\n" in s else one
        self.assertEqual(text_repr(_b(s)), self.b_prefix + expected)
def test_ascii_examples_defaultline_unicode(self):
    """Without a multiline argument, a newline selects the multiline form.

    Each entry of ``ascii_examples`` is converted with ``_u`` and passed to
    ``text_repr`` using its default arguments; the multiline expectation
    applies when the text contains a newline, the one-line expectation
    otherwise.
    """
    for s, one, multi in self.ascii_examples:
        # A real conditional expression: the ``and/or`` spelling would
        # silently fall back to ``one`` if ``multi`` were ever falsy.
        expected = multi if "\n" in s else one
        self.assertEqual(text_repr(_u(s)), self.u_prefix + expected)
def test_bytes_examples_oneline(self):
    """One-line repr of every bytes example matches and evaluates back"""
    prefix = self.b_prefix
    for raw, oneline, _unused in self.bytes_examples:
        shown = text_repr(raw, multiline=False)
        self.assertEqual(shown, prefix + oneline)
        # The repr must be a literal that evaluates to the original value
        self.assertEqual(eval(shown), raw)
def test_bytes_examples_multiline(self):
    """Multiline repr of every bytes example matches and evaluates back"""
    prefix = self.b_prefix
    for raw, _unused, multiline in self.bytes_examples:
        shown = text_repr(raw, multiline=True)
        self.assertEqual(shown, prefix + multiline)
        # The repr must be a literal that evaluates to the original value
        self.assertEqual(eval(shown), raw)
def test_unicode_examples_oneline(self):
    """One-line repr of every unicode example matches and evaluates back"""
    prefix = self.u_prefix
    for text, oneline, _unused in self.unicode_examples:
        shown = text_repr(text, multiline=False)
        self.assertEqual(shown, prefix + oneline)
        # The repr must be a literal that evaluates to the original value
        self.assertEqual(eval(shown), text)
def test_unicode_examples_multiline(self):
    """Multiline repr of every unicode example matches and evaluates back"""
    prefix = self.u_prefix
    for text, _unused, multiline in self.unicode_examples:
        shown = text_repr(text, multiline=True)
        self.assertEqual(shown, prefix + multiline)
        # The repr must be a literal that evaluates to the original value
        self.assertEqual(eval(shown), text)
393 from unittest import TestLoader
394 return TestLoader().loadTestsFromName(__name__)