lib/testtools/testtools/tests/test_compat.py

   1 # Copyright (c) 2010 testtools developers. See LICENSE for details.
   2
   3 """Tests for miscellaneous compatibility functions"""
   4
   5 import linecache
   6 import os
   7 import sys
   8 import tempfile
   9 import traceback
  10
  11 import testtools
  12
  13 from testtools.compat import (
  14     _b,
  15     _detect_encoding,
  16     _get_source_encoding,
  17     _u,
  18     str_is_unicode,
  19     text_repr,
  20     unicode_output_stream,
  21     )
  22 from testtools.matchers import (
  23     MatchesException,
  24     Not,
  25     Raises,
  26     )
  27
  28
  29 class TestDetectEncoding(testtools.TestCase):
  30     """Test detection of Python source encodings"""
  31
  32     def _check_encoding(self, expected, lines, possibly_invalid=False):
  33         """Check lines are valid Python and encoding is as expected"""
  34         if not possibly_invalid:
  35             compile(_b("".join(lines)), "<str>", "exec")
  36         encoding = _detect_encoding(lines)
  37         self.assertEqual(expected, encoding,
  38             "Encoding %r expected but got %r from lines %r" %
  39                 (expected, encoding, lines))
  40
  41     def test_examples_from_pep(self):
  42         """Check the examples given in PEP 263 all work as specified
  43
  44         See 'Examples' section of <http://www.python.org/dev/peps/pep-0263/>
  45         """
  46         # With interpreter binary and using Emacs style file encoding comment:
  47         self._check_encoding("latin-1", (
  48             "#!/usr/bin/python\n",
  49             "# -*- coding: latin-1 -*-\n",
  50             "import os, sys\n"))
  51         self._check_encoding("iso-8859-15", (
  52             "#!/usr/bin/python\n",
  53             "# -*- coding: iso-8859-15 -*-\n",
  54             "import os, sys\n"))
  55         self._check_encoding("ascii", (
  56             "#!/usr/bin/python\n",
  57             "# -*- coding: ascii -*-\n",
  58             "import os, sys\n"))
  59         # Without interpreter line, using plain text:
  60         self._check_encoding("utf-8", (
  61             "# This Python file uses the following encoding: utf-8\n",
  62             "import os, sys\n"))
  63         # Text editors might have different ways of defining the file's
  64         # encoding, e.g.
  65         self._check_encoding("latin-1", (
  66             "#!/usr/local/bin/python\n",
  67             "# coding: latin-1\n",
  68             "import os, sys\n"))
  69         # Without encoding comment, Python's parser will assume ASCII text:
  70         self._check_encoding("ascii", (
  71             "#!/usr/local/bin/python\n",
  72             "import os, sys\n"))
  73         # Encoding comments which don't work:
  74         #   Missing "coding:" prefix:
  75         self._check_encoding("ascii", (
  76             "#!/usr/local/bin/python\n",
  77             "# latin-1\n",
  78             "import os, sys\n"))
  79         #   Encoding comment not on line 1 or 2:
  80         self._check_encoding("ascii", (
  81             "#!/usr/local/bin/python\n",
  82             "#\n",
  83             "# -*- coding: latin-1 -*-\n",
  84             "import os, sys\n"))
  85         #   Unsupported encoding:
  86         self._check_encoding("ascii", (
  87             "#!/usr/local/bin/python\n",
  88             "# -*- coding: utf-42 -*-\n",
  89             "import os, sys\n"),
  90             possibly_invalid=True)
  91
  92     def test_bom(self):
  93         """Test the UTF-8 BOM counts as an encoding declaration"""
  94         self._check_encoding("utf-8", (
  95             "\xef\xbb\xbfimport sys\n",
  96             ))
  97         self._check_encoding("utf-8", (
  98             "\xef\xbb\xbf# File encoding: UTF-8\n",
  99             ))
 100         self._check_encoding("utf-8", (
 101             '\xef\xbb\xbf"""Module docstring\n',
 102             '\xef\xbb\xbfThat should just be a ZWNB"""\n'))
 103         self._check_encoding("latin-1", (
 104             '"""Is this coding: latin-1 or coding: utf-8 instead?\n',
 105             '\xef\xbb\xbfThose should be latin-1 bytes"""\n'))
 106         self._check_encoding("utf-8", (
 107             "\xef\xbb\xbf# Is the coding: utf-8 or coding: euc-jp instead?\n",
 108             '"""Module docstring say \xe2\x98\x86"""\n'))
 109
 110     def test_multiple_coding_comments(self):
 111         """Test only the first of multiple coding declarations counts"""
 112         self._check_encoding("iso-8859-1", (
 113             "# Is the coding: iso-8859-1\n",
 114             "# Or is it coding: iso-8859-2\n"),
 115             possibly_invalid=True)
 116         self._check_encoding("iso-8859-1", (
 117             "#!/usr/bin/python\n",
 118             "# Is the coding: iso-8859-1\n",
 119             "# Or is it coding: iso-8859-2\n"))
 120         self._check_encoding("iso-8859-1", (
 121             "# Is the coding: iso-8859-1 or coding: iso-8859-2\n",
 122             "# Or coding: iso-8859-3 or coding: iso-8859-4\n"),
 123             possibly_invalid=True)
 124         self._check_encoding("iso-8859-2", (
 125             "# Is the coding iso-8859-1 or coding: iso-8859-2\n",
 126             "# Spot the missing colon above\n"))
 127
 128
 129 class TestGetSourceEncoding(testtools.TestCase):
 130     """Test reading and caching the encodings of source files"""
 131
 132     def setUp(self):
 133         testtools.TestCase.setUp(self)
 134         dir = tempfile.mkdtemp()
 135         self.addCleanup(os.rmdir, dir)
 136         self.filename = os.path.join(dir, self.id().rsplit(".", 1)[1] + ".py")
 137         self._written = False
 138
 139     def put_source(self, text):
 140         f = open(self.filename, "w")
 141         try:
 142             f.write(text)
 143         finally:
 144             f.close()
 145             if not self._written:
 146                 self._written = True
 147                 self.addCleanup(os.remove, self.filename)
 148                 self.addCleanup(linecache.cache.pop, self.filename, None)
 149
 150     def test_nonexistent_file_as_ascii(self):
 151         """When file can't be found, the encoding should default to ascii"""
 152         self.assertEquals("ascii", _get_source_encoding(self.filename))
 153
 154     def test_encoding_is_cached(self):
 155         """The encoding should stay the same if the cache isn't invalidated"""
 156         self.put_source(
 157             "# coding: iso-8859-13\n"
 158             "import os\n")
 159         self.assertEquals("iso-8859-13", _get_source_encoding(self.filename))
 160         self.put_source(
 161             "# coding: rot-13\n"
 162             "vzcbeg bf\n")
 163         self.assertEquals("iso-8859-13", _get_source_encoding(self.filename))
 164
 165     def test_traceback_rechecks_encoding(self):
 166         """A traceback function checks the cache and resets the encoding"""
 167         self.put_source(
 168             "# coding: iso-8859-8\n"
 169             "import os\n")
 170         self.assertEquals("iso-8859-8", _get_source_encoding(self.filename))
 171         self.put_source(
 172             "# coding: utf-8\n"
 173             "import os\n")
 174         try:
 175             exec (compile("raise RuntimeError\n", self.filename, "exec"))
 176         except RuntimeError:
 177             traceback.extract_tb(sys.exc_info()[2])
 178         else:
 179             self.fail("RuntimeError not raised")
 180         self.assertEquals("utf-8", _get_source_encoding(self.filename))
 181
 182
 183 class _FakeOutputStream(object):
 184     """A simple file-like object for testing"""
 185
 186     def __init__(self):
 187         self.writelog = []
 188
 189     def write(self, obj):
 190         self.writelog.append(obj)
 191
 192
 193 class TestUnicodeOutputStream(testtools.TestCase):
 194     """Test wrapping output streams so they work with arbitrary unicode"""
 195
 196     uni = _u("pa\u026a\u03b8\u0259n")
 197
 198     def setUp(self):
 199         super(TestUnicodeOutputStream, self).setUp()
 200         if sys.platform == "cli":
 201             self.skip("IronPython shouldn't wrap streams to do encoding")
 202
 203     def test_no_encoding_becomes_ascii(self):
 204         """A stream with no encoding attribute gets ascii/replace strings"""
 205         sout = _FakeOutputStream()
 206         unicode_output_stream(sout).write(self.uni)
 207         self.assertEqual([_b("pa???n")], sout.writelog)
 208
 209     def test_encoding_as_none_becomes_ascii(self):
 210         """A stream with encoding value of None gets ascii/replace strings"""
 211         sout = _FakeOutputStream()
 212         sout.encoding = None
 213         unicode_output_stream(sout).write(self.uni)
 214         self.assertEqual([_b("pa???n")], sout.writelog)
 215
 216     def test_bogus_encoding_becomes_ascii(self):
 217         """A stream with a bogus encoding gets ascii/replace strings"""
 218         sout = _FakeOutputStream()
 219         sout.encoding = "bogus"
 220         unicode_output_stream(sout).write(self.uni)
 221         self.assertEqual([_b("pa???n")], sout.writelog)
 222
 223     def test_partial_encoding_replace(self):
 224         """A string which can be partly encoded correctly should be"""
 225         sout = _FakeOutputStream()
 226         sout.encoding = "iso-8859-7"
 227         unicode_output_stream(sout).write(self.uni)
 228         self.assertEqual([_b("pa?\xe8?n")], sout.writelog)
 229
 230     @testtools.skipIf(str_is_unicode, "Tests behaviour when str is not unicode")
 231     def test_unicode_encodings_wrapped_when_str_is_not_unicode(self):
 232         """A unicode encoding is wrapped but needs no error handler"""
 233         sout = _FakeOutputStream()
 234         sout.encoding = "utf-8"
 235         uout = unicode_output_stream(sout)
 236         self.assertEqual(uout.errors, "strict")
 237         uout.write(self.uni)
 238         self.assertEqual([_b("pa\xc9\xaa\xce\xb8\xc9\x99n")], sout.writelog)
 239
 240     @testtools.skipIf(not str_is_unicode, "Tests behaviour when str is unicode")
 241     def test_unicode_encodings_not_wrapped_when_str_is_unicode(self):
 242         # No wrapping needed if native str type is unicode
 243         sout = _FakeOutputStream()
 244         sout.encoding = "utf-8"
 245         uout = unicode_output_stream(sout)
 246         self.assertIs(uout, sout)
 247
 248     def test_stringio(self):
 249         """A StringIO object should maybe get an ascii native str type"""
 250         try:
 251             from cStringIO import StringIO
 252             newio = False
 253         except ImportError:
 254             from io import StringIO
 255             newio = True
 256         sout = StringIO()
 257         soutwrapper = unicode_output_stream(sout)
 258         if newio:
 259             self.expectFailure("Python 3 StringIO expects text not bytes",
 260                 self.assertThat, lambda: soutwrapper.write(self.uni),
 261                 Not(Raises(MatchesException(TypeError))))
 262         soutwrapper.write(self.uni)
 263         self.assertEqual("pa???n", sout.getvalue())
 264
 265
 266 class TestTextRepr(testtools.TestCase):
 267     """Ensure in extending repr, basic behaviours are not being broken"""
 268
 269     ascii_examples = (
 270         # Single character examples
 271         #  C0 control codes should be escaped except multiline \n
 272         ("\x00", "'\\x00'", "'''\\\n\\x00'''"),
 273         ("\b", "'\\x08'", "'''\\\n\\x08'''"),
 274         ("\t", "'\\t'", "'''\\\n\\t'''"),
 275         ("\n", "'\\n'", "'''\\\n\n'''"),
 276         ("\r", "'\\r'", "'''\\\n\\r'''"),
 277         #  Quotes and backslash should match normal repr behaviour
 278         ('"', "'\"'", "'''\\\n\"'''"),
 279         ("'", "\"'\"", "'''\\\n\\''''"),
 280         ("\\", "'\\\\'", "'''\\\n\\\\'''"),
 281         #  DEL is also unprintable and should be escaped
 282         ("\x7F", "'\\x7f'", "'''\\\n\\x7f'''"),
 283
 284         # Character combinations that need double checking
 285         ("\r\n", "'\\r\\n'", "'''\\\n\\r\n'''"),
 286         ("\"'", "'\"\\''", "'''\\\n\"\\''''"),
 287         ("'\"", "'\\'\"'", "'''\\\n'\"'''"),
 288         ("\\n", "'\\\\n'", "'''\\\n\\\\n'''"),
 289         ("\\\n", "'\\\\\\n'", "'''\\\n\\\\\n'''"),
 290         ("\\' ", "\"\\\\' \"", "'''\\\n\\\\' '''"),
 291         ("\\'\n", "\"\\\\'\\n\"", "'''\\\n\\\\'\n'''"),
 292         ("\\'\"", "'\\\\\\'\"'", "'''\\\n\\\\'\"'''"),
 293         ("\\'''", "\"\\\\'''\"", "'''\\\n\\\\\\'\\'\\''''"),
 294         )
 295
 296     # Bytes with the high bit set should always be escaped
 297     bytes_examples = (
 298         (_b("\x80"), "'\\x80'", "'''\\\n\\x80'''"),
 299         (_b("\xA0"), "'\\xa0'", "'''\\\n\\xa0'''"),
 300         (_b("\xC0"), "'\\xc0'", "'''\\\n\\xc0'''"),
 301         (_b("\xFF"), "'\\xff'", "'''\\\n\\xff'''"),
 302         (_b("\xC2\xA7"), "'\\xc2\\xa7'", "'''\\\n\\xc2\\xa7'''"),
 303         )
 304
 305     # Unicode doesn't escape printable characters as per the Python 3 model
 306     unicode_examples = (
 307         # C1 codes are unprintable
 308         (_u("\x80"), "'\\x80'", "'''\\\n\\x80'''"),
 309         (_u("\x9F"), "'\\x9f'", "'''\\\n\\x9f'''"),
 310         # No-break space is unprintable
 311         (_u("\xA0"), "'\\xa0'", "'''\\\n\\xa0'''"),
 312         # Letters latin alphabets are printable
 313         (_u("\xA1"), _u("'\xa1'"), _u("'''\\\n\xa1'''")),
 314         (_u("\xFF"), _u("'\xff'"), _u("'''\\\n\xff'''")),
 315         (_u("\u0100"), _u("'\u0100'"), _u("'''\\\n\u0100'''")),
 316         # Line and paragraph seperators are unprintable
 317         (_u("\u2028"), "'\\u2028'", "'''\\\n\\u2028'''"),
 318         (_u("\u2029"), "'\\u2029'", "'''\\\n\\u2029'''"),
 319         # Unpaired surrogates are unprintable
 320         (_u("\uD800"), "'\\ud800'", "'''\\\n\\ud800'''"),
 321         (_u("\uDFFF"), "'\\udfff'", "'''\\\n\\udfff'''"),
 322         # Unprintable general categories not fully tested: Cc, Cf, Co, Cn, Zs
 323         )
 324
 325     b_prefix = repr(_b(""))[:-2]
 326     u_prefix = repr(_u(""))[:-2]
 327
 328     def test_ascii_examples_oneline_bytes(self):
 329         for s, expected, _ in self.ascii_examples:
 330             b = _b(s)
 331             actual = text_repr(b, multiline=False)
 332             # Add self.assertIsInstance check?
 333             self.assertEqual(actual, self.b_prefix + expected)
 334             self.assertEqual(eval(actual), b)
 335
 336     def test_ascii_examples_oneline_unicode(self):
 337         for s, expected, _ in self.ascii_examples:
 338             u = _u(s)
 339             actual = text_repr(u, multiline=False)
 340             self.assertEqual(actual, self.u_prefix + expected)
 341             self.assertEqual(eval(actual), u)
 342
 343     def test_ascii_examples_multiline_bytes(self):
 344         for s, _, expected in self.ascii_examples:
 345             b = _b(s)
 346             actual = text_repr(b, multiline=True)
 347             self.assertEqual(actual, self.b_prefix + expected)
 348             self.assertEqual(eval(actual), b)
 349
 350     def test_ascii_examples_multiline_unicode(self):
 351         for s, _, expected in self.ascii_examples:
 352             u = _u(s)
 353             actual = text_repr(u, multiline=True)
 354             self.assertEqual(actual, self.u_prefix + expected)
 355             self.assertEqual(eval(actual), u)
 356
 357     def test_ascii_examples_defaultline_bytes(self):
 358         for s, one, multi in self.ascii_examples:
 359             expected = "\n" in s and multi or one
 360             self.assertEqual(text_repr(_b(s)), self.b_prefix + expected)
 361
 362     def test_ascii_examples_defaultline_unicode(self):
 363         for s, one, multi in self.ascii_examples:
 364             expected = "\n" in s and multi or one
 365             self.assertEqual(text_repr(_u(s)), self.u_prefix + expected)
 366
 367     def test_bytes_examples_oneline(self):
 368         for b, expected, _ in self.bytes_examples:
 369             actual = text_repr(b, multiline=False)
 370             self.assertEqual(actual, self.b_prefix + expected)
 371             self.assertEqual(eval(actual), b)
 372
 373     def test_bytes_examples_multiline(self):
 374         for b, _, expected in self.bytes_examples:
 375             actual = text_repr(b, multiline=True)
 376             self.assertEqual(actual, self.b_prefix + expected)
 377             self.assertEqual(eval(actual), b)
 378
 379     def test_unicode_examples_oneline(self):
 380         for u, expected, _ in self.unicode_examples:
 381             actual = text_repr(u, multiline=False)
 382             self.assertEqual(actual, self.u_prefix + expected)
 383             self.assertEqual(eval(actual), u)
 384
 385     def test_unicode_examples_multiline(self):
 386         for u, _, expected in self.unicode_examples:
 387             actual = text_repr(u, multiline=True)
 388             self.assertEqual(actual, self.u_prefix + expected)
 389             self.assertEqual(eval(actual), u)
 390
 391
 392 def test_suite():
 393     from unittest import TestLoader
 394     return TestLoader().loadTestsFromName(__name__)