lib/testtools/testtools/compat.py

   1 # Copyright (c) 2008-2011 testtools developers. See LICENSE for details.
   2
   3 """Compatibility support for python 2 and 3."""
   4
# Make every class statement in this module create a new-style class on
# Python 2; Python 3 ignores this module-level name.
__metaclass__ = type
# Names exported by ``from testtools.compat import *``. The underscore
# prefixed helpers have to be listed explicitly, as a star import skips such
# names unless they appear here.
__all__ = [
    '_b',
    '_u',
    'advance_iterator',
    'all',
    'BytesIO',
    'classtypes',
    'isbaseexception',
    'istext',
    'str_is_unicode',
    'StringIO',
    'reraise',
    'unicode_output_stream',
    ]
  20
  21 import codecs
  22 import linecache
  23 import locale
  24 import os
  25 import re
  26 import sys
  27 import traceback
  28 import unicodedata
  29
  30 from testtools.helpers import try_imports
  31
# Resolve the in-memory stream classes. The Python 2 ``StringIO`` module is
# listed before ``io`` for both names; presumably try_imports returns the
# first dotted name that can be imported -- confirm against
# testtools.helpers.
BytesIO = try_imports(['StringIO.StringIO', 'io.BytesIO'])
StringIO = try_imports(['StringIO.StringIO', 'io.StringIO'])

# Try the 2.x helper module first and fall back to the 3.x one when importing
# it raises SyntaxError (presumably it contains syntax only Python 2 can
# parse -- confirm against testtools._compat2x).
try:
    from testtools import _compat2x as _compat
except SyntaxError:
    from testtools import _compat3x as _compat

# Re-export whichever ``reraise`` implementation was importable.
reraise = _compat.reraise
  41
  42
  43 __u_doc = """A function version of the 'u' prefix.
  44
  45 This is needed becayse the u prefix is not usable in Python 3 but is required
  46 in Python 2 to get a unicode object.
  47
  48 To migrate code that was written as u'\u1234' in Python 2 to 2+3 change
  49 it to be _u('\u1234'). The Python 3 interpreter will decode it
  50 appropriately and the no-op _u for Python 3 lets it through, in Python
  51 2 we then call unicode-escape in the _u function.
  52 """
  53
  54 if sys.version_info > (3, 0):
  55     import builtins
  56     def _u(s):
  57         return s
  58     _r = ascii
  59     def _b(s):
  60         """A byte literal."""
  61         return s.encode("latin-1")
  62     advance_iterator = next
  63     # GZ 2011-08-24: Seems istext() is easy to misuse and makes for bad code.
  64     def istext(x):
  65         return isinstance(x, str)
  66     def classtypes():
  67         return (type,)
  68     str_is_unicode = True
  69 else:
  70     import __builtin__ as builtins
  71     def _u(s):
  72         # The double replace mangling going on prepares the string for
  73         # unicode-escape - \foo is preserved, \u and \U are decoded.
  74         return (s.replace("\\", "\\\\").replace("\\\\u", "\\u")
  75             .replace("\\\\U", "\\U").decode("unicode-escape"))
  76     _r = repr
  77     def _b(s):
  78         return s
  79     advance_iterator = lambda it: it.next()
  80     def istext(x):
  81         return isinstance(x, basestring)
  82     def classtypes():
  83         import types
  84         return (type, types.ClassType)
  85     str_is_unicode = sys.platform == "cli"
  86
  87 _u.__doc__ = __u_doc
  88
  89
  90 if sys.version_info > (2, 5):
  91     all = all
  92     _error_repr = BaseException.__repr__
  93     def isbaseexception(exception):
  94         """Return whether exception inherits from BaseException only"""
  95         return (isinstance(exception, BaseException)
  96             and not isinstance(exception, Exception))
  97 else:
  98     def all(iterable):
  99         """If contents of iterable all evaluate as boolean True"""
 100         for obj in iterable:
 101             if not obj:
 102                 return False
 103         return True
 104     def _error_repr(exception):
 105         """Format an exception instance as Python 2.5 and later do"""
 106         return exception.__class__.__name__ + repr(exception.args)
 107     def isbaseexception(exception):
 108         """Return whether exception would inherit from BaseException only
 109
 110         This approximates the hierarchy in Python 2.5 and later, compare the
 111         difference between the diagrams at the bottom of the pages:
 112         <http://docs.python.org/release/2.4.4/lib/module-exceptions.html>
 113         <http://docs.python.org/release/2.5.4/lib/module-exceptions.html>
 114         """
 115         return isinstance(exception, (KeyboardInterrupt, SystemExit))
 116
 117
 118 # GZ 2011-08-24: Using isinstance checks like this encourages bad interfaces,
 119 #                there should be better ways to write code needing this.
 120 if not issubclass(getattr(builtins, "bytes", str), str):
 121     def _isbytes(x):
 122         return isinstance(x, bytes)
 123 else:
 124     # Never return True on Pythons that provide the name but not the real type
 125     def _isbytes(x):
 126         return False
 127
 128
 129 def _slow_escape(text):
 130     """Escape unicode ``text`` leaving printable characters unmodified
 131
 132     The behaviour emulates the Python 3 implementation of repr, see
 133     unicode_repr in unicodeobject.c and isprintable definition.
 134
 135     Because this iterates over the input a codepoint at a time, it's slow, and
 136     does not handle astral characters correctly on Python builds with 16 bit
 137     rather than 32 bit unicode type.
 138     """
 139     output = []
 140     for c in text:
 141         o = ord(c)
 142         if o < 256:
 143             if o < 32 or 126 < o < 161:
 144                 output.append(c.encode("unicode-escape"))
 145             elif o == 92:
 146                 # Separate due to bug in unicode-escape codec in Python 2.4
 147                 output.append("\\\\")
 148             else:
 149                 output.append(c)
 150         else:
 151             # To get correct behaviour would need to pair up surrogates here
 152             if unicodedata.category(c)[0] in "CZ":
 153                 output.append(c.encode("unicode-escape"))
 154             else:
 155                 output.append(c)
 156     return "".join(output)
 157
 158
def text_repr(text, multiline=None):
    """Rich repr for ``text`` returning unicode, triple quoted if ``multiline``.

    :param text: The string to represent, the code has paths for both
        bytestrings and unicode strings.
    :param multiline: If true, produce a triple quoted literal with real line
        breaks between the escaped lines. If None (the default) this is
        enabled only when ``text`` contains a newline.
    :return: A string containing a Python literal for ``text``.
    """
    is_py3k = sys.version_info > (3, 0)
    # A newline of the same type as text, bytes((0xA,)) is only evaluated
    # when text is an instance of a real bytes type
    nl = _isbytes(text) and bytes((0xA,)) or "\n"
    if multiline is None:
        multiline = nl in text
    if not multiline and (is_py3k or not str_is_unicode and type(text) is str):
        # Use normal repr for single line of unicode on Python 3 or bytes
        return repr(text)
    # The repr of an empty string of the same type with the two quote
    # characters removed leaves just the literal prefix, if there is one
    prefix = repr(text[:0])[:-2]
    if multiline:
        # To escape multiline strings, split and process each line in turn,
        # making sure that quotes are not escaped.
        if is_py3k:
            # Number of characters to skip in each repr to get past the
            # prefix and the opening quote
            offset = len(prefix) + 1
            lines = []
            for l in text.split(nl):
                r = repr(l)
                # The quote character repr picked for this line, which is
                # the only one it may have backslash escaped
                q = r[-1]
                lines.append(r[offset:-1].replace("\\" + q, q))
        elif not str_is_unicode and isinstance(text, str):
            # Bytestring where str is not unicode, the string-escape codec
            # does the work and only single quotes need unescaping
            lines = [l.encode("string-escape").replace("\\'", "'")
                for l in text.split("\n")]
        else:
            lines = [_slow_escape(l) for l in text.split("\n")]
        # Combine the escaped lines and append two of the closing quotes,
        # then iterate over the result to escape triple quotes correctly.
        _semi_done = "\n".join(lines) + "''"
        p = 0
        while True:
            p = _semi_done.find("'''", p)
            if p == -1:
                break
            # Insert a backslash in front of the run of three quotes
            _semi_done = "\\".join([_semi_done[:p], _semi_done[p:]])
            # Resume searching after the new backslash and the first quote
            p += 2
        # Open with triple quotes and a backslash-newline so the content
        # starts on its own line, the final quote completes the closing
        # triple as two were already appended above
        return "".join([prefix, "'''\\\n", _semi_done, "'"])
    escaped_text = _slow_escape(text)
    # Determine which quote character to use and if one gets prefixed with a
    # backslash following the same logic Python uses for repr() on strings
    quote = "'"
    if "'" in text:
        if '"' in text:
            escaped_text = escaped_text.replace("'", "\\'")
        else:
            quote = '"'
    return "".join([prefix, quote, escaped_text, quote])
 206
 207
 208 def unicode_output_stream(stream):
 209     """Get wrapper for given stream that writes any unicode without exception
 210
 211     Characters that can't be coerced to the encoding of the stream, or 'ascii'
 212     if valid encoding is not found, will be replaced. The original stream may
 213     be returned in situations where a wrapper is determined unneeded.
 214
 215     The wrapper only allows unicode to be written, not non-ascii bytestrings,
 216     which is a good thing to ensure sanity and sanitation.
 217     """
 218     if sys.platform == "cli":
 219         # Best to never encode before writing in IronPython
 220         return stream
 221     try:
 222         writer = codecs.getwriter(stream.encoding or "")
 223     except (AttributeError, LookupError):
 224         # GZ 2010-06-16: Python 3 StringIO ends up here, but probably needs
 225         #                different handling as it doesn't want bytestrings
 226         return codecs.getwriter("ascii")(stream, "replace")
 227     if writer.__module__.rsplit(".", 1)[1].startswith("utf"):
 228         # The current stream has a unicode encoding so no error handler is needed
 229         if sys.version_info > (3, 0):
 230             return stream
 231         return writer(stream)
 232     if sys.version_info > (3, 0):
 233         # Python 3 doesn't seem to make this easy, handle a common case
 234         try:
 235             return stream.__class__(stream.buffer, stream.encoding, "replace",
 236                 stream.newlines, stream.line_buffering)
 237         except AttributeError:
 238             pass
 239     return writer(stream, "replace")
 240
 241
 242 # The default source encoding is actually "iso-8859-1" until Python 2.5 but
 243 # using non-ascii causes a deprecation warning in 2.4 and it's cleaner to
 244 # treat all versions the same way
 245 _default_source_encoding = "ascii"
 246
 247 # Pattern specified in <http://www.python.org/dev/peps/pep-0263/>
 248 _cookie_search=re.compile("coding[:=]\s*([-\w.]+)").search
 249
 250 def _detect_encoding(lines):
 251     """Get the encoding of a Python source file from a list of lines as bytes
 252
 253     This function does less than tokenize.detect_encoding added in Python 3 as
 254     it does not attempt to raise a SyntaxError when the interpreter would, it
 255     just wants the encoding of a source file Python has already compiled and
 256     determined is valid.
 257     """
 258     if not lines:
 259         return _default_source_encoding
 260     if lines[0].startswith("\xef\xbb\xbf"):
 261         # Source starting with UTF-8 BOM is either UTF-8 or a SyntaxError
 262         return "utf-8"
 263     # Only the first two lines of the source file are examined
 264     magic = _cookie_search("".join(lines[:2]))
 265     if magic is None:
 266         return _default_source_encoding
 267     encoding = magic.group(1)
 268     try:
 269         codecs.lookup(encoding)
 270     except LookupError:
 271         # Some codecs raise something other than LookupError if they don't
 272         # support the given error handler, but not the text ones that could
 273         # actually be used for Python source code
 274         return _default_source_encoding
 275     return encoding
 276
 277
class _EncodingTuple(tuple):
    """A tuple type that can have an encoding attribute smuggled on

    Instances of the builtin tuple reject attribute assignment, this subclass
    adds nothing except the ability to set one. It is used by
    _get_source_encoding to store the detected encoding on linecache entries.
    """
 280
 281
 282 def _get_source_encoding(filename):
 283     """Detect, cache and return the encoding of Python source at filename"""
 284     try:
 285         return linecache.cache[filename].encoding
 286     except (AttributeError, KeyError):
 287         encoding = _detect_encoding(linecache.getlines(filename))
 288         if filename in linecache.cache:
 289             newtuple = _EncodingTuple(linecache.cache[filename])
 290             newtuple.encoding = encoding
 291             linecache.cache[filename] = newtuple
 292         return encoding
 293
 294
 295 def _get_exception_encoding():
 296     """Return the encoding we expect messages from the OS to be encoded in"""
 297     if os.name == "nt":
 298         # GZ 2010-05-24: Really want the codepage number instead, the error
 299         #                handling of standard codecs is more deterministic
 300         return "mbcs"
 301     # GZ 2010-05-23: We need this call to be after initialisation, but there's
 302     #                no benefit in asking more than once as it's a global
 303     #                setting that can change after the message is formatted.
 304     return locale.getlocale(locale.LC_MESSAGES)[1] or "ascii"
 305
 306
 307 def _exception_to_text(evalue):
 308     """Try hard to get a sensible text value out of an exception instance"""
 309     try:
 310         return unicode(evalue)
 311     except KeyboardInterrupt:
 312         raise
 313     except:
 314         # Apparently this is what traceback._some_str does. Sigh - RBC 20100623
 315         pass
 316     try:
 317         return str(evalue).decode(_get_exception_encoding(), "replace")
 318     except KeyboardInterrupt:
 319         raise
 320     except:
 321         # Apparently this is what traceback._some_str does. Sigh - RBC 20100623
 322         pass
 323     # Okay, out of ideas, let higher level handle it
 324     return None
 325
 326
# GZ 2010-05-23: This function is huge and horrible and I welcome suggestions
#                on the best way to break it up
# First line of every formatted traceback, as a unicode string
_TB_HEADER = _u('Traceback (most recent call last):\n')
def _format_exc_info(eclass, evalue, tb, limit=None):
    """Format a stack trace and the exception information as unicode

    Compatibility function for Python 2 which ensures each component of a
    traceback is correctly decoded according to its origins.

    Based on traceback.format_exception and related functions.

    :param eclass: The exception class, or the string itself for a string
        exception.
    :param evalue: The exception instance, or None for a string exception.
    :param tb: The traceback object, if false no stack lines are included.
    :param limit: Passed through to traceback.extract_tb.
    :return: A list of strings making up the formatted traceback.
    """
    fs_enc = sys.getfilesystemencoding()
    if tb:
        list = [_TB_HEADER]
        extracted_list = []
        # Decode each part of every stack entry with the encoding fitting
        # where it came from before handing over to the stdlib formatter
        for filename, lineno, name, line in traceback.extract_tb(tb, limit):
            extracted_list.append((
                # Filenames come from the filesystem
                filename.decode(fs_enc, "replace"),
                lineno,
                # Function names are decoded as ascii
                name.decode("ascii", "replace"),
                # Source text may be missing, otherwise decode it using the
                # encoding detected for the file it came from
                line and line.decode(
                    _get_source_encoding(filename), "replace")))
        list.extend(traceback.format_list(extracted_list))
    else:
        list = []
    if evalue is None:
        # Is a (deprecated) string exception
        list.append((eclass + "\n").decode("ascii", "replace"))
        return list
    if isinstance(evalue, SyntaxError):
        # Avoid duplicating the special formatting for SyntaxError here,
        # instead create a new instance with unicode filename and line
        # Potentially gives duff spacing, but that's a pre-existing issue
        try:
            # Unpacking iterates over the exception instance, which is
            # expected to give the message and a four item location tuple
            msg, (filename, lineno, offset, line) = evalue
        except (TypeError, ValueError):
            pass # Strange exception instance, fall through to generic code
        else:
            # Errors during parsing give the line from buffer encoded as
            # latin-1 or utf-8 or the encoding of the file depending on the
            # coding and whether the patch for issue #1031213 is applied, so
            # give up on trying to decode it and just read the file again
            if line:
                bytestr = linecache.getline(filename, lineno)
                if bytestr:
                    # Drop a UTF-8 BOM from the start of the first line
                    if lineno == 1 and bytestr.startswith("\xef\xbb\xbf"):
                        bytestr = bytestr[3:]
                    line = bytestr.decode(
                        _get_source_encoding(filename), "replace")
                    # Remove the linecache entry for the file again
                    del linecache.cache[filename]
                else:
                    # The line could not be read back, so decode the one
                    # from the exception as ascii
                    line = line.decode("ascii", "replace")
            if filename:
                filename = filename.decode(fs_enc, "replace")
            # Rebuild the exception from the decoded parts and let the
            # stdlib produce the SyntaxError specific layout
            evalue = eclass(msg, (filename, lineno, offset, line))
            list.extend(traceback.format_exception_only(eclass, evalue))
            return list
    sclass = eclass.__name__
    svalue = _exception_to_text(evalue)
    if svalue:
        list.append("%s: %s\n" % (sclass, svalue))
    elif svalue is None:
        # None means no text could be obtained from the exception
        # GZ 2010-05-24: Not a great fallback message, but keep for the moment
        list.append("%s: <unprintable %s object>\n" % (sclass, sclass))
    else:
        # The text was empty, so give the class name on its own
        list.append("%s\n" % sclass)
    return list