lib/testtools/testtools/compat.py

   1 # Copyright (c) 2008-2011 testtools developers. See LICENSE for details.
   2
   3 """Compatibility support for python 2 and 3."""
   4
# Make every class statement in this module create a new-style class on
# Python 2; Python 3 ignores this name as all of its classes are new-style.
__metaclass__ = type
# The names made available by a star-import of this module.
__all__ = [
    '_b',
    '_u',
    'advance_iterator',
    'all',
    'BytesIO',
    'classtypes',
    'isbaseexception',
    'istext',
    'str_is_unicode',
    'StringIO',
    'reraise',
    'unicode_output_stream',
    ]
  20
  21 import codecs
  22 import linecache
  23 import locale
  24 import os
  25 import re
  26 import sys
  27 import traceback
  28
  29 from testtools.helpers import try_imports
  30
# In-memory stream classes resolved through try_imports. Each candidate list
# names the Python 2 StringIO module first and the io module second.
# NOTE(review): presumably try_imports returns the first candidate that can
# be imported - confirm against testtools.helpers.
BytesIO = try_imports(['StringIO.StringIO', 'io.BytesIO'])
StringIO = try_imports(['StringIO.StringIO', 'io.StringIO'])

# Pick the version specific helper module: _compat2x is tried first and
# _compat3x is used when importing it raises SyntaxError.
# NOTE(review): presumably _compat2x contains Python 2 only syntax that a
# Python 3 interpreter cannot compile - confirm.
try:
    from testtools import _compat2x as _compat
    # Bare reference to the name, it has no effect at runtime.
    # NOTE(review): presumably keeps lint tools from flagging the import as
    # unused - confirm.
    _compat
except SyntaxError:
    from testtools import _compat3x as _compat

# Expose the reraise implementation of whichever module was imported.
reraise = _compat.reraise
  41
  42
  43 __u_doc = """A function version of the 'u' prefix.
  44
  45 This is needed becayse the u prefix is not usable in Python 3 but is required
  46 in Python 2 to get a unicode object.
  47
  48 To migrate code that was written as u'\u1234' in Python 2 to 2+3 change
  49 it to be _u('\u1234'). The Python 3 interpreter will decode it
  50 appropriately and the no-op _u for Python 3 lets it through, in Python
  51 2 we then call unicode-escape in the _u function.
  52 """
  53
  54 if sys.version_info > (3, 0):
  55     def _u(s):
  56         return s
  57     _r = ascii
  58     def _b(s):
  59         """A byte literal."""
  60         return s.encode("latin-1")
  61     advance_iterator = next
  62     def istext(x):
  63         return isinstance(x, str)
  64     def classtypes():
  65         return (type,)
  66     str_is_unicode = True
  67 else:
  68     def _u(s):
  69         # The double replace mangling going on prepares the string for
  70         # unicode-escape - \foo is preserved, \u and \U are decoded.
  71         return (s.replace("\\", "\\\\").replace("\\\\u", "\\u")
  72             .replace("\\\\U", "\\U").decode("unicode-escape"))
  73     _r = repr
  74     def _b(s):
  75         return s
  76     advance_iterator = lambda it: it.next()
  77     def istext(x):
  78         return isinstance(x, basestring)
  79     def classtypes():
  80         import types
  81         return (type, types.ClassType)
  82     str_is_unicode = sys.platform == "cli"
  83
  84 _u.__doc__ = __u_doc
  85
  86
  87 if sys.version_info > (2, 5):
  88     all = all
  89     _error_repr = BaseException.__repr__
  90     def isbaseexception(exception):
  91         """Return whether exception inherits from BaseException only"""
  92         return (isinstance(exception, BaseException)
  93             and not isinstance(exception, Exception))
  94 else:
  95     def all(iterable):
  96         """If contents of iterable all evaluate as boolean True"""
  97         for obj in iterable:
  98             if not obj:
  99                 return False
 100         return True
 101     def _error_repr(exception):
 102         """Format an exception instance as Python 2.5 and later do"""
 103         return exception.__class__.__name__ + repr(exception.args)
 104     def isbaseexception(exception):
 105         """Return whether exception would inherit from BaseException only
 106
 107         This approximates the hierarchy in Python 2.5 and later, compare the
 108         difference between the diagrams at the bottom of the pages:
 109         <http://docs.python.org/release/2.4.4/lib/module-exceptions.html>
 110         <http://docs.python.org/release/2.5.4/lib/module-exceptions.html>
 111         """
 112         return isinstance(exception, (KeyboardInterrupt, SystemExit))
 113
 114
 115 def unicode_output_stream(stream):
 116     """Get wrapper for given stream that writes any unicode without exception
 117
 118     Characters that can't be coerced to the encoding of the stream, or 'ascii'
 119     if valid encoding is not found, will be replaced. The original stream may
 120     be returned in situations where a wrapper is determined unneeded.
 121
 122     The wrapper only allows unicode to be written, not non-ascii bytestrings,
 123     which is a good thing to ensure sanity and sanitation.
 124     """
 125     if sys.platform == "cli":
 126         # Best to never encode before writing in IronPython
 127         return stream
 128     try:
 129         writer = codecs.getwriter(stream.encoding or "")
 130     except (AttributeError, LookupError):
 131         # GZ 2010-06-16: Python 3 StringIO ends up here, but probably needs
 132         #                different handling as it doesn't want bytestrings
 133         return codecs.getwriter("ascii")(stream, "replace")
 134     if writer.__module__.rsplit(".", 1)[1].startswith("utf"):
 135         # The current stream has a unicode encoding so no error handler is needed
 136         if sys.version_info > (3, 0):
 137             return stream
 138         return writer(stream)
 139     if sys.version_info > (3, 0):
 140         # Python 3 doesn't seem to make this easy, handle a common case
 141         try:
 142             return stream.__class__(stream.buffer, stream.encoding, "replace",
 143                 stream.newlines, stream.line_buffering)
 144         except AttributeError:
 145             pass
 146     return writer(stream, "replace")
 147
 148
 149 # The default source encoding is actually "iso-8859-1" until Python 2.5 but
 150 # using non-ascii causes a deprecation warning in 2.4 and it's cleaner to
 151 # treat all versions the same way
 152 _default_source_encoding = "ascii"
 153
 154 # Pattern specified in <http://www.python.org/dev/peps/pep-0263/>
 155 _cookie_search=re.compile("coding[:=]\s*([-\w.]+)").search
 156
 157 def _detect_encoding(lines):
 158     """Get the encoding of a Python source file from a list of lines as bytes
 159
 160     This function does less than tokenize.detect_encoding added in Python 3 as
 161     it does not attempt to raise a SyntaxError when the interpreter would, it
 162     just wants the encoding of a source file Python has already compiled and
 163     determined is valid.
 164     """
 165     if not lines:
 166         return _default_source_encoding
 167     if lines[0].startswith("\xef\xbb\xbf"):
 168         # Source starting with UTF-8 BOM is either UTF-8 or a SyntaxError
 169         return "utf-8"
 170     # Only the first two lines of the source file are examined
 171     magic = _cookie_search("".join(lines[:2]))
 172     if magic is None:
 173         return _default_source_encoding
 174     encoding = magic.group(1)
 175     try:
 176         codecs.lookup(encoding)
 177     except LookupError:
 178         # Some codecs raise something other than LookupError if they don't
 179         # support the given error handler, but not the text ones that could
 180         # actually be used for Python source code
 181         return _default_source_encoding
 182     return encoding
 183
 184
 185 class _EncodingTuple(tuple):
 186     """A tuple type that can have an encoding attribute smuggled on"""
 187
 188
 189 def _get_source_encoding(filename):
 190     """Detect, cache and return the encoding of Python source at filename"""
 191     try:
 192         return linecache.cache[filename].encoding
 193     except (AttributeError, KeyError):
 194         encoding = _detect_encoding(linecache.getlines(filename))
 195         if filename in linecache.cache:
 196             newtuple = _EncodingTuple(linecache.cache[filename])
 197             newtuple.encoding = encoding
 198             linecache.cache[filename] = newtuple
 199         return encoding
 200
 201
 202 def _get_exception_encoding():
 203     """Return the encoding we expect messages from the OS to be encoded in"""
 204     if os.name == "nt":
 205         # GZ 2010-05-24: Really want the codepage number instead, the error
 206         #                handling of standard codecs is more deterministic
 207         return "mbcs"
 208     # GZ 2010-05-23: We need this call to be after initialisation, but there's
 209     #                no benefit in asking more than once as it's a global
 210     #                setting that can change after the message is formatted.
 211     return locale.getlocale(locale.LC_MESSAGES)[1] or "ascii"
 212
 213
 214 def _exception_to_text(evalue):
 215     """Try hard to get a sensible text value out of an exception instance"""
 216     try:
 217         return unicode(evalue)
 218     except KeyboardInterrupt:
 219         raise
 220     except:
 221         # Apparently this is what traceback._some_str does. Sigh - RBC 20100623
 222         pass
 223     try:
 224         return str(evalue).decode(_get_exception_encoding(), "replace")
 225     except KeyboardInterrupt:
 226         raise
 227     except:
 228         # Apparently this is what traceback._some_str does. Sigh - RBC 20100623
 229         pass
 230     # Okay, out of ideas, let higher level handle it
 231     return None
 232
 233
 234 # GZ 2010-05-23: This function is huge and horrible and I welcome suggestions
 235 #                on the best way to break it up
 236 _TB_HEADER = _u('Traceback (most recent call last):\n')
 237 def _format_exc_info(eclass, evalue, tb, limit=None):
 238     """Format a stack trace and the exception information as unicode
 239
 240     Compatibility function for Python 2 which ensures each component of a
 241     traceback is correctly decoded according to its origins.
 242
 243     Based on traceback.format_exception and related functions.
 244     """
 245     fs_enc = sys.getfilesystemencoding()
 246     if tb:
 247         list = [_TB_HEADER]
 248         extracted_list = []
 249         for filename, lineno, name, line in traceback.extract_tb(tb, limit):
 250             extracted_list.append((
 251                 filename.decode(fs_enc, "replace"),
 252                 lineno,
 253                 name.decode("ascii", "replace"),
 254                 line and line.decode(
 255                     _get_source_encoding(filename), "replace")))
 256         list.extend(traceback.format_list(extracted_list))
 257     else:
 258         list = []
 259     if evalue is None:
 260         # Is a (deprecated) string exception
 261         list.append((eclass + "\n").decode("ascii", "replace"))
 262         return list
 263     if isinstance(evalue, SyntaxError):
 264         # Avoid duplicating the special formatting for SyntaxError here,
 265         # instead create a new instance with unicode filename and line
 266         # Potentially gives duff spacing, but that's a pre-existing issue
 267         try:
 268             msg, (filename, lineno, offset, line) = evalue
 269         except (TypeError, ValueError):
 270             pass # Strange exception instance, fall through to generic code
 271         else:
 272             # Errors during parsing give the line from buffer encoded as
 273             # latin-1 or utf-8 or the encoding of the file depending on the
 274             # coding and whether the patch for issue #1031213 is applied, so
 275             # give up on trying to decode it and just read the file again
 276             if line:
 277                 bytestr = linecache.getline(filename, lineno)
 278                 if bytestr:
 279                     if lineno == 1 and bytestr.startswith("\xef\xbb\xbf"):
 280                         bytestr = bytestr[3:]
 281                     line = bytestr.decode(
 282                         _get_source_encoding(filename), "replace")
 283                     del linecache.cache[filename]
 284                 else:
 285                     line = line.decode("ascii", "replace")
 286             if filename:
 287                 filename = filename.decode(fs_enc, "replace")
 288             evalue = eclass(msg, (filename, lineno, offset, line))
 289             list.extend(traceback.format_exception_only(eclass, evalue))
 290             return list
 291     sclass = eclass.__name__
 292     svalue = _exception_to_text(evalue)
 293     if svalue:
 294         list.append("%s: %s\n" % (sclass, svalue))
 295     elif svalue is None:
 296         # GZ 2010-05-24: Not a great fallback message, but keep for the moment
 297         list.append("%s: <unprintable %s object>\n" % (sclass, sclass))
 298     else:
 299         list.append("%s\n" % sclass)
 300     return list