Remove bundled subunit.

[obnox/samba/samba-obnox.git] / lib / testtools / testtools / compat.py
diff --git a/lib/testtools/testtools/compat.py b/lib/testtools/testtools/compat.py

index c8a641be23cd0d13cee14f4664ec6d40d4fdc225..375eca2c02af50d9a70ce27dffb81f8e8833121f 100644 (file)
--- a/lib/testtools/testtools/compat.py
+++ b/lib/testtools/testtools/compat.py
@@ -25,6 +25,7 @@ import os
  import re
  import sys
  import traceback
+import unicodedata
  
  from testtools.helpers import try_imports
  
@@ -33,7 +34,6 @@ StringIO = try_imports(['StringIO.StringIO', 'io.StringIO'])
  
  try:
      from testtools import _compat2x as _compat
-    _compat
  except SyntaxError:
      from testtools import _compat3x as _compat
  
@@ -52,6 +52,7 @@ appropriately and the no-op _u for Python 3 lets it through, in Python
  """
  
  if sys.version_info > (3, 0):
+    import builtins
      def _u(s):
          return s
      _r = ascii
@@ -59,12 +60,14 @@ if sys.version_info > (3, 0):
          """A byte literal."""
          return s.encode("latin-1")
      advance_iterator = next
+    # GZ 2011-08-24: Seems istext() is easy to misuse and makes for bad code.
      def istext(x):
          return isinstance(x, str)
      def classtypes():
          return (type,)
      str_is_unicode = True
  else:
+    import __builtin__ as builtins
      def _u(s):
          # The double replace mangling going on prepares the string for
          # unicode-escape - \foo is preserved, \u and \U are decoded.
@@ -112,6 +115,96 @@ else:
          return isinstance(exception, (KeyboardInterrupt, SystemExit))
  
  
+# GZ 2011-08-24: Using isinstance checks like this encourages bad interfaces,
+#                there should be better ways to write code needing this.
+if issubclass(getattr(builtins, "bytes", str), str):
+    # "bytes" is missing or is only an alias of str here, so nothing qualifies
+    def _isbytes(x):
+        return False
+else:
+    def _isbytes(x):
+        return isinstance(x, bytes)
+
+
+def _slow_escape(text):
+    """Return unicode ``text`` with its non-printable characters escaped
+
+    Printable characters pass through untouched, mirroring what repr does on
+    Python 3 (unicode_repr in unicodeobject.c and the isprintable definition).
+
+    Characters are examined one at a time, which makes this slow, and astral
+    characters come out wrong on Python builds whose unicode type is 16 bit
+    rather than 32 bit.
+    """
+    pieces = []
+    for char in text:
+        code = ord(char)
+        if code == 92:
+            # Backslash: written out by hand because of a bug in the
+            # unicode-escape codec in Python 2.4
+            pieces.append("\\\\")
+            continue
+        if code < 256:
+            unprintable = code < 32 or 126 < code < 161
+        else:
+            # To get correct behaviour would need to pair up surrogates here
+            unprintable = unicodedata.category(char)[0] in "CZ"
+        if unprintable:
+            pieces.append(char.encode("unicode-escape"))
+        else:
+            pieces.append(char)
+    return "".join(pieces)
+
+
+def text_repr(text, multiline=None):
+    """Rich repr for ``text`` returning unicode, triple quoted if ``multiline``.
+    """
+    is_py3k = sys.version_info > (3, 0)
+    nl = _isbytes(text) and bytes((0xA,)) or "\n"  # newline typed like text
+    if multiline is None:  # default: triple quote only if text has a newline
+        multiline = nl in text
+    if not multiline and (is_py3k or not str_is_unicode and type(text) is str):
+        # Use normal repr for single line of unicode on Python 3 or bytes
+        return repr(text)
+    prefix = repr(text[:0])[:-2]  # literal prefix, e.g. "b" or "u", if any
+    if multiline:
+        # To escape multiline strings, split and process each line in turn,
+        # making sure that quotes are not escaped.
+        if is_py3k:
+            offset = len(prefix) + 1  # skip prefix and opening quote
+            lines = []
+            for l in text.split(nl):
+                r = repr(l)
+                q = r[-1]  # quote character repr() chose for this line
+                lines.append(r[offset:-1].replace("\\" + q, q))
+        elif not str_is_unicode and isinstance(text, str):  # a byte str
+            lines = [l.encode("string-escape").replace("\\'", "'")
+                for l in text.split("\n")]
+        else:
+            lines = [_slow_escape(l) for l in text.split("\n")]
+        # Join the escaped lines plus two of the three closing quotes (the
+        # last is added on return), then escape any "'''" runs in the result.
+        _semi_done = "\n".join(lines) + "''"
+        p = 0
+        while True:  # insert a backslash before each "'''" found
+            p = _semi_done.find("'''", p)
+            if p == -1:
+                break
+            _semi_done = "\\".join([_semi_done[:p], _semi_done[p:]])
+            p += 2  # resume after the backslash and the quote it escapes
+        return "".join([prefix, "'''\\\n", _semi_done, "'"])
+    escaped_text = _slow_escape(text)
+    # Determine which quote character to use and if one gets prefixed with a
+    # backslash following the same logic Python uses for repr() on strings
+    quote = "'"
+    if "'" in text:
+        if '"' in text:  # both quote kinds present: escape the single ones
+            escaped_text = escaped_text.replace("'", "\\'")
+        else:
+            quote = '"'  # only single quotes inside, so wrap in double
+    return "".join([prefix, quote, escaped_text, quote])
+
+
  def unicode_output_stream(stream):
      """Get wrapper for given stream that writes any unicode without exception
  
@@ -143,7 +236,7 @@ def unicode_output_stream(stream):
                  stream.newlines, stream.line_buffering)
          except AttributeError:
              pass
-    return writer(stream, "replace")    
+    return writer(stream, "replace")
  
  
  # The default source encoding is actually "iso-8859-1" until Python 2.5 but