pytest/source_char: check for mixed direction text

author Douglas Bagnall <douglas.bagnall@catalyst.net.nz>

Wed, 17 Nov 2021 20:17:53 +0000 (20:17 +0000)

committer Andrew Bartlett <abartlet@samba.org>

Fri, 3 Dec 2021 18:53:43 +0000 (18:53 +0000)
author Douglas Bagnall <douglas.bagnall@catalyst.net.nz>
Wed, 17 Nov 2021 20:17:53 +0000 (20:17 +0000)
committer Andrew Bartlett <abartlet@samba.org>
Fri, 3 Dec 2021 18:53:43 +0000 (18:53 +0000)
diff --git a/python/samba/tests/source_chars.py b/python/samba/tests/source_chars.py

index db7f131d8156d03e5a2d6ec07af7db525629dc92..093d7318cb019b93dad80bf7d6b5d95ff1a0ddca 100644 (file)
--- a/python/samba/tests/source_chars.py
+++ b/python/samba/tests/source_chars.py
@@ -94,6 +94,14 @@ SAFE_FORMAT_CHARS = {
      '\ufeff'
  }
  
+# These files legitimately mix left-to-right and right-to-left text.
+# In the real world mixing directions would be normal in bilingual
+# documents, but it is rare in Samba source code.
+BIDI_FILES = {
+    'source4/heimdal/lib/wind/NormalizationTest.txt',
+    'testdata/source-chars-bidi.py',
+}
+
  
  def get_git_files():
      try:
@@ -196,9 +204,15 @@ class CharacterTests(TestCase):
                  else:
                      self.fail(f"could not decode {name}: {e}")
  
+            dirs = set()
              for c in set(s):
                  if is_bad_char(c):
                      self.fail(f"{name} has potentially bad format characters!")
+                dirs.add(u.bidirectional(c))
+
+            if 'L' in dirs and 'R' in dirs:
+                if name not in BIDI_FILES:
+                    self.fail(f"{name} has LTR and RTL text ({dirs})")
  
      def test_unexpected_format_chars_do_fail(self):
          """Test the test"""
@@ -212,6 +226,21 @@ class CharacterTests(TestCase):
              bad_chars = [c for c in chars if is_bad_char(c)]
              self.assertEqual(len(bad_chars), n_bad)
  
+    def test_unexpected_bidi_fails(self):
+        """Test the test"""
+        for name in [
+                'testdata/source-chars-bidi.py'
+        ]:
+            fullname = os.path.join(ROOT, name)
+            with open(fullname) as f:
+                s = f.read()
+
+            dirs = set()
+            for c in set(s):
+                dirs.add(u.bidirectional(c))
+            self.assertIn('L', dirs)
+            self.assertIn('R', dirs)
+
  
  def check_file_text():
      """If called directly as a script, count the found characters."""
diff --git a/testdata/source-chars-bidi.py b/testdata/source-chars-bidi.py

new file mode 100644 (file)

index 0000000..d728da5
--- /dev/null
+++ b/testdata/source-chars-bidi.py
@@ -0,0 +1,24 @@
+# Used in samba.tests.source_chars to ensure bi-directional text is
+# caught. (make test TESTS=samba.tests.source_chars)
+
+x = א =2
+ח = n = 3
+
+a = x  # 2 * n * m
+b = א  # 2 * ח * m
+c = "x#"  #  n
+d = "א#"  #  ח
+e = f"x{x}n{n}"
+f = f"א{א}ח{ח}"
+
+print(a)
+print(b)
+print(c)
+print(d)
+print(e)
+print(f)
+
+assert a == b
+assert c == d.replace("א", "x")
+assert e[1] == f[1]
+assert e[3] == f[3]
author	Douglas Bagnall <douglas.bagnall@catalyst.net.nz>
	Wed, 17 Nov 2021 20:17:53 +0000 (20:17 +0000)
committer	Andrew Bartlett <abartlet@samba.org>
	Fri, 3 Dec 2021 18:53:43 +0000 (18:53 +0000)
python/samba/tests/source_chars.py		patch \| blob \| history
testdata/source-chars-bidi.py	[new file with mode: 0644]	patch \| blob