0e4441be545a60aac54a3ad79e701df04f51c94b
[bbaumbach/samba-autobuild/.git] / source4 / dsdb / tests / python / sort.py
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3 # Originally based on ./sam.py
4 from __future__ import print_function
5 from unicodedata import normalize
6 import locale
7 locale.setlocale(locale.LC_ALL, ('en_US', 'UTF-8'))
8
9 import optparse
10 import sys
11 import os
12 import re
13
14 sys.path.insert(0, "bin/python")
15 import samba
16 from samba.tests.subunitrun import SubunitOptions, TestProgram
17 from samba.compat import cmp_fn
18 from samba.compat import cmp_to_key_fn
19 from samba.compat import text_type
20 import samba.getopt as options
21
22 from samba.auth import system_session
23 import ldb
24 from samba.samdb import SamDB
25
# Build the command line: the standard Samba option groups plus one
# test-specific knob, followed by a mandatory <host> positional argument.
parser = optparse.OptionParser("sort.py [options] <host>")
sambaopts = options.SambaOptions(parser)
parser.add_option_group(sambaopts)
parser.add_option_group(options.VersionOptions(parser))
# Credentials given on the command line are used when present.
credopts = options.CredentialsOptions(parser)
parser.add_option_group(credopts)
subunitopts = SubunitOptions(parser)
parser.add_option_group(subunitopts)

# Number of user objects each test case creates in its setUp().
parser.add_option(
    '--elements',
    type='int',
    default=33,
    help="use this many elements in the tests",
)

opts, args = parser.parse_args()

# The server (or database file) to test against is not optional.
if not args:
    parser.print_usage()
    sys.exit(1)

host = args[0]

lp = sambaopts.get_loadparm()
creds = credopts.get_credentials(lp)
49
50
def norm(x):
    """Return *x* as NFKC-normalised, upper-cased text.

    Values that are not already text are first decoded as UTF-8, so both
    locally built strings and raw values returned by a search can be
    compared on equal terms.
    """
    text = x if isinstance(x, text_type) else x.decode('utf8')
    normalised = normalize('NFKC', text)
    return normalised.upper()
55
56
# Awkward attribute values, listed in the order Windows 2012R2 returns
# them from a sorted search. Python, Windows, and Samba each sort this
# sequence in drastically different ways, so the list itself is the
# reference order.
FIENDISH_TESTS = [
    ' ', ' e', '\t-\t', '\n\t\t', '!@#!@#!',
    '¼', '¹', '1', '1/4', '1⁄4', '1\xe2\x81\x845', '3',
    'abc',
    'fo\x00od',

    # '\x00food' used to follow here, but on Windows its position
    # relative to 'fo\x00od' appears to be non-deterministic.

    'kōkako',
    'ŋđ¼³ŧ “«đð', 'ŋđ¼³ŧ“«đð',
    'sorttest', 'sorttēst11,',
    'śorttest2', 'śoRttest2', 'ś-o-r-t-t-e-s-t-2',
    'soRTTēst2,',
    'ṡorttest4', 'ṡorttesT4',
    'sörttest-5', 'sÖrttest-5',
    'so-rttest7,',
    '桑巴',
]
70
71
class BaseSortTests(samba.tests.TestCase):
    """Shared fixture and checks for the LDAP server-side sort control.

    setUp() creates an ``ou=sort`` container under the domain DN, fills
    it with ``--elements`` user objects and precomputes, per attribute,
    the order in which a sorted search is expected to return them.

    The actual checks are named ``_test_*`` so that they are not run on
    this base class; subclasses expose the ones they want through thin
    ``test_*`` wrappers.
    """
    # When True, user attribute values are reduced to word characters,
    # commas and full stops, and the extra "tricky" attributes are omitted.
    avoid_tricky_sort = False
    maxDiff = 2000

    def create_user(self, i, n, prefix='sorttest', suffix='', attrs=None,
                    tricky=False):
        """Create user number *i* of *n*, add it to the directory, return it.

        :param i: index of this user; most attribute values derive from it
        :param n: total number of users being created
        :param prefix: text put before *i* in the cn
        :param suffix: text put after *i* in the cn
        :param attrs: optional dict of attributes overriding the defaults
        :param tricky: accepted for compatibility; not used here
        :return: the attribute dict (including 'dn') that was added

        The dict is also appended to self.users.
        """
        name = "%s%d%s" % (prefix, i, suffix)
        user = {
            'cn': name,
            "objectclass": "user",
            'givenName': "abcdefghijklmnopqrstuvwxyz"[i % 26],
            "roomNumber": "%sb\x00c" % (n - i),
            "carLicense": "后来经",
            "employeeNumber": "%s%sx" % (abs(i * (99 - i)), '\n' * (i & 255)),
            "accountExpires": "%s" % (10 ** 9 + 1000000 * i),
            "msTSExpireDate4": "19%02d0101010000.0Z" % (i % 100),
            "flags": str(i * (n - i)),
            "serialNumber": "abc %s%s%s" % ('AaBb |-/'[i & 7],
                                            ' 3z}'[i & 3],
                                            '"@'[i & 1],),
            "comment": "Favourite colour is %d" % (n % (i + 1)),
        }

        if self.avoid_tricky_sort:
            # We are not even going to try passing tests that assume
            # some kind of Unicode awareness. Build a fresh dict rather
            # than assigning into the one being iterated over.
            user = {k: re.sub(r'[^\w,.]', 'X', v) for k, v in user.items()}
        else:
            # Add some even trickier ones!
            fiendish_index = i % len(FIENDISH_TESTS)
            user.update({
                # Sort doesn't look past a NUL byte.
                "photo": "\x00%d" % (n - i),
                "audio": "%sn octet string %s%s ♫♬\x00lalala" % ('Aa'[i & 1],
                                                                 chr(i & 255),
                                                                 i),
                "displayNamePrintable": "%d\x00%c" % (i, i & 255),
                "adminDisplayName": "%d\x00b" % (n - i),
                "title": "%d%sb" % (n - i, '\x00' * i),

                # Names that vary only in case. Windows returns
                # equivalent addresses in the order they were put
                # in ('a st', 'A st',...). We don't check that.
                "street": "%s st" % (chr(65 | (i & 14) | ((i & 1) * 32))),

                "streetAddress": FIENDISH_TESTS[fiendish_index],
                "postalAddress": FIENDISH_TESTS[-fiendish_index],
            })

        if attrs is not None:
            user.update(attrs)

        user['dn'] = "cn=%s,%s" % (user['cn'], self.ou)

        self.users.append(user)
        self.ldb.add(user)
        return user

    def _delete_test_ou(self):
        """Best-effort removal of the test OU and everything beneath it.

        An LdbError is reported on stdout rather than raised, so that
        this can be used while another failure is already propagating.
        """
        try:
            self.ldb.delete(self.ou, ['tree_delete:1'])
        except ldb.LdbError as e:
            print("tried deleting %s, got error %s" % (self.ou, e))

    def setUp(self):
        """Connect, create the test OU and users, and precompute orders.

        If anything goes wrong after the OU has been created, the OU is
        removed again before the error propagates: unittest does not call
        tearDown() when setUp() fails, and a left-over OU would make the
        add in every following setUp() fail too.
        """
        super(BaseSortTests, self).setUp()
        self.ldb = SamDB(host, credentials=creds,
                         session_info=system_session(lp), lp=lp)

        self.base_dn = self.ldb.domain_dn()
        self.ou = "ou=sort,%s" % self.base_dn

        self.ldb.add({
            "dn": self.ou,
            "objectclass": "organizationalUnit"})

        populated = False
        try:
            self.users = []
            n = opts.elements
            for i in range(n):
                self.create_user(i, n)
            self._compute_expected_results()
            populated = True
        finally:
            if not populated:
                self._delete_test_ou()

    def _compute_expected_results(self):
        """Classify the user attributes and work out their sorted orders.

        Fills in self.expected_results and self.expected_results_binary,
        each mapping an attribute name to a (forward, reverse) pair of
        value lists, along with the sets/lists of attribute names that
        the individual checks iterate over.
        """
        attrs = set(self.users[0].keys()) - set([
            'objectclass', 'dn'])
        self.binary_sorted_keys = attrs.intersection(['audio',
                                                      'photo',
                                                      "msTSExpireDate4",
                                                      'serialNumber',
                                                      "displayNamePrintable"])

        self.numeric_sorted_keys = attrs.intersection(['flags',
                                                       'accountExpires'])

        self.timestamp_keys = attrs.intersection(['msTSExpireDate4'])

        self.int64_keys = set(['accountExpires'])

        # Everything that is neither binary nor numeric is compared with
        # the process locale's collation.
        self.locale_sorted_keys = [x for x in attrs if
                                   x not in (self.binary_sorted_keys |
                                             self.numeric_sorted_keys)]

        self.expected_results = {}
        self.expected_results_binary = {}

        for k in self.locale_sorted_keys:
            # Using key=locale.strxfrm fails on \x00
            forward = sorted((norm(x[k]) for x in self.users),
                             key=cmp_to_key_fn(locale.strcoll))
            reverse = list(reversed(forward))
            self.expected_results[k] = (forward, reverse)

        for k in self.binary_sorted_keys:
            forward = sorted((x[k] for x in self.users))
            reverse = list(reversed(forward))
            self.expected_results_binary[k] = (forward, reverse)
            self.expected_results[k] = (forward, reverse)

        # Fix up some because Python gets it wrong, using a Schwartzian
        # transform: sort on the value with NUL bytes removed, breaking
        # ties on the original value.
        for k in ('adminDisplayName', 'title', 'streetAddress',
                  'employeeNumber'):
            if k in self.expected_results:
                broken = self.expected_results[k][0]
                tmp = [(x.replace('\x00', ''), x) for x in broken]
                tmp.sort()
                fixed = [x[1] for x in tmp]
                self.expected_results[k] = (fixed, list(reversed(fixed)))

        # The address attributes take their values from FIENDISH_TESTS,
        # whose list order *is* the expected order; emit each value as
        # many times as it was actually used.
        for k in ('streetAddress', 'postalAddress'):
            if k in self.expected_results:
                c = {}
                for u in self.users:
                    c[u[k]] = c.get(u[k], 0) + 1
                fixed = []
                for x in FIENDISH_TESTS:
                    fixed += [norm(x)] * c.get(x, 0)

                rev = list(reversed(fixed))
                self.expected_results[k] = (fixed, rev)

    def tearDown(self):
        """Remove the test OU together with all the users under it."""
        super(BaseSortTests, self).tearDown()
        self.ldb.delete(self.ou, ['tree_delete:1'])

    def _sorted_search(self, sort_attr, rev, result_attr=None, oid=None):
        """Run a one-level search of the test OU with a server_sort control.

        :param sort_attr: attribute the server is asked to sort on
        :param rev: 0 for forward order, 1 for reverse
        :param result_attr: the single attribute to request; defaults to
            *sort_attr*
        :param oid: optional matching-rule OID appended to the control
        :return: whatever self.ldb.search() returns
        """
        control = "server_sort:1:%d:%s" % (rev, sort_attr)
        if oid is not None:
            control = "%s:%s" % (control, oid)
        if result_attr is None:
            result_attr = sort_attr
        return self.ldb.search(self.ou,
                               scope=ldb.SCOPE_ONELEVEL,
                               attrs=[result_attr],
                               controls=[control])

    def _test_server_sort_default(self):
        """Check forward and reverse order of every locale-sorted attribute."""
        attrs = self.locale_sorted_keys

        for attr in attrs:
            for rev in (0, 1):
                res = self._sorted_search(attr, rev)
                self.assertEqual(len(res), len(self.users))

                expected_order = self.expected_results[attr][rev]
                received_order = [norm(x[attr][0]) for x in res]
                if expected_order != received_order:
                    # Extra detail that the assertion message lacks.
                    print(attr, ['forward', 'reverse'][rev])
                    print("expected", expected_order)
                    print("recieved", received_order)
                    print("unnormalised:", [x[attr][0] for x in res])
                    print("unnormalised: «%s»" % '»  «'.join(str(x[attr][0])
                                                             for x in res))
                self.assertEqual(expected_order, received_order)

    def _test_server_sort_binary(self):
        """Check forward and reverse order of every binary-sorted attribute.

        Values are compared unnormalised, via str().
        """
        for attr in self.binary_sorted_keys:
            for rev in (0, 1):
                res = self._sorted_search(attr, rev)

                self.assertEqual(len(res), len(self.users))
                expected_order = self.expected_results_binary[attr][rev]
                received_order = [str(x[attr][0]) for x in res]
                if expected_order != received_order:
                    print(attr)
                    print(expected_order)
                    print(received_order)
                self.assertEqual(expected_order, received_order)

    def _test_server_sort_us_english(self):
        """Repeat the locale-sorted checks with an explicit matching rule."""
        # Windows doesn't support many matching rules, but does allow
        # the locale specific sorts -- if it has the locale installed.
        # The most reliable locale is the default US English, which
        # won't change the sort order.

        for lang, oid in [('en_US', '1.2.840.113556.1.4.1499'),
                          ]:

            for attr in self.locale_sorted_keys:
                for rev in (0, 1):
                    res = self._sorted_search(attr, rev, oid=oid)

                    self.assertEqual(len(res), len(self.users))
                    expected_order = self.expected_results[attr][rev]
                    received_order = [norm(x[attr][0]) for x in res]
                    if expected_order != received_order:
                        print(attr, lang)
                        print(['forward', 'reverse'][rev])
                        print("expected: ", expected_order)
                        print("recieved: ", received_order)
                        print("unnormalised:", [x[attr][0] for x in res])
                        print("unnormalised: «%s»" % '»  «'.join(str(x[attr][0])
                                                                 for x in res))

                    self.assertEqual(expected_order, received_order)

    def _test_server_sort_different_attr(self):
        """Sort on one attribute while requesting only a different one.

        Checks that the returned values follow the order of the sort
        attribute, and that the sort attribute itself is not included in
        the results.
        """

        # Each comparison function receives (sort value, result value)
        # pairs and looks only at the sort value.
        def cmp_locale(a, b):
            return locale.strcoll(a[0], b[0])

        def cmp_binary(a, b):
            return cmp_fn(a[0], b[0])

        def cmp_numeric(a, b):
            return cmp_fn(int(a[0]), int(b[0]))

        # For testing simplicity, the attributes in here need to be
        # unique for each user. Otherwise there are multiple possible
        # valid answers.
        sort_functions = {'cn': cmp_binary,
                          "employeeNumber": cmp_locale,
                          "accountExpires": cmp_numeric,
                          "msTSExpireDate4": cmp_binary}
        attrs = list(sort_functions.keys())
        # Pair every attribute with the next one, wrapping at the end.
        attr_pairs = zip(attrs, attrs[1:] + attrs[:1])

        for sort_attr, result_attr in attr_pairs:
            forward = sorted(((norm(x[sort_attr]), norm(x[result_attr]))
                             for x in self.users),
                             key=cmp_to_key_fn(sort_functions[sort_attr]))
            reverse = list(reversed(forward))

            for rev in (0, 1):
                res = self._sorted_search(sort_attr, rev,
                                          result_attr=result_attr)
                self.assertEqual(len(res), len(self.users))
                pairs = (forward, reverse)[rev]

                expected_order = [x[1] for x in pairs]
                received_order = [norm(x[result_attr][0]) for x in res]

                if expected_order != received_order:
                    print(sort_attr, result_attr, ['forward', 'reverse'][rev])
                    print("expected", expected_order)
                    print("recieved", received_order)
                    print("unnormalised:", [x[result_attr][0] for x in res])
                    print("unnormalised: «%s»" % '»  «'.join(str(x[result_attr][0])
                                                             for x in res))
                    print("pairs:", pairs)
                    # There are bugs in Windows that we don't want (or
                    # know how) to replicate regarding timestamp sorting.
                    # Let's remind ourselves.
                    if result_attr == "msTSExpireDate4":
                        print('-' * 72)
                        print("This test fails against Windows with the "
                              "default number of elements (33).")
                        print("Try with --elements=27 (or similar).")
                        print('-' * 72)

                self.assertEqual(expected_order, received_order)
                for x in res:
                    if sort_attr in x:
                        self.fail('the search for %s should not return %s' %
                                  (result_attr, sort_attr))
348
349
class SimpleSortTests(BaseSortTests):
    """Run every sort check against the simplified attribute values."""
    # Ask the base fixture to strip the values down and to leave out
    # the extra hard-to-sort attributes.
    avoid_tricky_sort = True

    def test_server_sort_binary(self):
        self._test_server_sort_binary()

    def test_server_sort_default(self):
        self._test_server_sort_default()

    def test_server_sort_different_attr(self):
        self._test_server_sort_different_attr()

    def test_server_sort_us_english(self):
        self._test_server_sort_us_english()
364
365
class UnicodeSortTests(BaseSortTests):
    """Run the sort checks against the full, tricky attribute values.

    The binary check is not exposed here.
    """
    # Keep the values and extra attributes exactly as the fixture
    # generates them.
    avoid_tricky_sort = False

    def test_server_sort_different_attr(self):
        self._test_server_sort_different_attr()

    def test_server_sort_us_english(self):
        self._test_server_sort_us_english()

    def test_server_sort_default(self):
        self._test_server_sort_default()
377
378
# A bare <host> argument gets a URL scheme: an existing file is opened
# as a tdb database, anything else is contacted over LDAP.
if "://" not in host:
    host = ("tdb://%s" if os.path.isfile(host) else "ldap://%s") % host


# Hand over to the subunit-aware runner for the tests in this module.
TestProgram(module=__name__, opts=subunitopts)