2 Unix SMB/CIFS implementation.
4 local testing of iconv routines. This tests the system iconv code against
5 the built-in iconv code
7 Copyright (C) Andrew Tridgell 2004
9 This program is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 3 of the License, or
12 (at your option) any later version.
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with this program. If not, see <http://www.gnu.org/licenses/>.
24 #include "torture/torture.h"
25 #include "system/iconv.h"
26 #include "system/time.h"
27 #include "libcli/raw/libcliraw.h"
28 #include "param/param.h"
29 #include "torture/util.h"
/*
  Decide whether the system-versus-built-in iconv comparison tests can
  run in this environment.

  A skip is reported through torture_skip() when the iconv/native
  parameter lookup (default true) yields false, or when the system
  iconv_open() cannot supply one of the two converters probed below.

  NOTE(review): the declaration of cd and whatever follows the probes
  (cleanup, final return value) are not visible in this listing -
  confirm against the full file.
*/
34 static bool iconv_untestable(struct torture_context *tctx)
38 if (!lpcfg_parm_bool(tctx->lp_ctx, NULL, "iconv", "native", true))
39 torture_skip(tctx, "system iconv disabled - skipping test");
/* iconv_open() takes (tocode, fromcode), so this probes a UCS-4LE to
   UTF-16LE converter - the same one gen_codepoint_utf16() opens.
   NOTE(review): the skip message names the opposite direction. */
41 cd = iconv_open("UTF-16LE", "UCS-4LE");
42 if (cd == (iconv_t)-1)
43 torture_skip(tctx, "unable to test - system iconv library does not support UTF-16LE -> UCS-4LE");
/* Probe a CP850 to UTF-16LE converter.
   NOTE(review): test_buffer() opens the UTF-16LE to CP850 direction,
   which is what the message below describes - confirm which direction
   this probe is meant to check. */
46 cd = iconv_open("UTF-16LE", "CP850");
47 if (cd == (iconv_t)-1)
48 torture_skip(tctx, "unable to test - system iconv library does not support UTF-16LE -> CP850\n");
55 generate a UTF-16LE buffer for a given unicode codepoint
/*
  Produce the UTF-16LE encoding of one codepoint using the system iconv.

  codepoint: value to encode.
  buf:       destination passed to iconv() as the output buffer.
  size:      out-parameter; NOTE(review): the line that stores into it is
             not visible here - presumably the number of bytes written.

  Returns an int status that the caller in this file compares against 0.
*/
57 static int gen_codepoint_utf16(unsigned int codepoint,
58 char *buf, size_t *size)
63 size_t size_in, size_out, ret;
/* iconv_open(tocode, fromcode): convert from UCS-4LE to UTF-16LE */
65 cd = iconv_open("UTF-16LE", "UCS-4LE");
66 if (cd == (iconv_t)-1) {
/* split the codepoint into four bytes, least significant first,
   i.e. its UCS-4LE representation */
72 in[0] = codepoint & 0xFF;
73 in[1] = (codepoint>>8) & 0xFF;
74 in[2] = (codepoint>>16) & 0xFF;
75 in[3] = (codepoint>>24) & 0xFF;
/* NOTE(review): the setup of ptr_in, size_in and size_out is not visible
   here; ptr_in presumably points at in. */
81 ret = iconv(cd, &ptr_in, &size_in, &buf, &size_out);
90 work out the unicode codepoint of the first character in the buffer, which is encoded in the given charset
/*
  Decode the start of buf from charset into UCS-4LE with the system iconv
  and return the first four output bytes assembled as a little-endian
  value.

  buf:     input bytes encoded in charset.
  size:    NOTE(review): presumably the number of input bytes; the line
           that copies it into size_in is not visible here.
  charset: name of the source encoding handed to iconv_open().
*/
92 static unsigned int get_codepoint(char *buf, size_t size, const char *charset)
97 size_t size_out, size_in, ret;
/* iconv_open(tocode, fromcode): convert from charset to UCS-4LE */
99 cd = iconv_open("UCS-4LE", charset);
102 ptr_out = (char *)out;
103 size_out = sizeof(out);
/* zero the output first so bytes the conversion does not write read as 0 */
104 memset(out, 0, sizeof(out));
106 ret = iconv(cd, &buf, &size_in, &ptr_out, &size_out);
/* reassemble the little-endian bytes.
   NOTE(review): if out holds 8-bit unsigned elements they are promoted
   to int, so out[3]<<24 can shift into the sign bit when out[3] >= 0x80;
   the declaration of out is not visible here - confirm. */
110 return out[0] | (out[1]<<8) | (out[2]<<16) | (out[3]<<24);
114 display a buffer with name prefix
/*
  Dump size bytes of buf to stdout as two-digit hex values, each followed
  by a space.

  NOTE(review): the statement that uses name is not visible in this
  listing; presumably it is printed as a label before the bytes.
*/
116 static void show_buf(const char *name, uint8_t *buf, size_t size)
120 for (i=0;i<size;i++) {
121 printf("%02x ", buf[i]);
127 given a UTF-16LE buffer, test the system and built-in iconv code to
128 make sure they do exactly the same thing in converting the buffer to
129 "charset", then convert it back again and ensure we get the same
/*
  Compare the built-in (smb_iconv) and system (iconv) conversion of a
  UTF-16LE buffer to charset, then convert the built-in result back to
  UTF-16LE and compare it with the input.

  test:    torture context used for assertions, comments and allocation.
  inbuf:   input bytes, treated as UTF-16LE.
  size:    number of bytes in inbuf.
  charset: target charset name.

  The checks, in order: return values, errno values, remaining output
  space, remaining input size and produced bytes must match between the
  two implementations; the reverse conversion must return 0; and, for
  charsets whose name starts with UTF, the round trip must reproduce the
  consumed part of inbuf.

  NOTE(review): many lines of this function (some declarations, the
  errno captures, early returns, closing braces) are not visible in this
  listing - confirm details against the full file.
*/
132 static bool test_buffer(struct torture_context *test,
133 uint8_t *inbuf, size_t size, const char *charset)
/* one output buffer per conversion: built-in, system, round trip */
135 uint8_t buf1[1000], buf2[1000], buf3[1000];
136 size_t outsize1, outsize2, outsize3;
139 size_t size_in1, size_in2, size_in3;
140 size_t ret1, ret2, ret3, len1, len2;
/* converter handles and the last charset are static, so they persist
   from one call to the next */
143 static smb_iconv_t cd2, cd3;
144 static const char *last_charset;
/* a previous call left handles behind: close the built-in ones */
146 if (cd && last_charset) {
148 smb_iconv_close(cd2);
149 smb_iconv_close(cd3);
/* iconv_open(tocode, fromcode): system converter from UTF-16LE to
   charset.
   NOTE(review): the failure message below words the direction the
   other way round. */
154 cd = iconv_open(charset, "UTF-16LE");
155 if (cd == (iconv_t)-1) {
157 talloc_asprintf(test,
158 "failed to open %s to UTF-16LE",
/* built-in converters: cd2 mirrors the system handle, cd3 is opened with
   the two charset arguments swapped and is used for the conversion back.
   The last argument is the iconv/native parameter (default true). */
161 cd2 = smb_iconv_open_ex(test, charset, "UTF-16LE", lpcfg_parm_bool(test->lp_ctx, NULL, "iconv", "native", true));
162 cd3 = smb_iconv_open_ex(test, "UTF-16LE", charset, lpcfg_parm_bool(test->lp_ctx, NULL, "iconv", "native", true));
163 last_charset = charset;
166 /* internal convert to charset - placing result in buf1 */
167 ptr_in = (char *)inbuf;
168 ptr_out = (char *)buf1;
170 outsize1 = sizeof(buf1);
172 memset(ptr_out, 0, outsize1);
174 ret1 = smb_iconv(cd2, (const char **) &ptr_in, &size_in1, &ptr_out, &outsize1);
177 /* system convert to charset - placing result in buf2 */
178 ptr_in = (char *)inbuf;
179 ptr_out = (char *)buf2;
181 outsize2 = sizeof(buf2);
183 memset(ptr_out, 0, outsize2);
185 ret2 = iconv(cd, &ptr_in, &size_in2, &ptr_out, &outsize2);
/* outsizeN is the space left in the buffer, so lenN is the number of
   bytes each conversion produced */
188 len1 = sizeof(buf1) - outsize1;
189 len2 = sizeof(buf2) - outsize2;
/* The two conditions below look at the case where one output is a prefix
   of the other and the first extra character decodes to a codepoint of
   at least 1<<20.
   NOTE(review): the action taken is not visible here. */
191 /* codepoints above 1M are not interesting for now */
193 memcmp(buf1, buf2, len1) == 0 &&
194 get_codepoint((char *)(buf2+len1), len2-len1, charset) >= (1<<20)) {
198 memcmp(buf1, buf2, len2) == 0 &&
199 get_codepoint((char *)(buf1+len2), len1-len2, charset) >= (1<<20)) {
203 torture_assert_int_equal(test, ret1, ret2, "ret mismatch");
/* NOTE(review): errno1 and errno2 are assigned on lines not visible
   here, presumably right after each conversion call. On mismatch the
   unconsumed tail of the input is dumped for both conversions. */
205 if (errno1 != errno2) {
206 show_buf(" rem1:", inbuf+(size-size_in1), size_in1);
207 show_buf(" rem2:", inbuf+(size-size_in2), size_in2);
208 torture_fail(test, talloc_asprintf(test,
210 errno1, strerror(errno1),
211 errno2, strerror(errno2)));
214 torture_assert_int_equal(test, outsize1, outsize2, "outsize mismatch");
216 torture_assert_int_equal(test, size_in1, size_in2, "size_in mismatch");
/* produced bytes differ: dump consumed input and both outputs, report the
   first extra codepoint when one output is a prefix of the other, fail */
219 memcmp(buf1, buf2, len1) != 0) {
220 torture_comment(test, "size=%d ret1=%d ret2=%d", (int)size, (int)ret1, (int)ret2);
221 show_buf(" IN1:", inbuf, size-size_in1);
222 show_buf(" IN2:", inbuf, size-size_in2);
223 show_buf("OUT1:", buf1, len1);
224 show_buf("OUT2:", buf2, len2);
225 if (len2 > len1 && memcmp(buf1, buf2, len1) == 0) {
226 torture_comment(test, "next codepoint is %u",
227 get_codepoint((char *)(buf2+len1), len2-len1, charset));
229 if (len1 > len2 && memcmp(buf1, buf2, len2) == 0) {
230 torture_comment(test, "next codepoint is %u",
231 get_codepoint((char *)(buf1+len2),len1-len2, charset));
234 torture_fail(test, "failed");
237 /* convert back to UTF-16, putting result in buf3 */
/* from here on size counts only the input bytes the forward conversion
   consumed */
238 size = size - size_in1;
239 ptr_in = (char *)buf1;
240 ptr_out = (char *)buf3;
242 outsize3 = sizeof(buf3);
244 memset(ptr_out, 0, outsize3);
245 ret3 = smb_iconv(cd3, (const char **) &ptr_in, &size_in3, &ptr_out, &outsize3);
/* sizeof(buf3) - outsize3 is the number of bytes converted back; when it
   differs from size, the codepoint at that offset in inbuf is checked
   against 1<<20 */
247 /* we only internally support the first 1M codepoints */
248 if (outsize3 != sizeof(buf3) - size &&
249 get_codepoint((char *)(inbuf+sizeof(buf3) - outsize3),
250 size - (sizeof(buf3) - outsize3),
251 "UTF-16LE") >= (1<<20)) {
255 torture_assert_int_equal(test, ret3, 0, talloc_asprintf(test,
256 "pull failed - %s", strerror(errno)));
258 if (strncmp(charset, "UTF", 3) != 0) {
259 /* don't expect perfect mappings for non UTF charsets */
/* round trip must give back exactly size bytes, equal to the input */
264 torture_assert_int_equal(test, outsize3, sizeof(buf3) - size,
267 if (memcmp(buf3, inbuf, size) != 0) {
268 torture_comment(test, "pull bytes mismatch:");
269 show_buf("inbuf", inbuf, size);
270 show_buf(" buf3", buf3, sizeof(buf3) - outsize3);
271 torture_comment(test, "next codepoint is %u\n",
272 get_codepoint((char *)(inbuf+sizeof(buf3) - outsize3),
273 size - (sizeof(buf3) - outsize3),
275 torture_fail(test, "");
283 test the push_codepoint() and next_codepoint() functions for a given
/*
  Round-trip one codepoint through push_codepoint_convenience() and
  next_codepoint_convenience() and check that both the value and the
  encoded length come back unchanged.

  tctx:      torture context; also supplies the iconv convenience handle.
  codepoint: value to encode and decode.

  NOTE(review): the declarations of buf, size, size2 and c and the final
  return are not visible in this listing.
*/
286 static bool test_codepoint(struct torture_context *tctx, unsigned int codepoint)
292 size = push_codepoint_convenience(lpcfg_iconv_convenience(tctx->lp_ctx), (char *)buf, codepoint);
/* an encoding failure (-1) is tolerated only for 0xd800..0x10000.
   NOTE(review): that range is wider than the UTF-16 surrogate range
   0xd800..0xdfff - confirm the upper bound is intended. */
293 torture_assert(tctx, size != -1 || (codepoint >= 0xd800 && codepoint <= 0x10000),
294 "Invalid Codepoint range");
296 if (size == -1) return true;
/* put four random bytes directly after the encoded character;
   presumably so the decoder cannot depend on what follows it */
298 buf[size] = random();
299 buf[size+1] = random();
300 buf[size+2] = random();
301 buf[size+3] = random();
303 c = next_codepoint_convenience(lpcfg_iconv_convenience(tctx->lp_ctx), (char *)buf, &size2);
305 torture_assert(tctx, c == codepoint,
306 talloc_asprintf(tctx,
307 "next_codepoint(%u) failed - gave %u", codepoint, c));
309 torture_assert(tctx, size2 == size,
310 talloc_asprintf(tctx, "next_codepoint(%u) gave wrong size %d (should be %d)\n",
311 codepoint, (int)size2, (int)size));
/*
  Run test_codepoint() for every codepoint from 0 up to, but not
  including, 1<<20.

  NOTE(review): the statements taken when iconv_untestable() or
  test_codepoint() report a problem are not visible in this listing.
*/
316 static bool test_next_codepoint(struct torture_context *tctx)
318 unsigned int codepoint;
319 if (iconv_untestable(tctx))
322 for (codepoint=0;codepoint<(1<<20);codepoint++) {
323 if (!test_codepoint(tctx, codepoint))
/*
  For every codepoint below 1<<20, build its UTF-16LE form with
  gen_codepoint_utf16() and run it through test_buffer() with UTF-8 as
  the target charset.

  NOTE(review): the declaration of size and the statements inside the
  failure branches are not visible in this listing.
*/
329 static bool test_first_1m(struct torture_context *tctx)
331 unsigned int codepoint;
333 unsigned char inbuf[1000];
335 if (iconv_untestable(tctx))
338 for (codepoint=0;codepoint<(1<<20);codepoint++) {
339 if (gen_codepoint_utf16(codepoint, (char *)inbuf, &size) != 0) {
/* every 1000 codepoints, emit a progress line if the progress setting
   (default true) is enabled */
343 if (codepoint % 1000 == 0) {
344 if (torture_setting_bool(tctx, "progress", true)) {
345 torture_comment(tctx, "codepoint=%u \r", codepoint);
350 if (!test_buffer(tctx, inbuf, size, "UTF-8"))
/*
  Feed randomly generated buffers of 0 to 99 bytes through test_buffer(),
  once with UTF-8 and once with CP850 as the target charset.

  NOTE(review): the loop runs 500000 iterations, while the function name
  and the label used when registering the test say 5M - confirm which is
  intended.
  NOTE(review): the declarations of i, c and size, the condition guarding
  the progress output and the bodies of the two one-in-ten branches are
  not visible in this listing.
*/
356 static bool test_random_5m(struct torture_context *tctx)
358 unsigned char inbuf[1000];
361 if (iconv_untestable(tctx))
364 for (i=0;i<500000;i++) {
369 if (torture_setting_bool(tctx, "progress", true)) {
370 torture_comment(tctx, "i=%u \r", i);
/* pick a length below 100, then fill the buffer byte by byte */
375 size = random() % 100;
376 for (c=0;c<size;c++) {
/* roughly 80 percent of the bytes are drawn from 0..127 */
377 if (random() % 100 < 80) {
378 inbuf[c] = random() % 128;
382 if (random() % 10 == 0) {
385 if (random() % 10 == 0) {
389 if (!test_buffer(tctx, inbuf, size, "UTF-8")) {
390 printf("i=%d failed UTF-8\n", i);
394 if (!test_buffer(tctx, inbuf, size, "CP850")) {
395 printf("i=%d failed CP850\n", i);
/*
  Exercise convert_string_talloc() with CH_UTF16MUNGED as the source
  charset: first on a random buffer, then on a fixed 10-character input
  converted to CH_UTF16 and to CH_UTF8.

  NOTE(review): the declarations of buf, dest, le1, munged1, out1, ret
  and i, and the loop header around the SSVAL() call, are not visible in
  this listing.
  NOTE(review): mem_ctx is freed on the last visible line only; if
  torture_fail() or torture_assert() leave the function early it stays
  attached to its parent tctx - confirm that is acceptable.
*/
403 static bool test_string2key(struct torture_context *tctx)
407 TALLOC_CTX *mem_ctx = talloc_new(tctx);
/* random length in 1..1000, counted in 2-byte units (len*2 bytes below) */
408 size_t len = (random()%1000)+1;
/* fixed input: contains 0xd805 and 0xdcf0, each standing alone, and an
   embedded 0 */
409 const uint16_t in1[10] = { 'a', 0xd805, 'b', 0xdcf0, 'c', 0, 'd', 'e', 'f', 'g' };
/* expected UTF-8 result: 0xd805 and 0xdcf0 each appear as the bytes
   EF BF BD (U+FFFD) and the embedded 0 appears as 0x01 */
415 const char *correct = "a\357\277\275b\357\277\275c\001defg";
417 buf = talloc_size(mem_ctx, len*2);
418 generate_random_buffer((uint8_t *)buf, len*2);
420 torture_comment(tctx, "converting random buffer\n");
/* only success is checked for the random buffer, not the output */
422 if (!convert_string_talloc(mem_ctx, CH_UTF16MUNGED, CH_UTF8, (void *)buf, len*2, (void**)&dest, &ret, false)) {
423 torture_fail(tctx, "Failed to convert random buffer\n");
/* store each 16-bit value of in1 into le1 at byte offset 2*i */
427 SSVAL(&le1[2*i], 0, in1[i]);
430 torture_comment(tctx, "converting fixed buffer to UTF16\n");
/* NOTE(review): the failure message names UTF16_MUNGED as the target,
   but the call converts from CH_UTF16MUNGED to CH_UTF16 */
432 if (!convert_string_talloc(mem_ctx, CH_UTF16MUNGED, CH_UTF16, (void *)le1, 20, (void**)&munged1, &ret, false)) {
433 torture_fail(tctx, "Failed to convert fixed buffer to UTF16_MUNGED\n");
/* 10 input characters must still occupy 20 bytes after conversion */
436 torture_assert(tctx, ret == 20, "conversion should give 20 bytes\n");
438 torture_comment(tctx, "converting fixed buffer to UTF8\n");
440 if (!convert_string_talloc(mem_ctx, CH_UTF16MUNGED, CH_UTF8, (void *)le1, 20, (void**)&out1, &ret, false)) {
441 torture_fail(tctx, "Failed to convert fixed buffer to UTF8\n");
444 torture_assert(tctx, strcmp(correct, (const char *) out1) == 0,
445 "conversion gave incorrect result\n");
447 talloc_free(mem_ctx);
/*
  Create the ICONV torture suite on mem_ctx and register the iconv tests
  with it.

  NOTE(review): the name string2key is registered twice, as the first and
  the last entry; the callback arguments on the continuation lines are
  mostly not visible in this listing - confirm whether the second
  registration is intentional.
  NOTE(review): the return statement is not visible in this listing.
*/
452 struct torture_suite *torture_local_iconv(TALLOC_CTX *mem_ctx)
454 struct torture_suite *suite = torture_suite_create(mem_ctx, "ICONV");
456 torture_suite_add_simple_test(suite, "string2key",
459 torture_suite_add_simple_test(suite, "next_codepoint()",
460 test_next_codepoint);
462 torture_suite_add_simple_test(suite, "first 1M codepoints",
465 torture_suite_add_simple_test(suite, "5M random UTF-16LE sequences",
468 torture_suite_add_simple_test(suite, "string2key",
/*
  Alternative definition of torture_local_iconv() that only prints a
  notice saying the iconv test cannot run.

  NOTE(review): presumably selected by a preprocessor conditional when no
  native iconv library is available; neither the conditional nor the
  return statement is visible in this listing.
*/
475 struct torture_suite *torture_local_iconv(TALLOC_CTX *mem_ctx)
477 printf("No native iconv library - can't run iconv test\n");