2 Unix SMB/CIFS implementation.
4 local testing of iconv routines. This tests the system iconv code against
5 the built-in iconv code
7 Copyright (C) Andrew Tridgell 2004
9 This program is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 2 of the License, or
12 (at your option) any later version.
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with this program; if not, write to the Free Software
21 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
25 #include "torture/torture.h"
26 #include "system/iconv.h"
27 #include "system/time.h"
28 #include "libcli/raw/libcliraw.h"
29 #include "torture/util.h"
33 generate a UTF-16LE buffer for a given unicode codepoint
/*
 * Encode a single Unicode codepoint as UTF-16LE using the system iconv.
 *
 * codepoint: value to encode; it is serialised below as four
 *            little-endian bytes (UCS-4LE) and used as the iconv input.
 * buf:       destination buffer handed to iconv() for the UTF-16LE output.
 * size:      out-parameter; how it is filled in is not shown in the
 *            visible lines - presumably the byte length of the output,
 *            TODO confirm.
 *
 * Returns an int status; test_first_1m() compares the result against 0.
 */
35 static int gen_codepoint_utf16(unsigned int codepoint,
36 char *buf, size_t *size)
41 size_t size_in, size_out, ret;
/* iconv_open() takes (tocode, fromcode): this converts UCS-4LE to UTF-16LE */
43 cd = iconv_open("UTF-16LE", "UCS-4LE");
/* (iconv_t)-1 is iconv_open()'s failure value */
44 if (cd == (iconv_t)-1) {
/* split the codepoint into its four bytes, least significant first */
50 in[0] = codepoint & 0xFF;
51 in[1] = (codepoint>>8) & 0xFF;
52 in[2] = (codepoint>>16) & 0xFF;
53 in[3] = (codepoint>>24) & 0xFF;
/* iconv() advances ptr_in/buf and decrements size_in/size_out as it converts */
59 ret = iconv(cd, &ptr_in, &size_in, &buf, &size_out);
68 work out the unicode codepoint of the first character in the buffer, which is encoded in the given charset
/*
 * Decode the leading character of a buffer into a Unicode codepoint by
 * converting it from "charset" to UCS-4LE with the system iconv.
 *
 * buf:     input bytes, passed to iconv() as the source.
 * size:    length of buf in bytes (presumably used to initialise size_in -
 *          the assignment is not visible here, TODO confirm).
 * charset: name of the encoding of buf, passed to iconv_open() as the
 *          source charset.
 *
 * Returns the four bytes of "out" combined in little-endian order.
 */
70 static unsigned int get_codepoint(char *buf, size_t size, const char *charset)
75 size_t size_out, size_in, ret;
/* iconv_open() takes (tocode, fromcode): this converts charset to UCS-4LE */
77 cd = iconv_open("UCS-4LE", charset);
81 size_out = sizeof(out);
/* zero the output first so bytes iconv() does not write read back as 0 */
82 memset(out, 0, sizeof(out));
84 ret = iconv(cd, &buf, &size_in, &ptr_out, &size_out);
/* reassemble the UCS-4LE bytes into a single value */
88 return out[0] | (out[1]<<8) | (out[2]<<16) | (out[3]<<24);
92 display a buffer with name prefix
/*
 * Dump a byte buffer to stdout for diagnostics.
 *
 * name: label for the dump (presumably printed before the bytes - the
 *       statement that prints it is not visible here, TODO confirm).
 * buf:  bytes to display.
 * size: number of bytes of buf to display.
 */
94 static void show_buf(const char *name, uint8_t *buf, size_t size)
98 for (i=0;i<size;i++) {
/* each byte is shown as two hex digits followed by a space */
99 printf("%02x ", buf[i]);
105 given a UTF-16LE buffer, test the system and built-in iconv code to
106 make sure they do exactly the same thing in converting the buffer to
107 "charset", then convert it back again and ensure we get the same buffer back
/*
 * Cross-check the built-in smb_iconv code against the system iconv for
 * one input buffer.
 *
 * test:    torture context used for failure and comment reporting.
 * inbuf:   input bytes; the converters below are opened with "UTF-16LE"
 *          as the source charset.
 * size:    length of inbuf in bytes.
 * charset: name of the charset to convert to (and back from).
 *
 * Visible steps: convert inbuf to charset with smb_iconv (buf1) and with
 * the system iconv (buf2); compare the return values, errno values,
 * remaining output space, remaining input and the converted bytes; then
 * convert the result back to UTF-16LE with smb_iconv (buf3) and compare it
 * with inbuf. For charsets whose name does not start with "UTF" the
 * round trip is not expected to be exact.
 *
 * cd2, cd3 and last_charset are static, so they keep their values between
 * calls.
 *
 * Returns an int; the callers in this file treat a zero result as failure
 * (they test !test_buffer(...)).
 */
110 static int test_buffer(struct torture_context *test,
111 uint8_t *inbuf, size_t size, const char *charset)
113 uint8_t buf1[1000], buf2[1000], buf3[1000];
114 size_t outsize1, outsize2, outsize3;
117 size_t size_in1, size_in2, size_in3;
118 size_t ret1, ret2, ret3, len1, len2;
122 static smb_iconv_t cd2, cd3;
123 static const char *last_charset;
/* release the built-in converters left open by an earlier call */
125 if (cd && last_charset) {
127 smb_iconv_close(cd2);
128 smb_iconv_close(cd3);
/* system converter: UTF-16LE -> charset (iconv_open takes tocode first) */
133 cd = iconv_open(charset, "UTF-16LE");
134 if (cd == (iconv_t)-1) {
/* built-in converters: cd2 is UTF-16LE -> charset, cd3 is charset -> UTF-16LE */
138 cd2 = smb_iconv_open(charset, "UTF-16LE");
139 cd3 = smb_iconv_open("UTF-16LE", charset);
140 last_charset = charset;
143 /* internal convert to charset - placing result in buf1 */
147 outsize1 = sizeof(buf1);
149 memset(ptr_out, 0, outsize1);
151 ret1 = smb_iconv(cd2, &ptr_in, &size_in1, &ptr_out, &outsize1);
154 /* system convert to charset - placing result in buf2 */
158 outsize2 = sizeof(buf2);
160 memset(ptr_out, 0, outsize2);
162 ret2 = iconv(cd, discard_const_p(char *, &ptr_in), &size_in2, &ptr_out, &outsize2);
/* outsizeN is the space left in bufN, so lenN is the number of bytes produced */
165 len1 = sizeof(buf1) - outsize1;
166 len2 = sizeof(buf2) - outsize2;
168 /* codepoints above 1M are not interesting for now */
170 memcmp(buf1, buf2, len1) == 0 &&
171 get_codepoint(buf2+len1, len2-len1, charset) >= (1<<20)) {
175 memcmp(buf1, buf2, len2) == 0 &&
176 get_codepoint(buf1+len2, len1-len2, charset) >= (1<<20)) {
181 torture_fail(test, "ret1=%d ret2=%d", (int)ret1, (int)ret2);
/* both implementations must report the same errno */
185 if (errno1 != errno2) {
186 torture_fail(test, "e1=%s e2=%s", strerror(errno1), strerror(errno2));
187 show_buf(" rem1:", inbuf+(size-size_in1), size_in1);
188 show_buf(" rem2:", inbuf+(size-size_in2), size_in2);
/* both implementations must leave the same amount of output space ... */
192 if (outsize1 != outsize2) {
193 torture_fail(test, "outsize mismatch outsize1=%d outsize2=%d",
194 (int)outsize1, (int)outsize2);
/* ... and the same amount of unconsumed input */
198 if (size_in1 != size_in2) {
199 torture_fail(test, "size_in mismatch size_in1=%d size_in2=%d",
200 (int)size_in1, (int)size_in2);
206 memcmp(buf1, buf2, len1) != 0) {
207 torture_fail(test, "size=%d ret1=%d ret2=%d", (int)size, (int)ret1, (int)ret2);
208 show_buf(" IN1:", inbuf, size-size_in1);
209 show_buf(" IN2:", inbuf, size-size_in2);
210 show_buf("OUT1:", buf1, len1);
211 show_buf("OUT2:", buf2, len2);
/* when one output is a prefix of the other, report the first extra codepoint */
212 if (len2 > len1 && memcmp(buf1, buf2, len1) == 0) {
213 torture_comment(test, "next codepoint is %u",
214 get_codepoint(buf2+len1, len2-len1, charset));
216 if (len1 > len2 && memcmp(buf1, buf2, len2) == 0) {
217 torture_comment(test, "next codepoint is %u",
218 get_codepoint(buf1+len2,len1-len2, charset));
224 /* convert back to UTF-16, putting result in buf3 */
/* size_in1 is what the first conversion left unconsumed, so size becomes
   the number of input bytes that were converted (presumably - the
   initialisation of size_in1 is not visible here) */
225 size = size - size_in1;
229 outsize3 = sizeof(buf3);
231 memset(ptr_out, 0, outsize3);
232 ret3 = smb_iconv(cd3, &ptr_in, &size_in3, &ptr_out, &outsize3);
234 /* we only internally support the first 1M codepoints */
/* detects a round trip that stopped short at a codepoint >= 1<<20 in inbuf */
235 if (outsize3 != sizeof(buf3) - size &&
236 get_codepoint(inbuf+sizeof(buf3) - outsize3,
237 size - (sizeof(buf3) - outsize3),
238 "UTF-16LE") >= (1<<20)) {
243 torture_fail(test, "pull failed - %s", strerror(errno));
247 if (strncmp(charset, "UTF", 3) != 0) {
248 /* don't expect perfect mappings for non UTF charsets */
/* the round trip must produce exactly "size" bytes ... */
253 if (outsize3 != sizeof(buf3) - size) {
254 torture_fail(test, "wrong outsize3 - %d should be %d",
255 (int)outsize3, (int)(sizeof(buf3) - size));
/* ... and those bytes must equal the original input */
259 if (memcmp(buf3, inbuf, size) != 0) {
260 torture_fail(test, "pull bytes mismatch:");
261 show_buf("inbuf", inbuf, size);
262 show_buf(" buf3", buf3, sizeof(buf3) - outsize3);
264 torture_comment(test, "next codepoint is %u\n",
265 get_codepoint(inbuf+sizeof(buf3) - outsize3,
266 size - (sizeof(buf3) - outsize3),
271 torture_fail(test, "test_buffer failed for charset %s", charset);
279 test the push_codepoint() and next_codepoint() functions for a given codepoint
/*
 * Round-trip one codepoint through push_codepoint() and next_codepoint().
 *
 * test: torture context used for assertions and failure reporting.
 * data: pointer to an unsigned int holding the codepoint to test.
 *
 * Reports a failure if next_codepoint() returns a different codepoint than
 * the one encoded, or reports a wrong size. Returns an int; the caller
 * test_next_codepoint() treats a zero result as failure.
 */
282 static int test_codepoint(struct torture_context *test, const void *data)
286 unsigned int codepoint = *((const unsigned int *)data);
/* encode the codepoint into buf; size receives push_codepoint()'s result */
289 size = push_codepoint(buf, codepoint);
/* NOTE(review): asserts the codepoint lies in 0xd800..0x10000; presumably
   this is only reached when push_codepoint() fails - the guarding
   condition is not visible here, confirm */
291 torture_assert(test, codepoint >= 0xd800 && codepoint <= 0x10000, NULL);
/* fill the four bytes after the encoded character with random values */
294 buf[size] = random();
295 buf[size+1] = random();
296 buf[size+2] = random();
297 buf[size+3] = random();
/* decode again; size2 receives the size reported by next_codepoint() */
299 c = next_codepoint(buf, &size2);
301 if (c != codepoint) {
302 torture_fail(test, "next_codepoint(%u) failed - gave %u", codepoint, c);
307 torture_fail(test, "next_codepoint(%u) gave wrong size %d (should be %d)\n",
308 codepoint, (int)size2, (int)size);
/*
 * Test case: run test_codepoint() on every codepoint from 0 up to, but not
 * including, 1<<20.
 *
 * test: torture context passed through to test_codepoint().
 * data: not used in the visible lines.
 */
315 static BOOL test_next_codepoint(struct torture_context *test, const void *data)
317 unsigned int codepoint;
318 for (codepoint=0;codepoint<(1<<20);codepoint++) {
/* a zero result from test_codepoint() is treated as failure */
319 if (!test_codepoint(test, &codepoint))
/*
 * Test case: for every codepoint from 0 up to, but not including, 1<<20,
 * build its UTF-16LE form with gen_codepoint_utf16() and run test_buffer()
 * on it with "UTF-8" as the target charset.
 *
 * test: torture context passed through to test_buffer().
 * data: not used in the visible lines.
 */
325 static BOOL test_first_1m(struct torture_context *test, const void *data)
327 unsigned int codepoint;
329 unsigned char inbuf[1000];
331 for (codepoint=0;codepoint<(1<<20);codepoint++) {
/* a non-zero result means the codepoint could not be generated */
332 if (gen_codepoint_utf16(codepoint, inbuf, &size) != 0) {
/* progress output once every 1000 codepoints */
336 if (codepoint % 1000 == 0) {
/* NOTE(review): the progress line is printed when the "torture:progress"
   parameter evaluates to false - confirm this polarity is intended */
337 if (!lp_parm_bool(-1, "torture", "progress", True)) {
338 printf("codepoint=%u \r", codepoint);
342 if (!test_buffer(test, inbuf, size, "UTF-8"))
/*
 * Test case: feed randomly generated buffers through test_buffer(), once
 * with "UTF-8" and once with "CP850" as the target charset.
 *
 * test: torture context used for comments and passed to test_buffer().
 * data: not used in the visible lines.
 *
 * NOTE(review): the loop runs 500000 iterations, while the test case is
 * registered under the name "5M random UTF-16LE sequences" - confirm which
 * count is intended.
 */
349 static BOOL test_random_5m(struct torture_context *test, const void *data)
351 unsigned char inbuf[1000];
353 for (i=0;i<500000;i++) {
/* NOTE(review): progress is reported when the "torture:progress" parameter
   evaluates to false - confirm this polarity is intended */
358 if (!lp_parm_bool(-1, "torture", "progress", True)) {
359 torture_comment(test, "i=%u \r", i);
/* random buffer length in the range 0..99 bytes */
363 size = random() % 100;
364 for (c=0;c<size;c++) {
/* roughly 80% of the time the byte is a 7-bit value (0..127) */
365 if (random() % 100 < 80) {
366 inbuf[c] = random() % 128;
370 if (random() % 10 == 0) {
373 if (random() % 10 == 0) {
377 if (!test_buffer(test, inbuf, size, "UTF-8"))
380 if (!test_buffer(test, inbuf, size, "CP850"))
/*
 * Build the "LOCAL-ICONV" torture suite.
 *
 * mem_ctx: talloc context passed to torture_suite_create().
 *
 * Probes the system iconv for the conversion used by gen_codepoint_utf16()
 * and registers three test cases: next_codepoint(), the first 1M
 * codepoints, and the random-sequence test.
 */
387 struct torture_suite *torture_local_iconv(TALLOC_CTX *mem_ctx)
390 struct torture_suite *suite = torture_suite_create(mem_ctx, "LOCAL-ICONV");
/* NOTE(review): iconv_open() takes (tocode, fromcode), so this opens
   UCS-4LE -> UTF-16LE, while the message below words the direction the
   other way round - confirm the message */
394 cd = iconv_open("UTF-16LE", "UCS-4LE");
395 if (cd == (iconv_t)-1) {
396 printf("unable to test - system iconv library does not support UTF-16LE -> UCS-4LE\n");
401 torture_suite_add_simple_tcase(suite, "next_codepoint()",
402 test_next_codepoint, NULL);
404 torture_suite_add_simple_tcase(suite, "first 1M codepoints",
405 test_first_1m, NULL);
407 torture_suite_add_simple_tcase(suite, "5M random UTF-16LE sequences",
408 test_random_5m, NULL);
/*
 * Variant of torture_local_iconv() for builds without a native iconv
 * library (per the message it prints); it only reports that the test
 * cannot be run.
 *
 * NOTE(review): presumably selected by a preprocessor conditional that is
 * not visible here - confirm. The return value is not visible either.
 */
415 struct torture_suite *torture_local_iconv(TALLOC_CTX *mem_ctx)
417 printf("No native iconv library - can't run iconv test\n");