2 Unix SMB/CIFS implementation.
4 local testing of iconv routines. This tests the system iconv code against
5 the built-in iconv code
7 Copyright (C) Andrew Tridgell 2004
9 This program is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 2 of the License, or
12 (at your option) any later version.
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with this program; if not, write to the Free Software
21 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
28 generate a UTF-16LE buffer for a given unicode codepoint
/*
 * Build the UTF-16LE encoding of a single codepoint using the system iconv.
 *
 * The codepoint is laid out as four little-endian bytes in in[] and a
 * UCS-4LE -> UTF-16LE conversion is run with `buf` as the output cursor.
 *
 * NOTE(review): only part of this function is visible here. The local
 * declarations, the setup of ptr_in/size_in/size_out, the error handling and
 * the way *size and the return value are produced are not shown - confirm
 * against the complete file.
 */
30 static int gen_codepoint_utf16(unsigned int codepoint,
31 char *buf, size_t *size)
36 size_t size_in, size_out, ret;
/* iconv_open() takes the destination encoding first, then the source */
38 cd = iconv_open("UTF-16LE", "UCS-4LE");
/* serialise the codepoint least-significant byte first */
41 in[0] = codepoint & 0xFF;
42 in[1] = (codepoint>>8) & 0xFF;
43 in[2] = (codepoint>>16) & 0xFF;
44 in[3] = (codepoint>>24) & 0xFF;
/* presumably ptr_in points at in[] at this point - TODO confirm */
50 ret = iconv(cd, &ptr_in, &size_in, &buf, &size_out);
59 work out the unicode codepoint of the first character in the buffer, where the buffer is encoded in the given charset
/*
 * Convert the start of `buf` from `charset` to UCS-4LE with the system iconv
 * and return the first four output bytes combined as a little-endian value.
 *
 * out[] is zeroed before the conversion, so any bytes iconv does not write
 * contribute 0 to the result.
 *
 * NOTE(review): the declarations of cd/out/ptr_out, the initialisation of
 * size_in and any error handling are not visible here.
 */
61 static unsigned int get_codepoint(char *buf, size_t size, const char *charset)
66 size_t size_out, size_in, ret;
/* destination encoding first, source encoding second */
68 cd = iconv_open("UCS-4LE", charset);
72 size_out = sizeof(out);
73 memset(out, 0, sizeof(out));
75 ret = iconv(cd, &buf, &size_in, &ptr_out, &size_out);
/*
 * Reassemble the little-endian bytes.
 * NOTE(review): the element type of out[] is not visible; if it promotes to
 * a signed int, out[3]<<24 can shift into the sign bit - confirm.
 */
79 return out[0] | (out[1]<<8) | (out[2]<<16) | (out[3]<<24);
83 display a buffer with name prefix
/*
 * Print the first `size` bytes of `buf` to stdout as two-digit lowercase hex
 * values, each followed by a space.
 *
 * NOTE(review): the lines that use `name` and that terminate the output are
 * not visible here.
 */
85 static void show_buf(const char *name, uint8_t *buf, size_t size)
89 for (i=0;i<size;i++) {
90 printf("%02x ", buf[i]);
96 given a UTF-16LE buffer, test the system and built-in iconv code to
97 make sure they do exactly the same thing in converting the buffer to
98 "charset", then convert it back again and ensure we get the same
98 buffer back
/*
 * Cross-check the built-in smb_iconv against the system iconv for one input.
 *
 * inbuf/size: the UTF-16LE bytes to convert.
 * charset:    the target character set name.
 *
 * Visible steps: convert inbuf to `charset` with smb_iconv (into buf1) and
 * with the system iconv (into buf2), compare return values, errno, remaining
 * input and output bytes, then convert buf1 back with smb_iconv (into buf3)
 * and compare with the consumed part of inbuf.
 *
 * NOTE(review): many lines of this function are not visible here (pointer
 * setup, errno capture, several conditions, returns). The comments below
 * describe only what the visible lines show.
 */
101 static int test_buffer(uint8_t *inbuf, size_t size, const char *charset)
103 uint8_t buf1[1000], buf2[1000], buf3[1000];
104 size_t outsize1, outsize2, outsize3;
107 size_t size_in1, size_in2, size_in3;
108 size_t ret1, ret2, ret3, len1, len2;
/* static: the conversion descriptors and the last charset persist across calls */
112 static smb_iconv_t cd2, cd3;
113 static const char *last_charset;
/* existing descriptors are closed before new ones are opened below;
 * the rest of this condition/branch is not visible - TODO confirm */
115 if (cd && last_charset) {
117 smb_iconv_close(cd2);
118 smb_iconv_close(cd3);
/* cd: system iconv UTF-16LE -> charset; cd2: built-in, same direction;
 * cd3: built-in, charset -> UTF-16LE (used for the round trip) */
123 cd = iconv_open(charset, "UTF-16LE");
124 cd2 = smb_iconv_open(charset, "UTF-16LE");
125 cd3 = smb_iconv_open("UTF-16LE", charset);
126 last_charset = charset;
/* built-in conversion into buf1 (ptr_out presumably points at buf1) */
135 outsize1 = sizeof(buf1);
137 memset(ptr_out, 0, outsize1);
139 ret1 = smb_iconv(cd2, &ptr_in, &size_in1, &ptr_out, &outsize1);
/* NOTE(review): the same built-in conversion appears a second time; the
 * reason for repeating it is not visible here */
147 outsize1 = sizeof(buf1);
149 memset(ptr_out, 0, outsize1);
151 ret1 = smb_iconv(cd2, &ptr_in, &size_in1, &ptr_out, &outsize1);
/* system iconv conversion into buf2 (ptr_out presumably points at buf2) */
157 outsize2 = sizeof(buf2);
159 memset(ptr_out, 0, outsize2);
161 ret2 = iconv(cd, discard_const_p(char *, &ptr_in), &size_in2, &ptr_out, &outsize2);
/* bytes actually produced = buffer capacity minus space left */
164 len1 = sizeof(buf1) - outsize1;
165 len2 = sizeof(buf2) - outsize2;
167 /* codepoints above 1M are not interesting for now */
/* one output is a prefix of the other and the extra bytes decode to a
 * codepoint >= 1<<20 */
169 memcmp(buf1, buf2, len1) == 0 &&
170 get_codepoint(buf2+len1, len2-len1, charset) >= (1<<20)) {
174 memcmp(buf1, buf2, len2) == 0 &&
175 get_codepoint(buf1+len2, len1-len2, charset) >= (1<<20)) {
/* NOTE(review): ret1/ret2 are size_t but printed with %d */
180 printf("ret1=%d ret2=%d\n", ret1, ret2);
/* the two implementations must fail the same way; show the unconverted input */
184 if (errno1 != errno2) {
185 printf("e1=%s e2=%s\n", strerror(errno1), strerror(errno2));
186 show_buf(" rem1:", inbuf+(size-size_in1), size_in1);
187 show_buf(" rem2:", inbuf+(size-size_in2), size_in2);
/* NOTE(review): the argument lines of the next two printf calls are not
 * visible; outsize1/outsize2 and size_in1/size_in2 are size_t, so %d looks
 * mismatched if they are passed uncast - confirm */
191 if (outsize1 != outsize2) {
192 printf("\noutsize mismatch outsize1=%d outsize2=%d\n",
197 if (size_in1 != size_in2) {
198 printf("\nsize_in mismatch size_in1=%d size_in2=%d\n",
/* output bytes differ: dump consumed input and both outputs */
205 memcmp(buf1, buf2, len1) != 0) {
/* NOTE(review): size, ret1 and ret2 are size_t but printed with %d */
206 printf("\nsize=%d ret1=%d ret2=%d\n", size, ret1, ret2);
207 show_buf(" IN1:", inbuf, size-size_in1);
208 show_buf(" IN2:", inbuf, size-size_in2);
209 show_buf("OUT1:", buf1, len1);
210 show_buf("OUT2:", buf2, len2);
/* when one output is a strict prefix of the other, decode what follows */
211 if (len2 > len1 && memcmp(buf1, buf2, len1) == 0) {
212 printf("next codepoint is %u\n",
213 get_codepoint(buf2+len1, len2-len1, charset));
215 if (len1 > len2 && memcmp(buf1, buf2, len2) == 0) {
216 printf("next codepoint is %u\n",
217 get_codepoint(buf1+len2,len1-len2, charset));
/* from here on `size` is the number of input bytes the built-in conversion consumed */
223 size = size - size_in1;
/* round trip: convert back to UTF-16LE with the built-in code into buf3 */
227 outsize3 = sizeof(buf3);
229 memset(ptr_out, 0, outsize3);
230 ret3 = smb_iconv(cd3, &ptr_in, &size_in3, &ptr_out, &outsize3);
233 printf("pull failed - %s\n", strerror(errno));
/* the round trip must produce exactly `size` bytes */
237 if (outsize3 != sizeof(buf3) - size) {
/* NOTE(review): both arguments are size_t but printed with %d */
238 printf("wrong outsize3 - %d should be %d\n",
239 outsize3, sizeof(buf3) - size);
/* ...and those bytes must equal the consumed input */
243 if (memcmp(buf3, inbuf, size) != 0) {
244 printf("pull bytes mismatch:\n");
245 show_buf("inbuf", inbuf, size);
246 show_buf(" buf3", buf3, size);
251 printf("test_buffer failed for charset %s\n", charset);
259 test the push_codepoint() and next_codepoint() functions for a given codepoint
/*
 * Round-trip one codepoint: encode it into buf with push_codepoint(), decode
 * it again with next_codepoint(), and report a mismatch in either the decoded
 * value or the reported size.
 *
 * NOTE(review): declarations, return statements and some conditions of this
 * function are not visible here.
 */
262 static int test_codepoint(unsigned int codepoint)
268 size = push_codepoint(buf, codepoint);
/* NOTE(review): the condition enclosing this range check is not visible -
 * presumably it handles a push_codepoint() failure; confirm */
270 if (codepoint < 0xd800 || codepoint > 0x10000) {
/* put four random bytes directly after the encoded codepoint */
275 buf[size] = random();
276 buf[size+1] = random();
277 buf[size+2] = random();
278 buf[size+3] = random();
/* decode; next_codepoint() reports a size through size2 */
280 c = next_codepoint(buf, &size2);
282 if (c != codepoint) {
283 printf("next_codepoint(%u) failed - gave %u\n", codepoint, c);
/* NOTE(review): the types of size2/size are not visible; if they are size_t,
 * %d is the wrong conversion - confirm */
288 printf("next_codepoint(%u) gave wrong size %d (should be %d)\n",
289 codepoint, size2, size);
/*
 * Test driver with three visible stages, each skipped once `ok` is false:
 *   1. test_codepoint() for every codepoint below 1<<20;
 *   2. gen_codepoint_utf16() + test_buffer(..., "UTF-8") for every codepoint
 *      below 1<<20;
 *   3. randomly generated buffers run through test_buffer() for "UTF-8" and
 *      "CP850".
 *
 * NOTE(review): declarations (ok, size), some branch bodies and the return
 * statement are not visible here.
 */
296 BOOL torture_local_iconv(void)
299 unsigned char inbuf[1000];
301 unsigned int codepoint, i, c;
305 printf("Testing next_codepoint()\n");
306 for (codepoint=0;ok && codepoint<(1<<20);codepoint++) {
307 ok = test_codepoint(codepoint);
310 printf("Testing first 1M codepoints\n");
311 for (codepoint=0;ok && codepoint<(1<<20);codepoint++) {
/* NOTE(review): inbuf is unsigned char[] while gen_codepoint_utf16() is
 * declared with a char *buf parameter */
312 if (gen_codepoint_utf16(codepoint, inbuf, &size) != 0) {
/* progress indicator, overwritten in place via \r */
316 if (codepoint % 1000 == 0) {
317 printf("codepoint=%u \r", codepoint);
320 ok = test_buffer(inbuf, size, "UTF-8");
/* NOTE(review): the message says 5M but the loop bound below is 500000
 * iterations (two test_buffer() calls each) */
324 printf("Testing 5M random UTF-16LE sequences\n");
325 for (i=0;ok && i<500000;i++) {
327 printf("i=%u \r", i);
/* random length below 100, so it always fits in inbuf */
330 size = random() % 100;
331 for (c=0;c<size;c++) {
/* roughly 80% of the bytes are drawn from 0..127 */
332 if (random() % 100 < 80) {
333 inbuf[c] = random() % 128;
/* the bodies of the next two branches are not visible here */
337 if (random() % 10 == 0) {
340 if (random() % 10 == 0) {
344 ok &= test_buffer(inbuf, size, "UTF-8");
345 ok &= test_buffer(inbuf, size, "CP850");
/*
 * Second definition of torture_local_iconv(): only prints that the test
 * cannot run without a native iconv library.
 *
 * NOTE(review): presumably this sits in the alternative branch of a
 * conditional-compilation block that is not visible here; its return
 * statement is not visible either - confirm.
 */
354 BOOL torture_local_iconv(void)
356 printf("No native iconv library - can't run iconv test\n");