2 Unix SMB/CIFS implementation.
4 local testing of iconv routines. This tests the system iconv code against
5 the built-in iconv code
7 Copyright (C) Andrew Tridgell 2004
9 This program is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 2 of the License, or
12 (at your option) any later version.
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with this program; if not, write to the Free Software
21 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
25 #include "system/iconv.h"
26 #include "system/time.h"
30 generate a UTF-16LE buffer for a given unicode codepoint
/*
 * Encode one Unicode code point as UTF-16LE using the system iconv.
 *
 * codepoint: value to encode.
 * buf:       output buffer handed to iconv().
 * size:      out-parameter; it is assigned on a line that is not present
 *            in this listing.
 *
 * NOTE(review): this listing omits several lines of the function (local
 * declarations, the failure branch body, the set-up of ptr_in/size_in/
 * size_out and the return statement) - confirm against the full source.
 */
32 static int gen_codepoint_utf16(unsigned int codepoint,
33 char *buf, size_t *size)
38 size_t size_in, size_out, ret;
/* iconv_open() takes (tocode, fromcode): this converts UCS-4LE to UTF-16LE */
40 cd = iconv_open("UTF-16LE", "UCS-4LE");
/* iconv_open() reports failure as (iconv_t)-1 */
41 if (cd == (iconv_t)-1) {
/* lay the code point out as four bytes, least significant first (UCS-4LE) */
47 in[0] = codepoint & 0xFF;
48 in[1] = (codepoint>>8) & 0xFF;
49 in[2] = (codepoint>>16) & 0xFF;
50 in[3] = (codepoint>>24) & 0xFF;
/* iconv() advances ptr_in and buf and decrements size_in and size_out as it
 * consumes input and produces output */
56 ret = iconv(cd, &ptr_in, &size_in, &buf, &size_out);
65 work out the unicode codepoint of the first character in the buffer, which is encoded in the given charset
/*
 * Decode the start of buf (size bytes, encoded in charset) to UCS-4LE with
 * the system iconv and return the four output bytes assembled as a
 * little-endian integer.
 *
 * NOTE(review): the declarations of cd, out and ptr_out, the set-up of
 * size_in and ptr_out, and the release of cd are on lines missing from this
 * listing - confirm against the full source.
 */
67 static unsigned int get_codepoint(char *buf, size_t size, const char *charset)
72 size_t size_out, size_in, ret;
/* NOTE(review): no check of the iconv_open() result is visible here; for an
 * unsupported charset iconv() would be called with (iconv_t)-1 - confirm */
74 cd = iconv_open("UCS-4LE", charset);
78 size_out = sizeof(out);
/* out is zeroed first, so bytes the conversion does not write read back as 0 */
79 memset(out, 0, sizeof(out));
/* ret is not used by any line visible in this listing */
81 ret = iconv(cd, &buf, &size_in, &ptr_out, &size_out);
/* reassemble the little-endian UCS-4 value.
 * NOTE(review): if out is declared as plain char (declaration not shown),
 * bytes >= 0x80 would sign-extend here - verify the element type */
85 return out[0] | (out[1]<<8) | (out[2]<<16) | (out[3]<<24);
89 display a buffer with name prefix
/*
 * Dump size bytes of buf in hexadecimal.
 *
 * NOTE(review): the declaration of i and whatever prints `name` and the
 * trailing newline are on lines missing from this listing.
 */
91 static void show_buf(const char *name, uint8_t *buf, size_t size)
95 for (i=0;i<size;i++) {
/* each byte as two hex digits followed by a space */
96 printf("%02x ", buf[i]);
102 given a UTF-16LE buffer, test the system and built-in iconv code to
103 make sure they do exactly the same thing in converting the buffer to
104 "charset", then convert it back again and ensure we get the same buffer back
/*
 * Convert inbuf (size bytes of UTF-16LE) to charset twice - once with the
 * built-in smb_iconv() and once with the system iconv() - and report any
 * difference in return value, errno, bytes consumed or bytes produced.
 * The built-in result is then converted back to UTF-16LE and, for charsets
 * whose name starts with "UTF", compared with the original input.
 *
 * NOTE(review): this listing omits many lines of the function (braces,
 * several declarations, pointer/size set-up, most branch bodies and every
 * return statement), so the return convention cannot be read from here.
 * Callers assign the result to `ok`, so presumably non-zero means success -
 * confirm against the full source.
 */
107 static int test_buffer(uint8_t *inbuf, size_t size, const char *charset)
/* fixed 1000-byte output buffers: buf1 built-in, buf2 system, buf3 round trip */
109 uint8_t buf1[1000], buf2[1000], buf3[1000];
110 size_t outsize1, outsize2, outsize3;
113 size_t size_in1, size_in2, size_in3;
114 size_t ret1, ret2, ret3, len1, len2;
/* static: the built-in descriptors and the charset name survive between calls */
118 static smb_iconv_t cd2, cd3;
119 static const char *last_charset;
/* descriptors left over from an earlier call; the test that decides whether
 * they are reused or closed is on a line missing from this listing */
121 if (cd && last_charset) {
123 smb_iconv_close(cd2);
124 smb_iconv_close(cd3);
/* system descriptor: UTF-16LE -> charset */
129 cd = iconv_open(charset, "UTF-16LE");
130 if (cd == (iconv_t)-1) {
/* built-in descriptors: cd2 converts to charset, cd3 converts back.
 * NOTE(review): no failure check on these two opens is visible here */
134 cd2 = smb_iconv_open(charset, "UTF-16LE");
135 cd3 = smb_iconv_open("UTF-16LE", charset);
/* stores the caller's pointer, not a copy of the string */
136 last_charset = charset;
139 /* internal convert to charset - placing result in buf1 */
143 outsize1 = sizeof(buf1);
/* zero-fill outsize1 bytes at ptr_out before converting */
145 memset(ptr_out, 0, outsize1);
147 ret1 = smb_iconv(cd2, &ptr_in, &size_in1, &ptr_out, &outsize1);
150 /* system convert to charset - placing result in buf2 */
154 outsize2 = sizeof(buf2);
156 memset(ptr_out, 0, outsize2);
/* discard_const_p is a project macro; presumably it casts away const so the
 * argument matches the system iconv() prototype - confirm */
158 ret2 = iconv(cd, discard_const_p(char *, &ptr_in), &size_in2, &ptr_out, &outsize2);
/* bytes produced = buffer capacity minus the space iconv left unused */
161 len1 = sizeof(buf1) - outsize1;
162 len2 = sizeof(buf2) - outsize2;
164 /* codepoints above 1M are not interesting for now */
/* built-in output is a prefix of the system output: decode the first extra
 * character the system produced (the head of this condition and the action
 * taken are on lines missing from this listing) */
166 memcmp(buf1, buf2, len1) == 0 &&
167 get_codepoint(buf2+len1, len2-len1, charset) >= (1<<20)) {
/* mirror case: system output is a prefix of the built-in output */
171 memcmp(buf1, buf2, len2) == 0 &&
172 get_codepoint(buf1+len2, len1-len2, charset) >= (1<<20)) {
/* size_t values are narrowed to int for %d */
177 printf("ret1=%d ret2=%d\n", (int)ret1, (int)ret2);
/* errno1/errno2 are assigned on lines missing from this listing; presumably
 * errno captured after each conversion - confirm */
181 if (errno1 != errno2) {
182 printf("e1=%s e2=%s\n", strerror(errno1), strerror(errno2));
/* input left unconsumed by each converter: the last size_inN bytes of inbuf */
183 show_buf(" rem1:", inbuf+(size-size_in1), size_in1);
184 show_buf(" rem2:", inbuf+(size-size_in2), size_in2);
188 if (outsize1 != outsize2) {
189 printf("\noutsize mismatch outsize1=%d outsize2=%d\n",
190 (int)outsize1, (int)outsize2);
194 if (size_in1 != size_in2) {
195 printf("\nsize_in mismatch size_in1=%d size_in2=%d\n",
196 (int)size_in1, (int)size_in2);
/* output bytes differ: dump the consumed input and both outputs */
202 memcmp(buf1, buf2, len1) != 0) {
203 printf("\nsize=%d ret1=%d ret2=%d\n", (int)size, (int)ret1, (int)ret2);
204 show_buf(" IN1:", inbuf, size-size_in1);
205 show_buf(" IN2:", inbuf, size-size_in2);
206 show_buf("OUT1:", buf1, len1);
207 show_buf("OUT2:", buf2, len2);
/* when one output is a strict prefix of the other, report the code point at
 * which the longer one continues */
208 if (len2 > len1 && memcmp(buf1, buf2, len1) == 0) {
209 printf("next codepoint is %u\n",
210 get_codepoint(buf2+len1, len2-len1, charset));
212 if (len1 > len2 && memcmp(buf1, buf2, len2) == 0) {
213 printf("next codepoint is %u\n",
214 get_codepoint(buf1+len2,len1-len2, charset));
220 /* convert back to UTF-16, putting result in buf3 */
/* from here on size is the number of input bytes the built-in converter consumed */
221 size = size - size_in1;
225 outsize3 = sizeof(buf3);
227 memset(ptr_out, 0, outsize3);
228 ret3 = smb_iconv(cd3, &ptr_in, &size_in3, &ptr_out, &outsize3);
230 /* we only internally support the first 1M codepoints */
/* the round trip did not yield exactly size bytes: decode the input character
 * at the offset where the round-trip output stopped */
231 if (outsize3 != sizeof(buf3) - size &&
232 get_codepoint(inbuf+sizeof(buf3) - outsize3,
233 size - (sizeof(buf3) - outsize3),
234 "UTF-16LE") >= (1<<20)) {
/* the condition guarding this message is on a line missing from this listing */
239 printf("pull failed - %s\n", strerror(errno));
/* charsets whose name does not start with "UTF" are exempt from the
 * round-trip checks below */
243 if (strncmp(charset, "UTF", 3) != 0) {
244 /* don't expect perfect mappings for non UTF charsets */
249 if (outsize3 != sizeof(buf3) - size) {
250 printf("wrong outsize3 - %d should be %d\n",
251 (int)outsize3, (int)(sizeof(buf3) - size));
/* round-trip bytes must equal the consumed part of the original input */
255 if (memcmp(buf3, inbuf, size) != 0) {
256 printf("pull bytes mismatch:\n");
257 show_buf("inbuf", inbuf, size);
258 show_buf(" buf3", buf3, sizeof(buf3) - outsize3);
260 printf("next codepoint is %u\n",
261 get_codepoint(inbuf+sizeof(buf3) - outsize3,
262 size - (sizeof(buf3) - outsize3),
267 printf("test_buffer failed for charset %s\n", charset);
275 test the push_codepoint() and next_codepoint() functions for a given codepoint
/*
 * Encode codepoint with push_codepoint(), decode it again with
 * next_codepoint(), and report a mismatch in either the value or the size.
 *
 * NOTE(review): the declarations of buf, size, size2 and c, the branch
 * heads around the range check and the size comparison, and every return
 * statement are on lines missing from this listing.
 */
278 static int test_codepoint(unsigned int codepoint)
/* push_codepoint() is defined elsewhere; its result is kept as the encoded size */
284 size = push_codepoint(buf, codepoint);
/* NOTE(review): UTF-16 surrogates span 0xd800..0xdfff, so an upper bound of
 * 0x10000 looks wider than needed; the enclosing condition is not visible
 * here - confirm the intended range */
286 if (codepoint < 0xd800 || codepoint > 0x10000) {
/* four random bytes directly after the encoded character; presumably so that
 * decoding cannot depend on what follows it - confirm.  buf must have room
 * for size+4 bytes (its declaration is not shown) */
291 buf[size] = random();
292 buf[size+1] = random();
293 buf[size+2] = random();
294 buf[size+3] = random();
/* decode again; size2 receives the number of bytes next_codepoint() reports */
296 c = next_codepoint(buf, &size2);
298 if (c != codepoint) {
299 printf("next_codepoint(%u) failed - gave %u\n", codepoint, c);
304 printf("next_codepoint(%u) gave wrong size %d (should be %d)\n",
305 codepoint, (int)size2, (int)size);
/*
 * Test entry point: runs test_codepoint() over code points below 1<<20,
 * test_buffer() on the UTF-16LE encoding of each of those code points, and
 * test_buffer() on randomly generated buffers for "UTF-8" and "CP850".
 *
 * NOTE(review): the declarations of cd, size and ok, several branch bodies
 * and the return statement are on lines missing from this listing.
 */
312 BOOL torture_local_iconv(void)
315 unsigned char inbuf[1000];
317 unsigned int codepoint, i, c;
/* probe: can the system iconv convert UCS-4LE to UTF-16LE at all? */
322 cd = iconv_open("UTF-16LE", "UCS-4LE");
323 if (cd == (iconv_t)-1) {
/* NOTE(review): the message names the direction "UTF-16LE -> UCS-4LE", but
 * iconv_open(tocode, fromcode) above opens UCS-4LE -> UTF-16LE */
324 printf("unable to test - system iconv library does not support UTF-16LE -> UCS-4LE\n");
329 printf("Testing next_codepoint()\n");
/* `ok` in the loop condition stops the loop at the first failure */
330 for (codepoint=0;ok && codepoint<(1<<20);codepoint++) {
331 ok = test_codepoint(codepoint);
334 printf("Testing first 1M codepoints\n");
335 for (codepoint=0;ok && codepoint<(1<<20);codepoint++) {
/* NOTE(review): inbuf is unsigned char[] while gen_codepoint_utf16() is
 * declared with char *buf, so this call mixes pointer signedness */
336 if (gen_codepoint_utf16(codepoint, inbuf, &size) != 0) {
340 if (codepoint % 1000 == 0) {
/* NOTE(review): progress is printed only when the "torture:progress"
 * parameter is false, which looks inverted - confirm intent */
341 if (!lp_parm_bool(-1, "torture", "progress", True)) {
342 printf("codepoint=%u \r", codepoint);
346 ok = test_buffer(inbuf, size, "UTF-8");
/* NOTE(review): the message says 5M but the loop below runs 500000
 * iterations, each calling test_buffer() twice */
350 printf("Testing 5M random UTF-16LE sequences\n");
351 for (i=0;ok && i<500000;i++) {
/* NOTE(review): same apparently inverted progress condition as above */
353 if (!lp_parm_bool(-1, "torture", "progress", True)) {
354 printf("i=%u \r", i);
/* random buffer length in the range 0..99 bytes */
358 size = random() % 100;
359 for (c=0;c<size;c++) {
/* roughly 80% of bytes are drawn from 0..127; the other branch is on lines
 * missing from this listing */
360 if (random() % 100 < 80) {
361 inbuf[c] = random() % 128;
/* the bodies of these two 1-in-10 branches are missing from this listing */
365 if (random() % 10 == 0) {
368 if (random() % 10 == 0) {
/* &= accumulates failures, so both charsets are tested in every iteration;
 * this relies on test_buffer() returning exactly 0 or 1 */
372 ok &= test_buffer(inbuf, size, "UTF-8");
373 ok &= test_buffer(inbuf, size, "CP850");
/*
 * Second definition of torture_local_iconv(); presumably selected by a
 * preprocessor conditional (not present in this listing) when no native
 * iconv library is available - confirm.  It only prints a notice; its
 * return statement is on a line missing from this listing.
 */
382 BOOL torture_local_iconv(void)
384 printf("No native iconv library - can't run iconv test\n");