2 Unix SMB/CIFS implementation.
4 local testing of iconv routines. This tests the system iconv code against
5 the built-in iconv code
7 Copyright (C) Andrew Tridgell 2004
9 This program is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 2 of the License, or
12 (at your option) any later version.
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with this program; if not, write to the Free Software
21 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
25 #include "torture/torture.h"
26 #include "system/iconv.h"
27 #include "system/time.h"
28 #include "libcli/raw/libcliraw.h"
29 #include "torture/util.h"
33 generate a UTF-16LE buffer for a given unicode codepoint
/*
 * Encode a single Unicode codepoint as UTF-16LE with the system iconv.
 *
 * codepoint: value to encode
 * buf:       destination handed to iconv() as the output buffer
 * size:      out parameter (its use is outside the lines documented here)
 */
35 static int gen_codepoint_utf16(unsigned int codepoint,
36 char *buf, size_t *size)
41 size_t size_in, size_out, ret;
/* iconv_open(tocode, fromcode): this descriptor converts UCS-4LE input to UTF-16LE output */
43 cd = iconv_open("UTF-16LE", "UCS-4LE");
44 if (cd == (iconv_t)-1) {
/* pack the codepoint into four bytes, least significant byte first (UCS-4LE layout) */
50 in[0] = codepoint & 0xFF;
51 in[1] = (codepoint>>8) & 0xFF;
52 in[2] = (codepoint>>16) & 0xFF;
53 in[3] = (codepoint>>24) & 0xFF;
/* run the conversion; iconv() advances buf and decrements size_out as it writes */
59 ret = iconv(cd, &ptr_in, &size_in, &buf, &size_out);
68 work out the unicode codepoint of the first character in the buffer, which is encoded in the given charset
/*
 * Decode the first character of buf by converting it from "charset" to
 * UCS-4LE with the system iconv and reassembling the four output bytes
 * into an unsigned integer.
 *
 * buf:     input bytes encoded in charset
 * size:    number of input bytes available
 * charset: iconv name of the encoding used by buf
 */
70 static unsigned int get_codepoint(char *buf, size_t size, const char *charset)
75 size_t size_out, size_in, ret;
/* descriptor converting from the caller's charset to UCS-4LE */
77 cd = iconv_open("UCS-4LE", charset);
/* start from a zeroed output buffer so bytes iconv() does not write read back as 0 */
81 size_out = sizeof(out);
82 memset(out, 0, sizeof(out));
84 ret = iconv(cd, &buf, &size_in, &ptr_out, &size_out);
/*
 * Reassemble the little-endian UCS-4 bytes, least significant first.
 * NOTE(review): if out[] holds 8-bit values, out[3]<<24 is computed as a
 * signed int and overflows for out[3] >= 0x80 - confirm the element type.
 */
88 return out[0] | (out[1]<<8) | (out[2]<<16) | (out[3]<<24);
92 display a buffer with name prefix
/*
 * Hex-dump a buffer for diagnostics.
 *
 * name: label for the dump
 * buf:  bytes to print
 * size: number of bytes in buf
 */
94 static void show_buf(const char *name, uint8_t *buf, size_t size)
/* each byte is printed as two lowercase hex digits followed by a space */
98 for (i=0;i<size;i++) {
99 printf("%02x ", buf[i]);
105 given a UTF-16LE buffer, test the system and built-in iconv code to
106 make sure they do exactly the same thing in converting the buffer to
107 "charset", then convert it back again and ensure we get the same data back
/*
 * Cross-check the built-in smb_iconv against the system iconv for one
 * UTF-16LE input buffer.
 *
 * The input is converted to "charset" twice - by smb_iconv into buf1 and
 * by the system iconv into buf2 - and the return values, errno values,
 * remaining input, remaining output space and output bytes of the two
 * conversions are compared.  The data is then converted back to UTF-16LE
 * with smb_iconv into buf3 and compared with the original input.
 *
 * inbuf:   UTF-16LE input bytes
 * size:    number of bytes in inbuf
 * charset: iconv name of the target character set
 */
110 static int test_buffer(uint8_t *inbuf, size_t size, const char *charset)
112 uint8_t buf1[1000], buf2[1000], buf3[1000];
113 size_t outsize1, outsize2, outsize3;
116 size_t size_in1, size_in2, size_in3;
117 size_t ret1, ret2, ret3, len1, len2;
/* the conversion descriptors and the charset they were opened for persist across calls */
121 static smb_iconv_t cd2, cd3;
122 static const char *last_charset;
/* release the built-in descriptors left over from an earlier call */
124 if (cd && last_charset) {
126 smb_iconv_close(cd2);
127 smb_iconv_close(cd3);
/* system iconv descriptor: UTF-16LE -> charset */
132 cd = iconv_open(charset, "UTF-16LE");
133 if (cd == (iconv_t)-1) {
/* built-in descriptors for both directions: cd2 is UTF-16LE -> charset, cd3 is charset -> UTF-16LE */
137 cd2 = smb_iconv_open(charset, "UTF-16LE");
138 cd3 = smb_iconv_open("UTF-16LE", charset);
139 last_charset = charset;
142 /* internal convert to charset - placing result in buf1 */
146 outsize1 = sizeof(buf1);
148 memset(ptr_out, 0, outsize1);
150 ret1 = smb_iconv(cd2, &ptr_in, &size_in1, &ptr_out, &outsize1);
153 /* system convert to charset - placing result in buf2 */
157 outsize2 = sizeof(buf2);
159 memset(ptr_out, 0, outsize2);
161 ret2 = iconv(cd, discard_const_p(char *, &ptr_in), &size_in2, &ptr_out, &outsize2);
/* bytes actually produced by each converter: buffer capacity minus the space left over */
164 len1 = sizeof(buf1) - outsize1;
165 len2 = sizeof(buf2) - outsize2;
167 /* codepoints above 1M are not interesting for now */
169 memcmp(buf1, buf2, len1) == 0 &&
170 get_codepoint(buf2+len1, len2-len1, charset) >= (1<<20)) {
174 memcmp(buf1, buf2, len2) == 0 &&
175 get_codepoint(buf1+len2, len1-len2, charset) >= (1<<20)) {
180 printf("ret1=%d ret2=%d\n", (int)ret1, (int)ret2);
/* on an errno mismatch, dump the input each converter left unconsumed */
184 if (errno1 != errno2) {
185 printf("e1=%s e2=%s\n", strerror(errno1), strerror(errno2));
186 show_buf(" rem1:", inbuf+(size-size_in1), size_in1);
187 show_buf(" rem2:", inbuf+(size-size_in2), size_in2);
/* both converters must leave the same amount of output space */
191 if (outsize1 != outsize2) {
192 printf("\noutsize mismatch outsize1=%d outsize2=%d\n",
193 (int)outsize1, (int)outsize2);
/* both converters must leave the same amount of input unconsumed */
197 if (size_in1 != size_in2) {
198 printf("\nsize_in mismatch size_in1=%d size_in2=%d\n",
199 (int)size_in1, (int)size_in2);
205 memcmp(buf1, buf2, len1) != 0) {
206 printf("\nsize=%d ret1=%d ret2=%d\n", (int)size, (int)ret1, (int)ret2);
207 show_buf(" IN1:", inbuf, size-size_in1);
208 show_buf(" IN2:", inbuf, size-size_in2);
209 show_buf("OUT1:", buf1, len1);
210 show_buf("OUT2:", buf2, len2);
/* when one output is a strict prefix of the other, decode the first extra character to aid diagnosis */
211 if (len2 > len1 && memcmp(buf1, buf2, len1) == 0) {
212 printf("next codepoint is %u\n",
213 get_codepoint(buf2+len1, len2-len1, charset));
215 if (len1 > len2 && memcmp(buf1, buf2, len2) == 0) {
216 printf("next codepoint is %u\n",
217 get_codepoint(buf1+len2,len1-len2, charset));
223 /* convert back to UTF-16, putting result in buf3 */
/* from here on size counts only the input bytes the built-in conversion consumed */
224 size = size - size_in1;
228 outsize3 = sizeof(buf3);
230 memset(ptr_out, 0, outsize3);
231 ret3 = smb_iconv(cd3, &ptr_in, &size_in3, &ptr_out, &outsize3);
233 /* we only internally support the first 1M codepoints */
234 if (outsize3 != sizeof(buf3) - size &&
235 get_codepoint(inbuf+sizeof(buf3) - outsize3,
236 size - (sizeof(buf3) - outsize3),
237 "UTF-16LE") >= (1<<20)) {
242 printf("pull failed - %s\n", strerror(errno));
246 if (strncmp(charset, "UTF", 3) != 0) {
247 /* don't expect perfect mappings for non UTF charsets */
/* a complete round trip must have written exactly size bytes into buf3 */
252 if (outsize3 != sizeof(buf3) - size) {
253 printf("wrong outsize3 - %d should be %d\n",
254 (int)outsize3, (int)(sizeof(buf3) - size));
/* and those bytes must be identical to the consumed part of the original input */
258 if (memcmp(buf3, inbuf, size) != 0) {
259 printf("pull bytes mismatch:\n");
260 show_buf("inbuf", inbuf, size);
261 show_buf(" buf3", buf3, sizeof(buf3) - outsize3);
263 printf("next codepoint is %u\n",
264 get_codepoint(inbuf+sizeof(buf3) - outsize3,
265 size - (sizeof(buf3) - outsize3),
270 printf("test_buffer failed for charset %s\n", charset);
278 test the push_codepoint() and next_codepoint() functions for a given codepoint
/*
 * Round-trip one codepoint through push_codepoint() and next_codepoint().
 *
 * The codepoint is encoded into buf, four random bytes are written after
 * the encoding, and the buffer is decoded again.  A message is printed if
 * the decoded value differs from codepoint or if the decoded size differs
 * from the encoded size.
 */
281 static int test_codepoint(unsigned int codepoint)
287 size = push_codepoint(buf, codepoint);
/*
 * NOTE(review): 0xd800 is the start of the UTF-16 surrogate range, which
 * ends at 0xdfff; the upper bound tested here is 0x10000 - confirm that
 * this is the intended range.
 */
289 if (codepoint < 0xd800 || codepoint > 0x10000) {
/*
 * Follow the encoded bytes with random data - presumably so that
 * next_codepoint() is checked not to depend on what comes after the
 * character; verify against the helper's contract.
 */
294 buf[size] = random();
295 buf[size+1] = random();
296 buf[size+2] = random();
297 buf[size+3] = random();
/* decode again; size2 receives the size reported by next_codepoint() */
299 c = next_codepoint(buf, &size2);
301 if (c != codepoint) {
302 printf("next_codepoint(%u) failed - gave %u\n", codepoint, c);
307 printf("next_codepoint(%u) gave wrong size %d (should be %d)\n",
308 codepoint, (int)size2, (int)size);
/*
 * Entry point of the local iconv test.
 *
 * Phases, each one skipped once ok has become false:
 *  1. probe that the system iconv can open a UCS-4LE to UTF-16LE converter
 *  2. test_codepoint() for every codepoint below 1<<20
 *  3. test_buffer() against "UTF-8" for every codepoint below 1<<20,
 *     encoded to UTF-16LE by gen_codepoint_utf16()
 *  4. test_buffer() against "UTF-8" and "CP850" for randomly filled buffers
 */
315 BOOL torture_local_iconv(void)
318 unsigned char inbuf[1000];
320 unsigned int codepoint, i, c;
/*
 * NOTE(review): iconv_open() takes (tocode, fromcode), so this opens a
 * UCS-4LE to UTF-16LE converter, while the message below names the
 * opposite direction - confirm which wording is intended.
 */
325 cd = iconv_open("UTF-16LE", "UCS-4LE");
326 if (cd == (iconv_t)-1) {
327 printf("unable to test - system iconv library does not support UTF-16LE -> UCS-4LE\n");
332 printf("Testing next_codepoint()\n");
/* ok is part of the loop condition, so the scan stops at the first failing codepoint */
333 for (codepoint=0;ok && codepoint<(1<<20);codepoint++) {
334 ok = test_codepoint(codepoint);
337 printf("Testing first 1M codepoints\n");
338 for (codepoint=0;ok && codepoint<(1<<20);codepoint++) {
339 if (gen_codepoint_utf16(codepoint, inbuf, &size) != 0) {
/*
 * Progress line every 1000 codepoints.
 * NOTE(review): the line is printed when lp_parm_bool() for
 * torture/progress returns false - the negation looks inverted; confirm.
 */
343 if (codepoint % 1000 == 0) {
344 if (!lp_parm_bool(-1, "torture", "progress", True)) {
345 printf("codepoint=%u \r", codepoint);
349 ok = test_buffer(inbuf, size, "UTF-8");
/* NOTE(review): the message says 5M but the loop below runs 500000 iterations - confirm which is intended */
353 printf("Testing 5M random UTF-16LE sequences\n");
354 for (i=0;ok && i<500000;i++) {
/* NOTE(review): same negated progress check as in the codepoint loop above - confirm */
356 if (!lp_parm_bool(-1, "torture", "progress", True)) {
357 printf("i=%u \r", i);
/* random buffer length in the range 0..99 bytes */
361 size = random() % 100;
362 for (c=0;c<size;c++) {
/* roughly 80% of the time the byte is drawn from 0..127 */
363 if (random() % 100 < 80) {
364 inbuf[c] = random() % 128;
368 if (random() % 10 == 0) {
371 if (random() % 10 == 0) {
/* &= keeps a failure from the first charset from being overwritten by the second */
375 ok &= test_buffer(inbuf, size, "UTF-8");
376 ok &= test_buffer(inbuf, size, "CP850");
/*
 * Fallback variant of the test entry point: it only reports that the
 * iconv test cannot be run.
 * NOTE(review): this shares its name with the full implementation, so the
 * two are presumably selected by a preprocessor conditional - confirm.
 */
385 BOOL torture_local_iconv(void)
387 printf("No native iconv library - can't run iconv test\n");