2 Unix SMB/CIFS implementation.
4 local testing of iconv routines. This tests the system iconv code against
5 the built-in iconv code
7 Copyright (C) Andrew Tridgell 2004
9 This program is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 2 of the License, or
12 (at your option) any later version.
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with this program; if not, write to the Free Software
21 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
25 #include "torture/torture.h"
26 #include "system/iconv.h"
27 #include "system/time.h"
28 #include "libcli/raw/libcliraw.h"
29 #include "torture/util.h"
33 generate a UTF-16LE buffer for a given unicode codepoint
/*
  Encode a single Unicode codepoint as UTF-16LE using the system iconv().

  codepoint - value to encode; serialised below as four little-endian
              bytes (one UCS-4LE unit) that are fed to iconv()
  buf       - destination handed to iconv() as the output pointer
  size      - out-parameter; its assignment is not part of the visible
              lines (presumably the number of bytes produced - TODO confirm)

  The caller in test_first_1m compares the return value against 0.

  NOTE(review): this listing is fragmentary - the local declarations,
  the failure branch body, the setup of ptr_in/size_in/size_out and the
  return statement are not visible here.
*/
35 static int gen_codepoint_utf16(unsigned int codepoint,
36 char *buf, size_t *size)
41 size_t size_in, size_out, ret;
/* converter from UCS-4LE input to UTF-16LE output */
43 cd = iconv_open("UTF-16LE", "UCS-4LE");
/* iconv_open() signals failure with (iconv_t)-1 */
44 if (cd == (iconv_t)-1) {
/* split the codepoint into bytes, least significant byte first */
50 in[0] = codepoint & 0xFF;
51 in[1] = (codepoint>>8) & 0xFF;
52 in[2] = (codepoint>>16) & 0xFF;
53 in[3] = (codepoint>>24) & 0xFF;
/* buf is this function's own copy of the pointer, so iconv() advancing
   it does not move the caller's pointer */
59 ret = iconv(cd, &ptr_in, &size_in, &buf, &size_out);
68 work out the unicode codepoint of the first character in the buffer, which is encoded in "charset"
/*
  Decode the leading character of buf (encoded in "charset") and return
  its Unicode codepoint.

  buf     - input bytes, passed to iconv() as the input pointer
  size    - not used in the visible lines (presumably the byte length of
            buf that seeds size_in - TODO confirm)
  charset - source encoding name given to iconv_open()

  Returns the four bytes of out[] combined as a little-endian value.
  out[] is zeroed before the conversion, so whatever iconv() does not
  write contributes 0 to the result.

  NOTE(review): no check of the iconv_open() result is visible in this
  fragmentary listing - confirm against the full file.
*/
70 static unsigned int get_codepoint(char *buf, size_t size, const char *charset)
75 size_t size_out, size_in, ret;
/* converter from the caller-supplied charset to UCS-4LE */
77 cd = iconv_open("UCS-4LE", charset);
80 ptr_out = (char *)out;
81 size_out = sizeof(out);
/* start from all-zero output so unwritten bytes read back as 0 */
82 memset(out, 0, sizeof(out));
84 ret = iconv(cd, &buf, &size_in, &ptr_out, &size_out);
/* NOTE(review): the declaration of out[] is not visible; if its elements
   promote to int, out[3]<<24 overflows a signed int for bytes >= 0x80 -
   confirm the element type or shift an unsigned value */
88 return out[0] | (out[1]<<8) | (out[2]<<16) | (out[3]<<24);
92 display a buffer with name prefix
/*
  Hex-dump helper for diagnostics.

  name - label for the dump; the line that uses it is not visible in
         this listing (presumably printed before the bytes - TODO confirm)
  buf  - bytes to print
  size - number of bytes of buf to print
*/
94 static void show_buf(const char *name, uint8_t *buf, size_t size)
/* one two-digit lowercase hex value per byte, each followed by a space */
98 for (i=0;i<size;i++) {
99 printf("%02x ", buf[i]);
105 given a UTF-16LE buffer, test the system and built-in iconv code to
106 make sure they do exactly the same thing in converting the buffer to
107 "charset", then convert it back again and ensure we get the same buffer back
/*
  Compare the built-in smb_iconv() against the system iconv() for one
  input buffer, then round-trip the built-in result back to UTF-16LE.

  test    - torture context used for assertions, comments and allocations
  inbuf   - input bytes, converted from "UTF-16LE"
  size    - number of bytes in inbuf
  charset - target charset for the forward conversion

  Visible steps:
    1. convert inbuf to charset with smb_iconv (buf1) and with iconv (buf2)
    2. require matching return values, errno values, remaining input and
       remaining output space, and identical output bytes
    3. convert buf1 back to UTF-16LE with smb_iconv (buf3) and, for
       charsets whose name starts with "UTF", require buf3 to equal inbuf

  NOTE(review): this listing is fragmentary - several conditions, branch
  bodies and the return statements are not visible, so the comments below
  only describe the lines that are shown.
*/
110 static bool test_buffer(struct torture_context *test,
111 uint8_t *inbuf, size_t size, const char *charset)
/* scratch outputs: buf1 = built-in forward, buf2 = system forward,
   buf3 = built-in reverse */
113 uint8_t buf1[1000], buf2[1000], buf3[1000];
114 size_t outsize1, outsize2, outsize3;
117 size_t size_in1, size_in2, size_in3;
118 size_t ret1, ret2, ret3, len1, len2;
/* static: the built-in conversion handles and the charset they were
   opened for persist across calls */
121 static smb_iconv_t cd2, cd3;
122 static const char *last_charset;
/* handles from an earlier call exist; the rest of this branch is only
   partly visible (presumably entered when the charset changed - TODO
   confirm) and it closes the cached built-in handles */
124 if (cd && last_charset) {
126 smb_iconv_close(cd2);
127 smb_iconv_close(cd3);
/* open the system converter and both built-in converters for charset */
132 cd = iconv_open(charset, "UTF-16LE");
133 if (cd == (iconv_t)-1) {
137 cd2 = smb_iconv_open(charset, "UTF-16LE");
138 cd3 = smb_iconv_open("UTF-16LE", charset);
139 last_charset = charset;
142 /* internal convert to charset - placing result in buf1 */
143 ptr_in = (const char *)inbuf;
144 ptr_out = (char *)buf1;
146 outsize1 = sizeof(buf1);
148 memset(ptr_out, 0, outsize1);
150 ret1 = smb_iconv(cd2, &ptr_in, &size_in1, &ptr_out, &outsize1);
153 /* system convert to charset - placing result in buf2 */
154 ptr_in = (const char *)inbuf;
155 ptr_out = (char *)buf2;
157 outsize2 = sizeof(buf2);
159 memset(ptr_out, 0, outsize2);
161 ret2 = iconv(cd, discard_const_p(char *, &ptr_in), &size_in2, &ptr_out, &outsize2);
/* outsizeN holds the space still free, so the difference is the number
   of bytes each converter produced */
164 len1 = sizeof(buf1) - outsize1;
165 len2 = sizeof(buf2) - outsize2;
167 /* codepoints above 1M are not interesting for now */
169 memcmp(buf1, buf2, len1) == 0 &&
170 get_codepoint((char *)(buf2+len1), len2-len1, charset) >= (1<<20)) {
174 memcmp(buf1, buf2, len2) == 0 &&
175 get_codepoint((char *)(buf1+len2), len1-len2, charset) >= (1<<20)) {
/* both implementations must report the same conversion result */
179 torture_assert_int_equal(test, ret1, ret2, "ret mismatch");
/* errno1/errno2 are set on lines not visible here (presumably captured
   right after each conversion - TODO confirm) */
181 if (errno1 != errno2) {
/* dump the unconsumed tail of the input as seen by each converter */
182 show_buf(" rem1:", inbuf+(size-size_in1), size_in1);
183 show_buf(" rem2:", inbuf+(size-size_in2), size_in2);
184 torture_fail(test, talloc_asprintf(test,
185 "e1=%s e2=%s", strerror(errno1), strerror(errno2)));
188 torture_assert_int_equal(test, outsize1, outsize2, "outsize mismatch");
190 torture_assert_int_equal(test, size_in1, size_in2, "size_in mismatch");
193 memcmp(buf1, buf2, len1) != 0) {
/* output differs: print consumed input and both outputs for diagnosis */
194 torture_comment(test, "size=%d ret1=%d ret2=%d", (int)size, (int)ret1, (int)ret2);
195 show_buf(" IN1:", inbuf, size-size_in1);
196 show_buf(" IN2:", inbuf, size-size_in2);
197 show_buf("OUT1:", buf1, len1);
198 show_buf("OUT2:", buf2, len2);
/* when one output is a strict prefix of the other, report the codepoint
   that starts the extra bytes */
199 if (len2 > len1 && memcmp(buf1, buf2, len1) == 0) {
200 torture_comment(test, "next codepoint is %u",
201 get_codepoint((char *)(buf2+len1), len2-len1, charset));
203 if (len1 > len2 && memcmp(buf1, buf2, len2) == 0) {
204 torture_comment(test, "next codepoint is %u",
205 get_codepoint((char *)(buf1+len2),len1-len2, charset));
208 torture_fail(test, "failed");
211 /* convert back to UTF-16, putting result in buf3 */
/* from here on size is the number of input bytes the forward conversion
   consumed */
212 size = size - size_in1;
213 ptr_in = (const char *)buf1;
214 ptr_out = (char *)buf3;
216 outsize3 = sizeof(buf3);
218 memset(ptr_out, 0, outsize3);
219 ret3 = smb_iconv(cd3, &ptr_in, &size_in3, &ptr_out, &outsize3);
221 /* we only internally support the first 1M codepoints */
222 if (outsize3 != sizeof(buf3) - size &&
223 get_codepoint((char *)(inbuf+sizeof(buf3) - outsize3),
224 size - (sizeof(buf3) - outsize3),
225 "UTF-16LE") >= (1<<20)) {
/* NOTE(review): errno is read here rather than right after smb_iconv();
   calls in between may have changed it - confirm */
229 torture_assert_int_equal(test, ret3, 0, talloc_asprintf(test,
230 "pull failed - %s", strerror(errno)));
232 if (strncmp(charset, "UTF", 3) != 0) {
233 /* don't expect perfect mappings for non UTF charsets */
/* the round trip must have produced exactly size bytes */
238 torture_assert_int_equal(test, outsize3, sizeof(buf3) - size,
241 if (memcmp(buf3, inbuf, size) != 0) {
242 torture_comment(test, "pull bytes mismatch:");
243 show_buf("inbuf", inbuf, size);
244 show_buf(" buf3", buf3, sizeof(buf3) - outsize3);
/* NOTE(review): if torture_fail() returns from this function (confirm
   against torture.h), the torture_comment that follows is never reached */
245 torture_fail(test, "");
246 torture_comment(test, "next codepoint is %u\n",
247 get_codepoint((char *)(inbuf+sizeof(buf3) - outsize3),
248 size - (sizeof(buf3) - outsize3),
257 test the push_codepoint() and next_codepoint() functions for a given codepoint
/*
  Round-trip one codepoint through push_codepoint() and next_codepoint().

  tctx      - torture context used for assertions and message allocation
  codepoint - value to encode and decode again

  The encoded bytes are followed by four random bytes before decoding, so
  next_codepoint() has to stop at the right length on its own.

  NOTE(review): this listing is fragmentary - the declarations of buf,
  size, size2 and c, and the return statement, are not visible.
*/
260 static bool test_codepoint(struct torture_context *tctx, unsigned int codepoint)
266 size = push_codepoint((char *)buf, codepoint);
/* a -1 result is tolerated only for codepoints in 0xd800..0x10000 */
267 torture_assert(tctx, size != -1 || (codepoint >= 0xd800 && codepoint <= 0x10000), "Invalid Codepoint range");
/* NOTE(review): no guard is visible between the assert above and these
   writes; when size is -1 for a tolerated codepoint, buf[size] indexes
   outside buf - confirm and return early in that case */
268 buf[size] = random();
269 buf[size+1] = random();
270 buf[size+2] = random();
271 buf[size+3] = random();
/* decode again; size2 receives the number of bytes consumed */
273 c = next_codepoint((char *)buf, &size2);
275 torture_assert(tctx, c == codepoint, talloc_asprintf(tctx,
276 "next_codepoint(%u) failed - gave %u", codepoint, c));
278 torture_assert(tctx, size2 == size,
279 talloc_asprintf(tctx, "next_codepoint(%u) gave wrong size %d (should be %d)\n",
280 codepoint, (int)size2, (int)size));
/*
  Run test_codepoint() for every codepoint from 0 up to, but not
  including, 1<<20.

  tctx - torture context forwarded to test_codepoint()

  NOTE(review): what happens when test_codepoint() fails, and the final
  return, are not visible in this fragmentary listing.
*/
285 static bool test_next_codepoint(struct torture_context *tctx)
287 unsigned int codepoint;
288 for (codepoint=0;codepoint<(1<<20);codepoint++) {
289 if (!test_codepoint(tctx, codepoint))
/*
  For each codepoint from 0 up to, but not including, 1<<20: build its
  UTF-16LE form with the system iconv and run test_buffer() on it with
  "UTF-8" as the target charset.

  tctx - torture context forwarded to test_buffer()

  NOTE(review): this listing is fragmentary - the declaration of size,
  the handling of a failed gen_codepoint_utf16() call and the return
  statements are not visible.
*/
295 static bool test_first_1m(struct torture_context *tctx)
297 unsigned int codepoint;
299 unsigned char inbuf[1000];
301 for (codepoint=0;codepoint<(1<<20);codepoint++) {
/* a non-zero result is treated as "could not generate this codepoint" */
302 if (gen_codepoint_utf16(codepoint, (char *)inbuf, &size) != 0) {
/* progress output is considered once every 1000 codepoints */
306 if (codepoint % 1000 == 0) {
/* NOTE(review): because of the negation, the progress line is printed
   only when lp_parm_bool() returns false for torture:progress - this
   looks inverted, confirm the intended behaviour */
307 if (!lp_parm_bool(-1, "torture", "progress", True)) {
308 printf("codepoint=%u \r", codepoint);
312 if (!test_buffer(tctx, inbuf, size, "UTF-8"))
/*
  Feed randomly generated byte buffers through test_buffer(), once with
  "UTF-8" and once with "CP850" as the target charset.

  tctx - torture context forwarded to test_buffer()

  NOTE(review): the loop runs 500000 iterations while the function name
  and the suite label say "5M" - confirm which is intended.

  NOTE(review): this listing is fragmentary - the declarations of i, c
  and size, several branch bodies and the return statements are not
  visible.
*/
318 static bool test_random_5m(struct torture_context *tctx)
320 unsigned char inbuf[1000];
322 for (i=0;i<500000;i++) {
/* NOTE(review): because of the negation, progress is reported only when
   lp_parm_bool() returns false for torture:progress - this looks
   inverted, confirm the intended behaviour */
327 if (!lp_parm_bool(-1, "torture", "progress", True)) {
328 torture_comment(tctx, "i=%u \r", i);
/* buffer length for this round is 0..99 bytes */
332 size = random() % 100;
333 for (c=0;c<size;c++) {
/* about 80% of the bytes are drawn from the 7-bit range 0..127; the
   alternative branch is not visible in this listing */
334 if (random() % 100 < 80) {
335 inbuf[c] = random() % 128;
/* two independent 1-in-10 adjustments; their bodies are not visible */
339 if (random() % 10 == 0) {
342 if (random() % 10 == 0) {
346 if (!test_buffer(tctx, inbuf, size, "UTF-8"))
349 if (!test_buffer(tctx, inbuf, size, "CP850"))
/*
  Build the "ICONV" torture suite.

  mem_ctx - talloc context the suite is created on

  Before registering any tests it checks the iconv:native parameter and
  probes that the system iconv can open a UCS-4LE to UTF-16LE converter;
  each failed check prints a message. Three tests are then registered.

  NOTE(review): this listing is fragmentary - what is returned after
  either message, whether the probe handle is closed, and the final
  return are not visible.
*/
355 struct torture_suite *torture_local_iconv(TALLOC_CTX *mem_ctx)
358 struct torture_suite *suite = torture_suite_create(mem_ctx, "ICONV");
/* the comparison tests need the system iconv to be enabled */
360 if (!lp_parm_bool(-1, "iconv", "native", True)) {
361 printf("system iconv disabled - skipping test\n");
/* probe for the conversion that gen_codepoint_utf16() relies on */
365 cd = iconv_open("UTF-16LE", "UCS-4LE");
366 if (cd == (iconv_t)-1) {
367 printf("unable to test - system iconv library does not support UTF-16LE -> UCS-4LE\n");
373 torture_suite_add_simple_test(suite, "next_codepoint()",
374 test_next_codepoint);
376 torture_suite_add_simple_test(suite, "first 1M codepoints",
379 torture_suite_add_simple_test(suite, "5M random UTF-16LE sequences",
/*
  Alternative definition of torture_local_iconv() that only reports that
  the iconv test cannot run.

  mem_ctx - not used in the visible lines

  NOTE(review): the preprocessor guard that selects between this and the
  definition above, and the return value, are not visible in this
  fragmentary listing (presumably an #else branch for builds without
  native iconv - TODO confirm).
*/
386 struct torture_suite *torture_local_iconv(TALLOC_CTX *mem_ctx)
388 printf("No native iconv library - can't run iconv test\n");