2 Unix SMB/CIFS implementation.
4 local testing of iconv routines. This tests the system iconv code against
5 the built-in iconv code
7 Copyright (C) Andrew Tridgell 2004
9 This program is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 2 of the License, or
12 (at your option) any later version.
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with this program; if not, write to the Free Software
21 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
25 #include "torture/torture.h"
26 #include "system/iconv.h"
27 #include "system/time.h"
28 #include "libcli/raw/libcliraw.h"
29 #include "torture/util.h"
33 generate a UTF-16LE buffer for a given unicode codepoint
35 static int gen_codepoint_utf16(unsigned int codepoint,
36 char *buf, size_t *size)
41 size_t size_in, size_out, ret;
43 cd = iconv_open("UTF-16LE", "UCS-4LE");
44 if (cd == (iconv_t)-1) {
50 in[0] = codepoint & 0xFF;
51 in[1] = (codepoint>>8) & 0xFF;
52 in[2] = (codepoint>>16) & 0xFF;
53 in[3] = (codepoint>>24) & 0xFF;
59 ret = iconv(cd, &ptr_in, &size_in, &buf, &size_out);
68 work out the unicode codepoint of the first character in the buffer, where the buffer is encoded in the given charset
70 static unsigned int get_codepoint(char *buf, size_t size, const char *charset)
75 size_t size_out, size_in, ret;
77 cd = iconv_open("UCS-4LE", charset);
80 ptr_out = (char *)out;
81 size_out = sizeof(out);
82 memset(out, 0, sizeof(out));
84 ret = iconv(cd, &buf, &size_in, &ptr_out, &size_out);
88 return out[0] | (out[1]<<8) | (out[2]<<16) | (out[3]<<24);
92 display a buffer with name prefix
94 static void show_buf(const char *name, uint8_t *buf, size_t size)
98 for (i=0;i<size;i++) {
99 printf("%02x ", buf[i]);
105 given a UTF-16LE buffer, test the system and built-in iconv code to
106 make sure they do exactly the same thing in converting the buffer to
107 "charset", then convert it back again and ensure we get the same buffer back
110 static bool test_buffer(struct torture_context *test,
111 uint8_t *inbuf, size_t size, const char *charset)
113 uint8_t buf1[1000], buf2[1000], buf3[1000];
114 size_t outsize1, outsize2, outsize3;
117 size_t size_in1, size_in2, size_in3;
118 size_t ret1, ret2, ret3, len1, len2;
121 static smb_iconv_t cd2, cd3;
122 static const char *last_charset;
124 if (cd && last_charset) {
126 smb_iconv_close(cd2);
127 smb_iconv_close(cd3);
132 cd = iconv_open(charset, "UTF-16LE");
133 if (cd == (iconv_t)-1) {
135 talloc_asprintf(test,
136 "failed to open %s to UTF-16LE\n",
141 cd2 = smb_iconv_open(charset, "UTF-16LE");
142 cd3 = smb_iconv_open("UTF-16LE", charset);
143 last_charset = charset;
146 /* internal convert to charset - placing result in buf1 */
147 ptr_in = (const char *)inbuf;
148 ptr_out = (char *)buf1;
150 outsize1 = sizeof(buf1);
152 memset(ptr_out, 0, outsize1);
154 ret1 = smb_iconv(cd2, &ptr_in, &size_in1, &ptr_out, &outsize1);
157 /* system convert to charset - placing result in buf2 */
158 ptr_in = (const char *)inbuf;
159 ptr_out = (char *)buf2;
161 outsize2 = sizeof(buf2);
163 memset(ptr_out, 0, outsize2);
165 ret2 = iconv(cd, discard_const_p(char *, &ptr_in), &size_in2, &ptr_out, &outsize2);
168 len1 = sizeof(buf1) - outsize1;
169 len2 = sizeof(buf2) - outsize2;
171 /* codepoints above 1M are not interesting for now */
173 memcmp(buf1, buf2, len1) == 0 &&
174 get_codepoint((char *)(buf2+len1), len2-len1, charset) >= (1<<20)) {
178 memcmp(buf1, buf2, len2) == 0 &&
179 get_codepoint((char *)(buf1+len2), len1-len2, charset) >= (1<<20)) {
183 torture_assert_int_equal(test, ret1, ret2, "ret mismatch");
185 if (errno1 != errno2) {
186 show_buf(" rem1:", inbuf+(size-size_in1), size_in1);
187 show_buf(" rem2:", inbuf+(size-size_in2), size_in2);
188 torture_fail(test, talloc_asprintf(test,
189 "e1=%s e2=%s", strerror(errno1), strerror(errno2)));
192 torture_assert_int_equal(test, outsize1, outsize2, "outsize mismatch");
194 torture_assert_int_equal(test, size_in1, size_in2, "size_in mismatch");
197 memcmp(buf1, buf2, len1) != 0) {
198 torture_comment(test, "size=%d ret1=%d ret2=%d", (int)size, (int)ret1, (int)ret2);
199 show_buf(" IN1:", inbuf, size-size_in1);
200 show_buf(" IN2:", inbuf, size-size_in2);
201 show_buf("OUT1:", buf1, len1);
202 show_buf("OUT2:", buf2, len2);
203 if (len2 > len1 && memcmp(buf1, buf2, len1) == 0) {
204 torture_comment(test, "next codepoint is %u",
205 get_codepoint((char *)(buf2+len1), len2-len1, charset));
207 if (len1 > len2 && memcmp(buf1, buf2, len2) == 0) {
208 torture_comment(test, "next codepoint is %u",
209 get_codepoint((char *)(buf1+len2),len1-len2, charset));
212 torture_fail(test, "failed");
215 /* convert back to UTF-16, putting result in buf3 */
216 size = size - size_in1;
217 ptr_in = (const char *)buf1;
218 ptr_out = (char *)buf3;
220 outsize3 = sizeof(buf3);
222 memset(ptr_out, 0, outsize3);
223 ret3 = smb_iconv(cd3, &ptr_in, &size_in3, &ptr_out, &outsize3);
225 /* we only internally support the first 1M codepoints */
226 if (outsize3 != sizeof(buf3) - size &&
227 get_codepoint((char *)(inbuf+sizeof(buf3) - outsize3),
228 size - (sizeof(buf3) - outsize3),
229 "UTF-16LE") >= (1<<20)) {
233 torture_assert_int_equal(test, ret3, 0, talloc_asprintf(test,
234 "pull failed - %s", strerror(errno)));
236 if (strncmp(charset, "UTF", 3) != 0) {
237 /* don't expect perfect mappings for non UTF charsets */
242 torture_assert_int_equal(test, outsize3, sizeof(buf3) - size,
245 if (memcmp(buf3, inbuf, size) != 0) {
246 torture_comment(test, "pull bytes mismatch:");
247 show_buf("inbuf", inbuf, size);
248 show_buf(" buf3", buf3, sizeof(buf3) - outsize3);
249 torture_fail(test, "");
250 torture_comment(test, "next codepoint is %u\n",
251 get_codepoint((char *)(inbuf+sizeof(buf3) - outsize3),
252 size - (sizeof(buf3) - outsize3),
261 test the push_codepoint() and next_codepoint() functions for a given codepoint
264 static bool test_codepoint(struct torture_context *tctx, unsigned int codepoint)
270 size = push_codepoint((char *)buf, codepoint);
271 torture_assert(tctx, size != -1 || (codepoint >= 0xd800 && codepoint <= 0x10000),
272 "Invalid Codepoint range");
274 if (size == -1) return true;
276 buf[size] = random();
277 buf[size+1] = random();
278 buf[size+2] = random();
279 buf[size+3] = random();
281 c = next_codepoint((char *)buf, &size2);
283 torture_assert(tctx, c == codepoint,
284 talloc_asprintf(tctx,
285 "next_codepoint(%u) failed - gave %u", codepoint, c));
287 torture_assert(tctx, size2 == size,
288 talloc_asprintf(tctx, "next_codepoint(%u) gave wrong size %d (should be %d)\n",
289 codepoint, (int)size2, (int)size));
294 static bool test_next_codepoint(struct torture_context *tctx)
296 unsigned int codepoint;
297 for (codepoint=0;codepoint<(1<<20);codepoint++) {
298 if (!test_codepoint(tctx, codepoint))
304 static bool test_first_1m(struct torture_context *tctx)
306 unsigned int codepoint;
308 unsigned char inbuf[1000];
310 for (codepoint=0;codepoint<(1<<20);codepoint++) {
311 if (gen_codepoint_utf16(codepoint, (char *)inbuf, &size) != 0) {
315 if (codepoint % 1000 == 0) {
316 if (torture_setting_bool(tctx, "progress", True)) {
317 torture_comment(tctx, "codepoint=%u \r", codepoint);
321 if (!test_buffer(tctx, inbuf, size, "UTF-8"))
327 static bool test_random_5m(struct torture_context *tctx)
329 unsigned char inbuf[1000];
331 for (i=0;i<500000;i++) {
336 if (torture_setting_bool(tctx, "progress", true)) {
337 torture_comment(tctx, "i=%u \r", i);
341 size = random() % 100;
342 for (c=0;c<size;c++) {
343 if (random() % 100 < 80) {
344 inbuf[c] = random() % 128;
348 if (random() % 10 == 0) {
351 if (random() % 10 == 0) {
355 if (!test_buffer(tctx, inbuf, size, "UTF-8")) {
356 printf("i=%d failed UTF-8\n", i);
360 if (!test_buffer(tctx, inbuf, size, "CP850")) {
361 printf("i=%d failed CP850\n", i);
368 struct torture_suite *torture_local_iconv(TALLOC_CTX *mem_ctx)
371 struct torture_suite *suite = torture_suite_create(mem_ctx, "ICONV");
373 if (!lp_parm_bool(-1, "iconv", "native", True)) {
374 printf("system iconv disabled - skipping test\n");
378 cd = iconv_open("UTF-16LE", "UCS-4LE");
379 if (cd == (iconv_t)-1) {
380 printf("unable to test - system iconv library does not support UTF-16LE -> UCS-4LE\n");
385 cd = iconv_open("UTF-16LE", "CP850");
386 if (cd == (iconv_t)-1) {
387 printf("unable to test - system iconv library does not support UTF-16LE -> CP850\n");
394 torture_suite_add_simple_test(suite, "next_codepoint()",
395 test_next_codepoint);
397 torture_suite_add_simple_test(suite, "first 1M codepoints",
400 torture_suite_add_simple_test(suite, "5M random UTF-16LE sequences",
407 struct torture_suite *torture_local_iconv(TALLOC_CTX *mem_ctx)
409 printf("No native iconv library - can't run iconv test\n");