c08524eaa03aae5c8703313d876b3ae9f8b14e3e
[kai/samba.git] / source3 / lib / iconv.c
1 /* 
2    Unix SMB/Netbios implementation.
3    Version 3.0
4    minimal iconv implementation
5    Copyright (C) Andrew Tridgell 2001
6    
7    This program is free software; you can redistribute it and/or modify
8    it under the terms of the GNU General Public License as published by
9    the Free Software Foundation; either version 2 of the License, or
10    (at your option) any later version.
11    
12    This program is distributed in the hope that it will be useful,
13    but WITHOUT ANY WARRANTY; without even the implied warranty of
14    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15    GNU General Public License for more details.
16    
17    You should have received a copy of the GNU General Public License
18    along with this program; if not, write to the Free Software
19    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
20 */
21
22 #include "includes.h"
23
/*
  forward declarations of the builtin converters referenced by the
  charsets[] table; they all share the iconv()-like calling convention
*/
static size_t ascii_pull(void *cd, char **inbuf, size_t *inbytesleft,
                         char **outbuf, size_t *outbytesleft);
static size_t ascii_push(void *cd, char **inbuf, size_t *inbytesleft,
                         char **outbuf, size_t *outbytesleft);
static size_t utf8_pull(void *cd, char **inbuf, size_t *inbytesleft,
                        char **outbuf, size_t *outbytesleft);
static size_t utf8_push(void *cd, char **inbuf, size_t *inbytesleft,
                        char **outbuf, size_t *outbytesleft);
static size_t weird_pull(void *cd, char **inbuf, size_t *inbytesleft,
                         char **outbuf, size_t *outbytesleft);
static size_t weird_push(void *cd, char **inbuf, size_t *inbytesleft,
                         char **outbuf, size_t *outbytesleft);
static size_t ucs2hex_pull(void *cd, char **inbuf, size_t *inbytesleft,
                           char **outbuf, size_t *outbytesleft);
static size_t ucs2hex_push(void *cd, char **inbuf, size_t *inbytesleft,
                           char **outbuf, size_t *outbytesleft);
static size_t iconv_copy(void *cd, char **inbuf, size_t *inbytesleft,
                         char **outbuf, size_t *outbytesleft);
33
34 /*
35   for each charset we have a function that pulls from that charset to 
36   a ucs2 buffer, and a function that pushes to a ucs2 buffer 
37 */
38 static struct {
39         char *name;
40         size_t (*pull)(void *, char **inbuf, size_t *inbytesleft,
41                        char **outbuf, size_t *outbytesleft);
42         size_t (*push)(void *, char **inbuf, size_t *inbytesleft,
43                        char **outbuf, size_t *outbytesleft);
44 } charsets[] = {
45         {"UCS-2LE",  iconv_copy, iconv_copy},
46         {"UTF8",   utf8_pull,  utf8_push},
47         {"ASCII", ascii_pull, ascii_push},
48         {"WEIRD", weird_pull, weird_push},
49         {"UCS2-HEX", ucs2hex_pull, ucs2hex_push},
50         {NULL, NULL, NULL}
51 };
52
53
/*
  wrapper around the native iconv().

  If the conversion fails the descriptor is reset to its initial state;
  this ensures that we don't have a shift state remaining for character
  sets like SJIS. The errno of the failed conversion is saved across the
  reset call so the caller can still tell E2BIG from the other errors.

  Without native iconv support this always fails with errno == EINVAL.

  Returns what iconv() returned, (size_t)-1 on error.
*/
static size_t sys_iconv(void *cd, 
                        char **inbuf, size_t *inbytesleft,
                        char **outbuf, size_t *outbytesleft)
{
#ifdef HAVE_NATIVE_ICONV
        size_t ret = iconv((iconv_t)cd, 
                           inbuf, inbytesleft, 
                           outbuf, outbytesleft);
        if (ret == (size_t)-1) {
                /* the reset must not hide why the conversion failed */
                int saved_errno = errno;
                iconv((iconv_t)cd, NULL, NULL, NULL, NULL);
                errno = saved_errno;
        }
        return ret;
#else
        errno = EINVAL;
        return (size_t)-1;
#endif
}
72
73 /*
74   this is a simple portable iconv() implementaion. It only knows about
75   a very small number of character sets - just enough that Samba works
76   on systems that don't have iconv
77  */
78 size_t smb_iconv(smb_iconv_t cd, 
79                  const char **inbuf, size_t *inbytesleft,
80                  char **outbuf, size_t *outbytesleft)
81 {
82         char cvtbuf[2048];
83         char *bufp = cvtbuf;
84         size_t bufsize;
85
86         /* in many cases we can go direct */
87         if (cd->direct) {
88                 return cd->direct(cd->cd_direct, 
89                                   (char **)inbuf, inbytesleft, outbuf, outbytesleft);
90         }
91
92
93         /* otherwise we have to do it chunks at a time */
94         while (*inbytesleft > 0) {
95                 bufp = cvtbuf;
96                 bufsize = sizeof(cvtbuf);
97                 
98                 if (cd->pull(cd->cd_pull, 
99                              (char **)inbuf, inbytesleft, &bufp, &bufsize) == -1
100                     && errno != E2BIG) return -1;
101
102                 bufp = cvtbuf;
103                 bufsize = sizeof(cvtbuf) - bufsize;
104
105                 if (cd->push(cd->cd_push, 
106                              &bufp, &bufsize, 
107                              outbuf, outbytesleft) == -1) return -1;
108         }
109
110         return 0;
111 }
112
113 /*
114   simple iconv_open() wrapper
115  */
116 smb_iconv_t smb_iconv_open(const char *tocode, const char *fromcode)
117 {
118         smb_iconv_t ret;
119         int from, to;
120
121         ret = (smb_iconv_t)malloc(sizeof(*ret));
122         if (!ret) {
123                 errno = ENOMEM;
124                 return (smb_iconv_t)-1;
125         }
126         memset(ret, 0, sizeof(*ret));
127
128         ret->from_name = strdup(fromcode);
129         ret->to_name = strdup(tocode);
130
131         /* check for the simplest null conversion */
132         if (strcmp(fromcode, tocode) == 0) {
133                 ret->direct = iconv_copy;
134                 return ret;
135         }
136
137         for (from=0; charsets[from].name; from++) {
138                 if (strcasecmp(charsets[from].name, fromcode) == 0) break;
139         }
140         for (to=0; charsets[to].name; to++) {
141                 if (strcasecmp(charsets[to].name, tocode) == 0) break;
142         }
143
144 #ifdef HAVE_NATIVE_ICONV
145         if (!charsets[from].name) {
146                 ret->pull = sys_iconv;
147                 ret->cd_pull = iconv_open("UCS-2LE", fromcode);
148                 if (ret->cd_pull == (iconv_t)-1) goto failed;
149         }
150         if (!charsets[to].name) {
151                 ret->push = sys_iconv;
152                 ret->cd_push = iconv_open(tocode, "UCS-2LE");
153                 if (ret->cd_push == (iconv_t)-1) goto failed;
154         }
155 #else
156         if (!charsets[from].name || !charsets[to].name) {
157                 goto failed;
158         }
159 #endif
160
161         /* check for conversion to/from ucs2 */
162         if (from == 0 && charsets[to].name) {
163                 ret->direct = charsets[to].push;
164                 return ret;
165         }
166         if (to == 0 && charsets[from].name) {
167                 ret->direct = charsets[from].pull;
168                 return ret;
169         }
170
171 #ifdef HAVE_NATIVE_ICONV
172         if (from == 0) {
173                 ret->direct = sys_iconv;
174                 ret->cd_direct = ret->cd_push;
175                 ret->cd_push = NULL;
176                 return ret;
177         }
178         if (to == 0) {
179                 ret->direct = sys_iconv;
180                 ret->cd_direct = ret->cd_pull;
181                 ret->cd_pull = NULL;
182                 return ret;
183         }
184 #endif
185
186         /* the general case has to go via a buffer */
187         if (!ret->pull) ret->pull = charsets[from].pull;
188         if (!ret->push) ret->push = charsets[to].push;
189         return ret;
190
191 failed:
192         SAFE_FREE(ret);
193         errno = EINVAL;
194         return (smb_iconv_t)-1;
195 }
196
197 /*
198   simple iconv_close() wrapper
199 */
200 int smb_iconv_close (smb_iconv_t cd)
201 {
202 #ifdef HAVE_NATIVE_ICONV
203         if (cd->cd_direct) iconv_close((iconv_t)cd->cd_direct);
204         if (cd->cd_pull) iconv_close((iconv_t)cd->cd_pull);
205         if (cd->cd_push) iconv_close((iconv_t)cd->cd_push);
206 #endif
207
208         SAFE_FREE(cd->from_name);
209         SAFE_FREE(cd->to_name);
210
211         memset(cd, 0, sizeof(*cd));
212         SAFE_FREE(cd);
213         return 0;
214 }
215
216
/**********************************************************************
 the following functions implement the builtin character sets in Samba
 and also the "test" character sets that are designed to test
 multi-byte character set support for English users
***********************************************************************/
222
/*
  pull ascii into ucs2: every input byte becomes that byte followed by
  a zero byte. Returns 0 when all input was consumed, or -1 with errno
  E2BIG when input remains but fewer than 2 output bytes are left.
*/
static size_t ascii_pull(void *cd, char **inbuf, size_t *inbytesleft,
                         char **outbuf, size_t *outbytesleft)
{
        while (*inbytesleft > 0) {
                if (*outbytesleft < 2) {
                        /* no room for another ucs2 character */
                        errno = E2BIG;
                        return (size_t)-1;
                }

                (*outbuf)[0] = (*inbuf)[0];
                (*outbuf)[1] = 0;

                *inbuf  += 1;
                *outbuf += 2;
                *inbytesleft  -= 1;
                *outbytesleft -= 2;
        }

        return 0;
}
242
/*
  push ucs2 out as ascii: the low byte of each character is written
  with its top bit masked off. Returns the number of characters whose
  high byte was non-zero, or -1 with errno EINVAL when a single odd
  input byte is left over, or E2BIG when the output ran out of room.
*/
static size_t ascii_push(void *cd, char **inbuf, size_t *inbytesleft,
                         char **outbuf, size_t *outbytesleft)
{
        int lossy = 0;

        while (*inbytesleft >= 2 && *outbytesleft >= 1) {
                (*outbuf)[0] = (*inbuf)[0] & 0x7F;
                if ((*inbuf)[1] != 0) {
                        lossy++;
                }

                *inbuf  += 2;
                *outbuf += 1;
                *inbytesleft  -= 2;
                *outbytesleft -= 1;
        }

        if (*inbytesleft == 0) {
                return lossy;
        }

        /* one stray byte is half a character; more means no output room */
        errno = (*inbytesleft == 1) ? EINVAL : E2BIG;
        return (size_t)-1;
}
269
270
/* value of one hex digit (either case), or -1 if c is not a hex digit */
static int ucs2hex_digit(char c)
{
        if (c >= '0' && c <= '9') return c - '0';
        if (c >= 'a' && c <= 'f') return c - 'a' + 10;
        if (c >= 'A' && c <= 'F') return c - 'A' + 10;
        return -1;
}

/*
  pull the UCS2-HEX test charset into ucs2. A byte other than '@' is
  copied as the low byte of a character with a zero high byte. '@'
  starts an escape and must be followed by exactly four hex digits
  giving the character value.

  Returns 0 when all input was consumed, or -1 with errno set to
    EINVAL  the input ends in the middle of an escape
    EILSEQ  an escape contains something other than four hex digits
    E2BIG   input remains but fewer than 2 output bytes are left
  On EINVAL/EILSEQ the pointers are left at the offending escape.
*/
static size_t ucs2hex_pull(void *cd, char **inbuf, size_t *inbytesleft,
                         char **outbuf, size_t *outbytesleft)
{
        while (*inbytesleft >= 1 && *outbytesleft >= 2) {
                unsigned v = 0;
                int i;

                if ((*inbuf)[0] != '@') {
                        /* seven bit ascii case */
                        (*outbuf)[0] = (*inbuf)[0];
                        (*outbuf)[1] = 0;
                        (*inbytesleft)  -= 1;
                        (*outbytesleft) -= 2;
                        (*inbuf)  += 1;
                        (*outbuf) += 2;
                        continue;
                }
                /* it's a hex character */
                if (*inbytesleft < 5) {
                        errno = EINVAL;
                        return (size_t)-1;
                }

                /* parse by hand: the input need not be NUL terminated,
                   and all four digits have to be present because five
                   bytes are consumed below */
                for (i = 1; i <= 4; i++) {
                        int digit = ucs2hex_digit((*inbuf)[i]);
                        if (digit < 0) {
                                errno = EILSEQ;
                                return (size_t)-1;
                        }
                        v = (v << 4) | (unsigned)digit;
                }

                /* store little endian */
                (*outbuf)[0] = v&0xff;
                (*outbuf)[1] = v>>8;
                (*inbytesleft)  -= 5;
                (*outbytesleft) -= 2;
                (*inbuf)  += 5;
                (*outbuf) += 2;
        }

        if (*inbytesleft > 0) {
                errno = E2BIG;
                return (size_t)-1;
        }
        
        return 0;
}
313
/*
  push ucs2 out in the UCS2-HEX test charset. A character with a zero
  high byte, the top bit of the low byte clear and not equal to '@' is
  written as one byte; everything else is written as '@' followed by
  four hex digits.

  Returns 0 on success, or -1 with errno EINVAL when a single odd input
  byte is left over, or E2BIG when the output ran out of room.
*/
static size_t ucs2hex_push(void *cd, char **inbuf, size_t *inbytesleft,
                           char **outbuf, size_t *outbytesleft)
{
        while (*inbytesleft >= 2 && *outbytesleft >= 1) {
                size_t written;
                int is_plain = ((*inbuf)[1] == 0 && 
                                ((*inbuf)[0] & 0x80) == 0 &&
                                (*inbuf)[0] != '@');

                if (is_plain) {
                        (*outbuf)[0] = (*inbuf)[0];
                        written = 1;
                } else {
                        char hex[6];

                        /* an escape takes five output bytes */
                        if (*outbytesleft < 5) {
                                errno = E2BIG;
                                return (size_t)-1;
                        }
                        snprintf(hex, 6, "@%04x", SVAL(*inbuf, 0));
                        memcpy(*outbuf, hex, 5);
                        written = 5;
                }

                *inbuf  += 2;
                *outbuf += written;
                *inbytesleft  -= 2;
                *outbytesleft -= written;
        }

        if (*inbytesleft == 0) {
                return 0;
        }

        /* one stray byte is half a character; more means no output room */
        errno = (*inbytesleft == 1) ? EINVAL : E2BIG;
        return (size_t)-1;
}
354
355
/* the "weird" character set is very useful for testing multi-byte
   support and finding bugs. Don't use on a production system! 

   Each entry maps one plain character onto a multi-byte sequence.
   The entry with from == 0 ends the table.
*/
static struct {
        char from;      /* the character on the ucs2 side */
        char *to;       /* its byte sequence in the weird charset */
        int len;        /* number of bytes in 'to' */
} weird_table[] = {
        { 'q', "^q^", 3 },
        { 'Q', "^Q^", 3 },
        { 0,   NULL,  0 }
};
368
369 static size_t weird_pull(void *cd, char **inbuf, size_t *inbytesleft,
370                          char **outbuf, size_t *outbytesleft)
371 {
372         while (*inbytesleft >= 1 && *outbytesleft >= 2) {
373                 int i;
374                 int done = 0;
375                 for (i=0;weird_table[i].from;i++) {
376                         if (strncmp((*inbuf), 
377                                     weird_table[i].to, 
378                                     weird_table[i].len) == 0) {
379                                 if (*inbytesleft < weird_table[i].len) {
380                                         DEBUG(0,("ERROR: truncated weird string\n"));
381                                         /* smb_panic("weird_pull"); */
382
383                                 } else {
384                                         (*outbuf)[0] = weird_table[i].from;
385                                         (*outbuf)[1] = 0;
386                                         (*inbytesleft)  -= weird_table[i].len;
387                                         (*outbytesleft) -= 2;
388                                         (*inbuf)  += weird_table[i].len;
389                                         (*outbuf) += 2;
390                                         done = 1;
391                                         break;
392                                 }
393                         }
394                 }
395                 if (done) continue;
396                 (*outbuf)[0] = (*inbuf)[0];
397                 (*outbuf)[1] = 0;
398                 (*inbytesleft)  -= 1;
399                 (*outbytesleft) -= 2;
400                 (*inbuf)  += 1;
401                 (*outbuf) += 2;
402         }
403
404         if (*inbytesleft > 0) {
405                 errno = E2BIG;
406                 return -1;
407         }
408         
409         return 0;
410 }
411
412 static size_t weird_push(void *cd, char **inbuf, size_t *inbytesleft,
413                          char **outbuf, size_t *outbytesleft)
414 {
415         int ir_count=0;
416
417         while (*inbytesleft >= 2 && *outbytesleft >= 1) {
418                 int i;
419                 int done=0;
420                 for (i=0;weird_table[i].from;i++) {
421                         if ((*inbuf)[0] == weird_table[i].from &&
422                             (*inbuf)[1] == 0) {
423                                 if (*outbytesleft < weird_table[i].len) {
424                                         DEBUG(0,("No room for weird character\n"));
425                                         /* smb_panic("weird_push"); */
426                                 } else {
427                                         memcpy(*outbuf, weird_table[i].to, 
428                                                weird_table[i].len);
429                                         (*inbytesleft)  -= 2;
430                                         (*outbytesleft) -= weird_table[i].len;
431                                         (*inbuf)  += 2;
432                                         (*outbuf) += weird_table[i].len;
433                                         done = 1;
434                                         break;
435                                 }
436                         }
437                 }
438                 if (done) continue;
439
440                 (*outbuf)[0] = (*inbuf)[0];
441                 if ((*inbuf)[1]) ir_count++;
442                 (*inbytesleft)  -= 2;
443                 (*outbytesleft) -= 1;
444                 (*inbuf)  += 2;
445                 (*outbuf) += 1;
446         }
447
448         if (*inbytesleft == 1) {
449                 errno = EINVAL;
450                 return -1;
451         }
452
453         if (*inbytesleft > 1) {
454                 errno = E2BIG;
455                 return -1;
456         }
457         
458         return ir_count;
459 }
460
/*
  the null conversion: copy as many bytes as both buffers allow. The
  buffers may overlap. Returns 0 when all input was copied, or -1 with
  errno E2BIG when the output was too small to take everything.
*/
static size_t iconv_copy(void *cd, char **inbuf, size_t *inbytesleft,
                         char **outbuf, size_t *outbytesleft)
{
        /* a size_t count, so lengths above INT_MAX are not truncated */
        size_t n = (*inbytesleft < *outbytesleft) ? *inbytesleft : *outbytesleft;

        memmove(*outbuf, *inbuf, n);

        (*inbytesleft) -= n;
        (*outbytesleft) -= n;
        (*inbuf) += n;
        (*outbuf) += n;

        if (*inbytesleft > 0) {
                errno = E2BIG;
                return (size_t)-1;
        }

        return 0;
}
482
/*
  pull utf8 into little endian ucs2. One, two and three byte utf8
  sequences are decoded.

  Returns 0 when all input was consumed, or -1 with errno set to
    EINVAL  the input ends in the middle of a multi-byte sequence
    EILSEQ  a byte that can not start one of the sequences above, or a
            following byte that is not of the form 10xxxxxx
    E2BIG   input remains but fewer than 2 output bytes are left
  On EINVAL/EILSEQ the pointers are left at the offending sequence.
*/
static size_t utf8_pull(void *cd, char **inbuf, size_t *inbytesleft,
                         char **outbuf, size_t *outbytesleft)
{
        while (*inbytesleft >= 1 && *outbytesleft >= 2) {
                unsigned char *c = (unsigned char *)*inbuf;
                unsigned char *uc = (unsigned char *)*outbuf;
                int len = 1;

                if ((c[0] & 0x80) == 0) {
                        uc[0] = c[0];
                        uc[1] = 0;
                } else if ((c[0] & 0xf0) == 0xe0) {
                        if (*inbytesleft < 3) {
                                DEBUG(0,("short utf8 char\n"));
                                goto badseq;
                        }
                        if ((c[1] & 0xc0) != 0x80 || (c[2] & 0xc0) != 0x80) {
                                goto illseq;
                        }
                        uc[1] = ((c[0]&0xF)<<4) | ((c[1]>>2)&0xF);
                        uc[0] = (c[1]<<6) | (c[2]&0x3f);
                        len = 3;
                } else if ((c[0] & 0xe0) == 0xc0) {
                        if (*inbytesleft < 2) {
                                DEBUG(0,("short utf8 char\n"));
                                goto badseq;
                        }
                        if ((c[1] & 0xc0) != 0x80) {
                                goto illseq;
                        }
                        uc[1] = (c[0]>>2) & 0x7;
                        uc[0] = (c[0]<<6) | (c[1]&0x3f);
                        len = 2;
                } else {
                        /* a stray continuation byte or the start of a
                           longer sequence; without this branch two
                           unset output bytes would be emitted */
                        goto illseq;
                }

                (*inbuf)  += len;
                (*inbytesleft)  -= len;
                (*outbytesleft) -= 2;
                (*outbuf) += 2;
        }

        if (*inbytesleft > 0) {
                errno = E2BIG;
                return (size_t)-1;
        }
        
        return 0;

badseq:
        /* incomplete sequence at the end of the input */
        errno = EINVAL;
        return (size_t)-1;

illseq:
        /* input that is not acceptable utf8 for this decoder */
        errno = EILSEQ;
        return (size_t)-1;
}
529
/*
  push little endian ucs2 out as utf8. A character with any of the top
  five bits of its high byte set takes three bytes; otherwise one with
  a non-zero high byte or the top bit of the low byte set takes two;
  everything else is a single byte.

  Returns 0 on success, or -1 with errno EINVAL when a single odd input
  byte is left over, or E2BIG when the output ran out of room.
*/
static size_t utf8_push(void *cd, char **inbuf, size_t *inbytesleft,
                         char **outbuf, size_t *outbytesleft)
{
        while (*inbytesleft >= 2 && *outbytesleft >= 1) {
                unsigned char *utf8 = (unsigned char *)*outbuf;
                unsigned char *ucs2 = (unsigned char *)*inbuf;
                int width;

                /* work out how many bytes this character needs */
                if (ucs2[1] & 0xf8) {
                        width = 3;
                } else if (ucs2[1] | (ucs2[0] & 0x80)) {
                        width = 2;
                } else {
                        width = 1;
                }

                /* can only trigger for width 2 or 3, the loop
                   condition already guarantees one byte */
                if (*outbytesleft < (size_t)width) {
                        DEBUG(0,("short utf8 write\n"));
                        goto toobig;
                }

                switch (width) {
                case 3:
                        utf8[0] = 0xe0 | (ucs2[1]>>4);
                        utf8[1] = 0x80 | ((ucs2[1]&0xF)<<2) | (ucs2[0]>>6);
                        utf8[2] = 0x80 | (ucs2[0]&0x3f);
                        break;
                case 2:
                        utf8[0] = 0xc0 | (ucs2[1]<<2) | (ucs2[0]>>6);
                        utf8[1] = 0x80 | (ucs2[0]&0x3f);
                        break;
                default:
                        utf8[0] = ucs2[0];
                        break;
                }

                *inbuf  += 2;
                *outbuf += width;
                *inbytesleft  -= 2;
                *outbytesleft -= width;
        }

        if (*inbytesleft == 1) {
                /* half a ucs2 character */
                errno = EINVAL;
                return (size_t)-1;
        }

        if (*inbytesleft > 1) {
                goto toobig;
        }
        
        return 0;

toobig:
        errno = E2BIG;
        return (size_t)-1;
}
582