Use common header file for character set handling in Samba 3 and Samba 4.
[jra/samba/.git] / source3 / lib / iconv.c
1 /* 
2    Unix SMB/CIFS implementation.
3    minimal iconv implementation
4    Copyright (C) Andrew Tridgell 2001
5    Copyright (C) Jelmer Vernooij 2002,2003
6    
7    This program is free software; you can redistribute it and/or modify
8    it under the terms of the GNU General Public License as published by
9    the Free Software Foundation; either version 3 of the License, or
10    (at your option) any later version.
11    
12    This program is distributed in the hope that it will be useful,
13    but WITHOUT ANY WARRANTY; without even the implied warranty of
14    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15    GNU General Public License for more details.
16    
17    You should have received a copy of the GNU General Public License
18    along with this program.  If not, see <http://www.gnu.org/licenses/>.
19 */
20
21 #include "includes.h"
22
23 /*
24  * We have to use strcasecmp here as the character conversions
25  * haven't been initialised yet. JRA.
26  */
27
28 #undef strcasecmp
29
30 /**
31  * @file
32  *
33  * @brief Samba wrapper/stub for iconv character set conversion.
34  *
35  * iconv is the XPG2 interface for converting between character
36  * encodings.  This file provides a Samba wrapper around it, and also
37  * a simple reimplementation that is used if the system does not
38  * implement iconv.
39  *
40  * Samba only works with encodings that are supersets of ASCII: ascii
41  * characters like whitespace can be tested for directly, multibyte
42  * sequences start with a byte with the high bit set, and strings are
43  * terminated by a nul byte.
44  *
45  * Note that the only function provided by iconv is conversion between
46  * characters.  It doesn't directly support operations like
47  * uppercasing or comparison.  We have to convert to UCS-2 and compare
48  * there.
49  *
50  * @sa Samba Developers Guide
51  **/
52
53 static_decl_charset;
54
55 static size_t ascii_pull(void *,const char **, size_t *, char **, size_t *);
56 static size_t ascii_push(void *,const char **, size_t *, char **, size_t *);
57 static size_t latin1_push(void *,const char **, size_t *, char **, size_t *);
58 static size_t  utf8_pull(void *,const char **, size_t *, char **, size_t *);
59 static size_t  utf8_push(void *,const char **, size_t *, char **, size_t *);
60 static size_t ucs2hex_pull(void *,const char **, size_t *, char **, size_t *);
61 static size_t ucs2hex_push(void *,const char **, size_t *, char **, size_t *);
62 static size_t iconv_copy(void *,const char **, size_t *, char **, size_t *);
63 static size_t iconv_swab  (void *,const char **, size_t *, char **, size_t *);
64
65 static struct charset_functions builtin_functions[] = {
66         /* windows is really neither UCS-2 not UTF-16 */
67         {"UCS-2LE",  iconv_copy, iconv_copy},
68         {"UTF-16LE",  iconv_copy, iconv_copy},
69         {"UCS-2BE",  iconv_swab, iconv_swab},
70         {"UTF-16BE",  iconv_swab, iconv_swab},
71
72         /* we include the UTF-8 alias to cope with differing locale settings */
73         {"UTF8",   utf8_pull,  utf8_push},
74         {"UTF-8",   utf8_pull,  utf8_push},
75         {"ASCII", ascii_pull, ascii_push},
76         {"646", ascii_pull, ascii_push},
77         {"ISO-8859-1", ascii_pull, latin1_push},
78         {"UCS2-HEX", ucs2hex_pull, ucs2hex_push},
79         {NULL, NULL, NULL}
80 };
81
/* Head of the list of registered charsets; smb_register_charset() links
 * new entries in and find_charset_functions() walks it. */
static struct charset_functions *charsets = NULL;
83
84 static struct charset_functions *find_charset_functions(const char *name) 
85 {
86         struct charset_functions *c = charsets;
87
88         while(c) {
89                 if (strcasecmp(name, c->name) == 0) {
90                         return c;
91                 }
92                 c = c->next;
93         }
94
95         return NULL;
96 }
97
98 NTSTATUS smb_register_charset(struct charset_functions *funcs) 
99 {
100         if (!funcs) {
101                 return NT_STATUS_INVALID_PARAMETER;
102         }
103
104         DEBUG(5, ("Attempting to register new charset %s\n", funcs->name));
105         /* Check whether we already have this charset... */
106         if (find_charset_functions(funcs->name)) {
107                 DEBUG(0, ("Duplicate charset %s, not registering\n", funcs->name));
108                 return NT_STATUS_OBJECT_NAME_COLLISION;
109         }
110
111         funcs->next = funcs->prev = NULL;
112         DEBUG(5, ("Registered charset %s\n", funcs->name));
113         DLIST_ADD(charsets, funcs);
114         return NT_STATUS_OK;
115 }
116
117 static void lazy_initialize_iconv(void)
118 {
119         static bool initialized;
120         int i;
121
122         if (!initialized) {
123                 initialized = True;
124                 for(i = 0; builtin_functions[i].name; i++) 
125                         smb_register_charset(&builtin_functions[i]);
126                 static_init_charset;
127         }
128 }
129
#ifdef HAVE_NATIVE_ICONV
/*
 * Thin wrapper around the system iconv().
 *
 * When the conversion fails the descriptor is reset to its initial state,
 * so that no shift state is left behind for character sets like SJIS.
 * errno from the failed conversion is preserved across that reset.
 */
static size_t sys_iconv(void *cd, 
			const char **inbuf, size_t *inbytesleft,
			char **outbuf, size_t *outbytesleft)
{
	iconv_t handle = (iconv_t)cd;
	size_t converted;
	int conversion_errno;

	/* NOTE(review): the (void *) cast presumably papers over systems whose
	   iconv() prototype differs in the constness of inbuf - confirm */
	converted = iconv(handle, (void *)inbuf, inbytesleft,
			  outbuf, outbytesleft);
	if (converted != (size_t)-1) {
		return converted;
	}

	conversion_errno = errno;
	iconv(handle, NULL, NULL, NULL, NULL);
	errno = conversion_errno;

	return converted;
}
#endif
149
150 /**
151  * This is a simple portable iconv() implementaion.
152  *
153  * It only knows about a very small number of character sets - just
154  * enough that Samba works on systems that don't have iconv.
155  **/
156 size_t smb_iconv(smb_iconv_t cd, 
157                  const char **inbuf, size_t *inbytesleft,
158                  char **outbuf, size_t *outbytesleft)
159 {
160         char cvtbuf[2048];
161         char *bufp = cvtbuf;
162         size_t bufsize;
163
164         /* in many cases we can go direct */
165         if (cd->direct) {
166                 return cd->direct(cd->cd_direct, 
167                                   inbuf, inbytesleft, outbuf, outbytesleft);
168         }
169
170
171         /* otherwise we have to do it chunks at a time */
172         while (*inbytesleft > 0) {
173                 bufp = cvtbuf;
174                 bufsize = sizeof(cvtbuf);
175                 
176                 if (cd->pull(cd->cd_pull, 
177                              inbuf, inbytesleft, &bufp, &bufsize) == -1
178                     && errno != E2BIG) return -1;
179
180                 bufp = cvtbuf;
181                 bufsize = sizeof(cvtbuf) - bufsize;
182
183                 if (cd->push(cd->cd_push, 
184                              (const char **)&bufp, &bufsize, 
185                              outbuf, outbytesleft) == -1) return -1;
186         }
187
188         return 0;
189 }
190
191
/* Return true if name is (case-insensitively) "UCS-2LE" or "UTF-16LE". */
static bool is_utf16(const char *name)
{
	bool is_ucs2le = (strcasecmp(name, "UCS-2LE") == 0);

	return is_ucs2le || (strcasecmp(name, "UTF-16LE") == 0);
}
197
198 /*
199   simple iconv_open() wrapper
200  */
201 smb_iconv_t smb_iconv_open(const char *tocode, const char *fromcode)
202 {
203         smb_iconv_t ret;
204         struct charset_functions *from, *to;
205         
206         lazy_initialize_iconv();
207         from = charsets;
208         to = charsets;
209
210         ret = SMB_MALLOC_P(smb_iconv_t);
211         if (!ret) {
212                 errno = ENOMEM;
213                 return (smb_iconv_t)-1;
214         }
215         memset(ret, 0, sizeof(smb_iconv_t));
216
217         ret->from_name = SMB_STRDUP(fromcode);
218         ret->to_name = SMB_STRDUP(tocode);
219
220         /* check for the simplest null conversion */
221         if (strcasecmp(fromcode, tocode) == 0) {
222                 ret->direct = iconv_copy;
223                 return ret;
224         }
225
226         /* check if we have a builtin function for this conversion */
227         from = find_charset_functions(fromcode);
228         if(from)ret->pull = from->pull;
229         
230         to = find_charset_functions(tocode);
231         if(to)ret->push = to->push;
232
233         /* check if we can use iconv for this conversion */
234 #ifdef HAVE_NATIVE_ICONV
235         if (!ret->pull) {
236                 ret->cd_pull = iconv_open("UTF-16LE", fromcode);
237                 if (ret->cd_pull == (iconv_t)-1)
238                         ret->cd_pull = iconv_open("UCS-2LE", fromcode);
239                 if (ret->cd_pull != (iconv_t)-1)
240                         ret->pull = sys_iconv;
241         }
242
243         if (!ret->push) {
244                 ret->cd_push = iconv_open(tocode, "UTF-16LE");
245                 if (ret->cd_push == (iconv_t)-1)
246                         ret->cd_push = iconv_open(tocode, "UCS-2LE");
247                 if (ret->cd_push != (iconv_t)-1)
248                         ret->push = sys_iconv;
249         }
250 #endif
251         
252         /* check if there is a module available that can do this conversion */
253         if (!ret->pull && NT_STATUS_IS_OK(smb_probe_module("charset", fromcode))) {
254                 if(!(from = find_charset_functions(fromcode)))
255                         DEBUG(0, ("Module %s doesn't provide charset %s!\n", fromcode, fromcode));
256                 else 
257                         ret->pull = from->pull;
258         }
259
260         if (!ret->push && NT_STATUS_IS_OK(smb_probe_module("charset", tocode))) {
261                 if(!(to = find_charset_functions(tocode)))
262                         DEBUG(0, ("Module %s doesn't provide charset %s!\n", tocode, tocode));
263                 else 
264                         ret->push = to->push;
265         }
266
267         if (!ret->push || !ret->pull) {
268                 SAFE_FREE(ret->from_name);
269                 SAFE_FREE(ret->to_name);
270                 SAFE_FREE(ret);
271                 errno = EINVAL;
272                 return (smb_iconv_t)-1;
273         }
274
275         /* check for conversion to/from ucs2 */
276         if (is_utf16(fromcode) && to) {
277                 ret->direct = to->push;
278                 ret->push = ret->pull = NULL;
279                 return ret;
280         }
281
282         if (is_utf16(tocode) && from) {
283                 ret->direct = from->pull;
284                 ret->push = ret->pull = NULL;
285                 return ret;
286         }
287
288         /* Check if we can do the conversion direct */
289 #ifdef HAVE_NATIVE_ICONV
290         if (is_utf16(fromcode)) {
291                 ret->direct = sys_iconv;
292                 ret->cd_direct = ret->cd_push;
293                 ret->cd_push = NULL;
294                 return ret;
295         }
296         if (is_utf16(tocode)) {
297                 ret->direct = sys_iconv;
298                 ret->cd_direct = ret->cd_pull;
299                 ret->cd_pull = NULL;
300                 return ret;
301         }
302 #endif
303
304         return ret;
305 }
306
307 /*
308   simple iconv_close() wrapper
309 */
310 int smb_iconv_close (smb_iconv_t cd)
311 {
312 #ifdef HAVE_NATIVE_ICONV
313         if (cd->cd_direct) iconv_close((iconv_t)cd->cd_direct);
314         if (cd->cd_pull) iconv_close((iconv_t)cd->cd_pull);
315         if (cd->cd_push) iconv_close((iconv_t)cd->cd_push);
316 #endif
317
318         SAFE_FREE(cd->from_name);
319         SAFE_FREE(cd->to_name);
320
321         memset(cd, 0, sizeof(*cd));
322         SAFE_FREE(cd);
323         return 0;
324 }
325
326
327 /**********************************************************************
328  the following functions implement the builtin character sets in Samba
329  and also the "test" character sets that are designed to test
330  multi-byte character set support for english users
331 ***********************************************************************/
332
/*
 * Widen single bytes to two-byte little-endian units: every input byte is
 * copied unchanged and followed by a zero byte.
 *
 * Returns 0 when all input was consumed, otherwise -1 with errno set to
 * E2BIG (output space ran out).  The cd argument is unused.
 */
static size_t ascii_pull(void *cd, const char **inbuf, size_t *inbytesleft,
			 char **outbuf, size_t *outbytesleft)
{
	const char *src = *inbuf;
	char *dst = *outbuf;
	size_t src_left = *inbytesleft;
	size_t dst_left = *outbytesleft;

	for (; src_left >= 1 && dst_left >= 2; src_left -= 1, dst_left -= 2) {
		*dst++ = *src++;
		*dst++ = 0;
	}

	*inbuf = src;
	*outbuf = dst;
	*inbytesleft = src_left;
	*outbytesleft = dst_left;

	if (src_left > 0) {
		errno = E2BIG;
		return -1;
	}

	return 0;
}
352
/*
 * Narrow two-byte little-endian units to single bytes, keeping only the
 * low seven bits of the first byte of each unit.
 *
 * Returns the number of units whose second byte was non-zero when all
 * input was consumed; otherwise -1 with errno set to EINVAL (a single
 * trailing input byte) or E2BIG (output space ran out).  The cd argument
 * is unused.
 */
static size_t ascii_push(void *cd, const char **inbuf, size_t *inbytesleft,
			 char **outbuf, size_t *outbytesleft)
{
	const char *src = *inbuf;
	char *dst = *outbuf;
	size_t src_left = *inbytesleft;
	size_t dst_left = *outbytesleft;
	int lossy = 0;

	while (src_left >= 2 && dst_left >= 1) {
		*dst++ = src[0] & 0x7F;
		if (src[1] != 0) {
			lossy++;
		}
		src += 2;
		src_left -= 2;
		dst_left -= 1;
	}

	*inbuf = src;
	*outbuf = dst;
	*inbytesleft = src_left;
	*outbytesleft = dst_left;

	if (src_left == 1) {
		errno = EINVAL;
		return -1;
	}
	if (src_left > 1) {
		errno = E2BIG;
		return -1;
	}

	return lossy;
}
379
/*
 * Narrow two-byte little-endian units to single bytes by keeping the
 * first byte of each unit unchanged.
 *
 * Returns the number of units whose second byte was non-zero when all
 * input was consumed; otherwise -1 with errno set to EINVAL (a single
 * trailing input byte) or E2BIG (output space ran out).  The cd argument
 * is unused.
 */
static size_t latin1_push(void *cd, const char **inbuf, size_t *inbytesleft,
			 char **outbuf, size_t *outbytesleft)
{
	const char *src = *inbuf;
	char *dst = *outbuf;
	size_t src_left = *inbytesleft;
	size_t dst_left = *outbytesleft;
	int lossy = 0;

	while (src_left >= 2 && dst_left >= 1) {
		*dst++ = src[0];
		if (src[1] != 0) {
			lossy++;
		}
		src += 2;
		src_left -= 2;
		dst_left -= 1;
	}

	*inbuf = src;
	*outbuf = dst;
	*inbytesleft = src_left;
	*outbytesleft = dst_left;

	if (src_left == 1) {
		errno = EINVAL;
		return -1;
	}
	if (src_left > 1) {
		errno = E2BIG;
		return -1;
	}

	return lossy;
}
406
/* Decode one hexadecimal digit; returns 0-15, or -1 if ch is not one. */
static int ucs2hex_digit(char ch)
{
	if (ch >= '0' && ch <= '9') {
		return ch - '0';
	}
	if (ch >= 'a' && ch <= 'f') {
		return ch - 'a' + 10;
	}
	if (ch >= 'A' && ch <= 'F') {
		return ch - 'A' + 10;
	}
	return -1;
}

/*
 * Convert the UCS2-HEX test encoding to two-byte little-endian units.
 *
 * Any byte other than '@' is copied and followed by a zero byte.  '@'
 * introduces an escape of exactly four hexadecimal digits giving a 16-bit
 * value, which is written low byte first.
 *
 * Returns 0 when all input was consumed, otherwise -1 with errno set to
 * EINVAL (fewer than five bytes left for an escape), EILSEQ (the four
 * characters after '@' are not all hex digits) or E2BIG (output space ran
 * out).  On error the pointers stay at the offending escape.  The cd
 * argument is unused.
 */
static size_t ucs2hex_pull(void *cd, const char **inbuf, size_t *inbytesleft,
			 char **outbuf, size_t *outbytesleft)
{
	while (*inbytesleft >= 1 && *outbytesleft >= 2) {
		unsigned v = 0;
		int i;

		if ((*inbuf)[0] != '@') {
			/* plain byte case */
			(*outbuf)[0] = (*inbuf)[0];
			(*outbuf)[1] = 0;
			(*inbytesleft)  -= 1;
			(*outbytesleft) -= 2;
			(*inbuf)  += 1;
			(*outbuf) += 2;
			continue;
		}
		/* it's a hex character */
		if (*inbytesleft < 5) {
			errno = EINVAL;
			return -1;
		}

		/*
		 * Parse the digits by hand: the input is not NUL terminated,
		 * and a scanf-style conversion would skip leading whitespace
		 * (reading past the escape) and accept fewer than four digits.
		 */
		for (i = 1; i <= 4; i++) {
			int digit = ucs2hex_digit((*inbuf)[i]);

			if (digit < 0) {
				errno = EILSEQ;
				return -1;
			}
			v = (v << 4) | (unsigned)digit;
		}

		(*outbuf)[0] = v&0xff;
		(*outbuf)[1] = v>>8;
		(*inbytesleft)  -= 5;
		(*outbytesleft) -= 2;
		(*inbuf)  += 5;
		(*outbuf) += 2;
	}

	if (*inbytesleft > 0) {
		errno = E2BIG;
		return -1;
	}
	
	return 0;
}
449
/*
 * Convert two-byte units to the UCS2-HEX test encoding.
 *
 * A unit whose second byte is zero and whose first byte has the high bit
 * clear and is not '@' is written as that single byte.  Every other unit
 * is written as '@' followed by four lower-case hex digits of the value
 * read with SVAL().
 *
 * Returns 0 when all input was consumed, otherwise -1 with errno set to
 * EINVAL (a single trailing input byte) or E2BIG (output space ran out).
 * The cd argument is unused.
 */
static size_t ucs2hex_push(void *cd, const char **inbuf, size_t *inbytesleft,
			   char **outbuf, size_t *outbytesleft)
{
	while (*inbytesleft >= 2 && *outbytesleft >= 1) {
		const char *unit = *inbuf;
		char hex[6];
		size_t emitted;
		int is_plain;

		is_plain = (unit[1] == 0 &&
			    (unit[0] & 0x80) == 0 &&
			    unit[0] != '@');

		if (is_plain) {
			(*outbuf)[0] = unit[0];
			emitted = 1;
		} else {
			/* an escape needs five output bytes */
			if (*outbytesleft < 5) {
				errno = E2BIG;
				return -1;
			}
			snprintf(hex, 6, "@%04x", SVAL(*inbuf, 0));
			memcpy(*outbuf, hex, 5);
			emitted = 5;
		}

		(*inbuf)        += 2;
		(*inbytesleft)  -= 2;
		(*outbuf)       += emitted;
		(*outbytesleft) -= emitted;
	}

	if (*inbytesleft == 1) {
		errno = EINVAL;
		return -1;
	}

	if (*inbytesleft > 1) {
		errno = E2BIG;
		return -1;
	}

	return 0;
}
490
/*
 * Copy input to output with the two bytes of every pair exchanged.
 *
 * As many bytes as fit (the smaller of the two remaining counts) are
 * processed.  If that count is odd, the last output byte is set to zero
 * and the unpaired input byte is still counted as consumed.
 *
 * Returns 0 when all input was consumed, otherwise -1 with errno set to
 * E2BIG.  The cd argument is unused.
 */
static size_t iconv_swab(void *cd, const char **inbuf, size_t *inbytesleft,
			 char **outbuf, size_t *outbytesleft)
{
	/* size_t, so that lengths above INT_MAX are not truncated */
	size_t n = (*inbytesleft < *outbytesleft) ? *inbytesleft : *outbytesleft;
	size_t paired = n & ~(size_t)1;
	size_t i;

	/* both bytes are read before either is written, so this also works
	   when the output buffer is the input buffer */
	for (i = 0; i < paired; i += 2) {
		char first = (*inbuf)[i];
		char second = (*inbuf)[i + 1];

		(*outbuf)[i] = second;
		(*outbuf)[i + 1] = first;
	}
	if (n & 1) {
		(*outbuf)[n-1] = 0;
	}

	(*inbytesleft) -= n;
	(*outbytesleft) -= n;
	(*inbuf) += n;
	(*outbuf) += n;

	if (*inbytesleft > 0) {
		errno = E2BIG;
		return -1;
	}

	return 0;
}
515
/*
 * Copy input to output unchanged.
 *
 * As many bytes as fit (the smaller of the two remaining counts) are
 * copied; memmove() is used, so the buffers may overlap.
 *
 * Returns 0 when all input was consumed, otherwise -1 with errno set to
 * E2BIG.  The cd argument is unused.
 */
static size_t iconv_copy(void *cd, const char **inbuf, size_t *inbytesleft,
			 char **outbuf, size_t *outbytesleft)
{
	/* size_t, so that lengths above INT_MAX are not truncated */
	size_t n = (*inbytesleft < *outbytesleft) ? *inbytesleft : *outbytesleft;

	memmove(*outbuf, *inbuf, n);

	(*inbytesleft) -= n;
	(*outbytesleft) -= n;
	(*inbuf) += n;
	(*outbuf) += n;

	if (*inbytesleft > 0) {
		errno = E2BIG;
		return -1;
	}

	return 0;
}
537
/*
 * Convert UTF-8 to UTF-16 (little-endian byte order).
 *
 * One-, two- and three-byte sequences produce one 16-bit unit; four-byte
 * sequences produce a surrogate pair.  Sequences that are not minimally
 * packed, have bad continuation bytes, are cut short, lie above U+10FFFF,
 * or encode a surrogate code point (U+D800..U+DFFF) are rejected.
 *
 * Returns 0 when all input was consumed, otherwise -1 with errno set to
 * EILSEQ (invalid sequence), EINVAL (lead byte of a form not handled
 * here) or E2BIG (output space ran out).  In every case the in/out
 * pointers and counters reflect what was converted before the stop.  The
 * cd argument is unused.
 */
static size_t utf8_pull(void *cd, const char **inbuf, size_t *inbytesleft,
			 char **outbuf, size_t *outbytesleft)
{
	size_t in_left=*inbytesleft, out_left=*outbytesleft;
	const unsigned char *c = (const unsigned char *)*inbuf;
	unsigned char *uc = (unsigned char *)*outbuf;
	size_t ret = (size_t)-1;

	while (in_left >= 1 && out_left >= 2) {
		unsigned int codepoint;

		if ((c[0] & 0x80) == 0) {
			uc[0] = c[0];
			uc[1] = 0;
			c  += 1;
			in_left  -= 1;
			out_left -= 2;
			uc += 2;
			continue;
		}

		if ((c[0] & 0xe0) == 0xc0) {
			if (in_left < 2 ||
			    (c[1] & 0xc0) != 0x80) {
				errno = EILSEQ;
				goto out;
			}
			codepoint = (c[1]&0x3f) | ((c[0]&0x1f)<<6);
			if (codepoint < 0x80) {
				/* don't accept UTF-8 characters that are not minimally packed */
				errno = EILSEQ;
				goto out;
			}
			uc[1] = codepoint >> 8;
			uc[0] = codepoint & 0xff;
			c  += 2;
			in_left  -= 2;
			out_left -= 2;
			uc += 2;
			continue;
		}

		if ((c[0] & 0xf0) == 0xe0) {
			if (in_left < 3 ||
			    (c[1] & 0xc0) != 0x80 || 
			    (c[2] & 0xc0) != 0x80) {
				errno = EILSEQ;
				goto out;
			}
			codepoint = (c[2]&0x3f) | ((c[1]&0x3f)<<6) | ((c[0]&0xf)<<12);
			if (codepoint < 0x800) {
				/* don't accept UTF-8 characters that are not minimally packed */
				errno = EILSEQ;
				goto out;
			}
			if (codepoint >= 0xd800 && codepoint <= 0xdfff) {
				/*
				 * A surrogate is not a character; passing it
				 * through would emit an unpaired surrogate,
				 * which utf8_push() refuses to convert back.
				 */
				errno = EILSEQ;
				goto out;
			}
			uc[1] = codepoint >> 8;
			uc[0] = codepoint & 0xff;
			c  += 3;
			in_left  -= 3;
			out_left -= 2;
			uc += 2;
			continue;
		}

		if ((c[0] & 0xf8) == 0xf0) {
			if (in_left < 4 ||
			    (c[1] & 0xc0) != 0x80 || 
			    (c[2] & 0xc0) != 0x80 ||
			    (c[3] & 0xc0) != 0x80) {
				errno = EILSEQ;
				goto out;
			}
			codepoint = 
				(c[3]&0x3f) | 
				((c[2]&0x3f)<<6) | 
				((c[1]&0x3f)<<12) |
				((c[0]&0x7)<<18);
			if (codepoint < 0x10000 || codepoint > 0x10ffff) {
				/* not minimally packed, or beyond the UTF-16 range */
				errno = EILSEQ;
				goto out;
			}

			codepoint -= 0x10000;

			/* a surrogate pair needs four output bytes */
			if (out_left < 4) {
				errno = E2BIG;
				goto out;
			}

			/* high surrogate 0xD800 | top ten bits, then low
			   surrogate 0xDC00 | bottom ten bits, low byte first */
			uc[0] = (codepoint>>10) & 0xFF;
			uc[1] = (codepoint>>18) | 0xd8;
			uc[2] = codepoint & 0xFF;
			uc[3] = ((codepoint>>8) & 0x3) | 0xdc;
			c  += 4;
			in_left  -= 4;
			out_left -= 4;
			uc += 4;
			continue;
		}

		/* we don't handle 5 byte sequences */
		errno = EINVAL;
		goto out;
	}

	if (in_left > 0) {
		errno = E2BIG;
		goto out;
	}

	ret = 0;

out:
	*inbytesleft = in_left;
	*outbytesleft = out_left;
	*inbuf = (const char *)c;
	*outbuf = (char *)uc;
	return ret;
}
661
662 static size_t utf8_push(void *cd, const char **inbuf, size_t *inbytesleft,
663                         char **outbuf, size_t *outbytesleft)
664 {
665         size_t in_left=*inbytesleft, out_left=*outbytesleft;
666         uint8 *c = (uint8 *)*outbuf;
667         const uint8 *uc = (const uint8 *)*inbuf;
668
669         while (in_left >= 2 && out_left >= 1) {
670                 unsigned int codepoint;
671
672                 if (uc[1] == 0 && !(uc[0] & 0x80)) {
673                         /* simplest case */
674                         c[0] = uc[0];
675                         in_left  -= 2;
676                         out_left -= 1;
677                         uc += 2;
678                         c  += 1;
679                         continue;
680                 }
681
682                 if ((uc[1]&0xf8) == 0) {
683                         /* next simplest case */
684                         if (out_left < 2) {
685                                 errno = E2BIG;
686                                 goto error;
687                         }
688                         c[0] = 0xc0 | (uc[0]>>6) | (uc[1]<<2);
689                         c[1] = 0x80 | (uc[0] & 0x3f);
690                         in_left  -= 2;
691                         out_left -= 2;
692                         uc += 2;
693                         c  += 2;
694                         continue;
695                 }
696
697                 if ((uc[1] & 0xfc) == 0xdc) {
698                         /* its the second part of a 4 byte sequence. Illegal */
699                         if (in_left < 4) {
700                                 errno = EINVAL;
701                         } else {
702                                 errno = EILSEQ;
703                         }
704                         goto error;
705                 }
706
707                 if ((uc[1] & 0xfc) != 0xd8) {
708                         codepoint = uc[0] | (uc[1]<<8);
709                         if (out_left < 3) {
710                                 errno = E2BIG;
711                                 goto error;
712                         }
713                         c[0] = 0xe0 | (codepoint >> 12);
714                         c[1] = 0x80 | ((codepoint >> 6) & 0x3f);
715                         c[2] = 0x80 | (codepoint & 0x3f);
716                         
717                         in_left  -= 2;
718                         out_left -= 3;
719                         uc  += 2;
720                         c   += 3;
721                         continue;
722                 }
723
724                 /* its the first part of a 4 byte sequence */
725                 if (in_left < 4) {
726                         errno = EINVAL;
727                         goto error;
728                 }
729                 if ((uc[3] & 0xfc) != 0xdc) {
730                         errno = EILSEQ;
731                         goto error;
732                 }
733                 codepoint = 0x10000 + (uc[2] | ((uc[3] & 0x3)<<8) | 
734                                        (uc[0]<<10) | ((uc[1] & 0x3)<<18));
735                 
736                 if (out_left < 4) {
737                         errno = E2BIG;
738                         goto error;
739                 }
740                 c[0] = 0xf0 | (codepoint >> 18);
741                 c[1] = 0x80 | ((codepoint >> 12) & 0x3f);
742                 c[2] = 0x80 | ((codepoint >> 6) & 0x3f);
743                 c[3] = 0x80 | (codepoint & 0x3f);
744                 
745                 in_left  -= 4;
746                 out_left -= 4;
747                 uc       += 4;
748                 c        += 4;
749         }
750
751         if (in_left == 1) {
752                 errno = EINVAL;
753                 goto error;
754         }
755
756         if (in_left > 1) {
757                 errno = E2BIG;
758                 goto error;
759         }
760
761         *inbytesleft = in_left;
762         *outbytesleft = out_left;
763         *inbuf  = (char *)uc;
764         *outbuf = (char *)c;
765         
766         return 0;
767
768 error:
769         *inbytesleft = in_left;
770         *outbytesleft = out_left;
771         *inbuf  = (char *)uc;
772         *outbuf = (char *)c;
773         return -1;
774 }
775