f23e4351c02ee97940c7e7fe64d7dae0efb07f6c
[samba.git] / source / lib / iconv.c
1 /* 
2    Unix SMB/CIFS implementation.
3    minimal iconv implementation
4    Copyright (C) Andrew Tridgell 2001
5    Copyright (C) Jelmer Vernooij 2002,2003
6    
7    This program is free software; you can redistribute it and/or modify
8    it under the terms of the GNU General Public License as published by
9    the Free Software Foundation; either version 2 of the License, or
10    (at your option) any later version.
11    
12    This program is distributed in the hope that it will be useful,
13    but WITHOUT ANY WARRANTY; without even the implied warranty of
14    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15    GNU General Public License for more details.
16    
17    You should have received a copy of the GNU General Public License
18    along with this program; if not, write to the Free Software
19    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
20 */
21
22 #include "includes.h"
23
24 /*
25  * We have to use strcasecmp here as the character conversions
26  * haven't been initialised yet. JRA.
27  */
28
29 #undef strcasecmp
30
31 /**
32  * @file
33  *
34  * @brief Samba wrapper/stub for iconv character set conversion.
35  *
36  * iconv is the XPG2 interface for converting between character
37  * encodings.  This file provides a Samba wrapper around it, and also
38  * a simple reimplementation that is used if the system does not
39  * implement iconv.
40  *
41  * Samba only works with encodings that are supersets of ASCII: ascii
42  * characters like whitespace can be tested for directly, multibyte
43  * sequences start with a byte with the high bit set, and strings are
44  * terminated by a nul byte.
45  *
46  * Note that the only function provided by iconv is conversion between
47  * characters.  It doesn't directly support operations like
48  * uppercasing or comparison.  We have to convert to UCS-2 and compare
49  * there.
50  *
51  * @sa Samba Developers Guide
52  **/
53
54 static size_t ascii_pull(void *,const char **, size_t *, char **, size_t *);
55 static size_t ascii_push(void *,const char **, size_t *, char **, size_t *);
56 static size_t latin1_push(void *,const char **, size_t *, char **, size_t *);
57 static size_t  utf8_pull(void *,const char **, size_t *, char **, size_t *);
58 static size_t  utf8_push(void *,const char **, size_t *, char **, size_t *);
59 static size_t ucs2hex_pull(void *,const char **, size_t *, char **, size_t *);
60 static size_t ucs2hex_push(void *,const char **, size_t *, char **, size_t *);
61 static size_t iconv_copy(void *,const char **, size_t *, char **, size_t *);
62 static size_t iconv_swab  (void *,const char **, size_t *, char **, size_t *);
63
64 static struct charset_functions builtin_functions[] = {
65         /* windows is really neither UCS-2 not UTF-16 */
66         {"UCS-2LE",  iconv_copy, iconv_copy},
67         {"UTF-16LE",  iconv_copy, iconv_copy},
68         {"UCS-2BE",  iconv_swab, iconv_swab},
69         {"UTF-16BE",  iconv_swab, iconv_swab},
70
71         /* we include the UTF-8 alias to cope with differing locale settings */
72         {"UTF8",   utf8_pull,  utf8_push},
73         {"UTF-8",   utf8_pull,  utf8_push},
74         {"ASCII", ascii_pull, ascii_push},
75         {"646", ascii_pull, ascii_push},
76         {"ISO-8859-1", ascii_pull, latin1_push},
77         {"UCS2-HEX", ucs2hex_pull, ucs2hex_push},
78         {NULL, NULL, NULL}
79 };
80
81 static struct charset_functions *charsets = NULL;
82
83 static struct charset_functions *find_charset_functions(const char *name) 
84 {
85         struct charset_functions *c = charsets;
86
87         while(c) {
88                 if (strcasecmp(name, c->name) == 0) {
89                         return c;
90                 }
91                 c = c->next;
92         }
93
94         return NULL;
95 }
96
97 NTSTATUS smb_register_charset(struct charset_functions *funcs) 
98 {
99         if (!funcs) {
100                 return NT_STATUS_INVALID_PARAMETER;
101         }
102
103         DEBUG(5, ("Attempting to register new charset %s\n", funcs->name));
104         /* Check whether we already have this charset... */
105         if (find_charset_functions(funcs->name)) {
106                 DEBUG(0, ("Duplicate charset %s, not registering\n", funcs->name));
107                 return NT_STATUS_OBJECT_NAME_COLLISION;
108         }
109
110         funcs->next = funcs->prev = NULL;
111         DEBUG(5, ("Registered charset %s\n", funcs->name));
112         DLIST_ADD(charsets, funcs);
113         return NT_STATUS_OK;
114 }
115
116 static void lazy_initialize_iconv(void)
117 {
118         static BOOL initialized;
119         int i;
120
121         if (!initialized) {
122                 initialized = True;
123                 for(i = 0; builtin_functions[i].name; i++) 
124                         smb_register_charset(&builtin_functions[i]);
125                 static_init_charset;
126         }
127 }
128
/**
 * Thin wrapper around the system iconv().
 *
 * When the conversion fails the descriptor is reset with an all-NULL
 * iconv() call, so that no shift state is left behind for character
 * sets like SJIS; errno from the failed conversion is preserved across
 * that reset.
 *
 * Without HAVE_NATIVE_ICONV this always fails with errno set to EINVAL.
 *
 * @return the value returned by iconv(), i.e. (size_t)-1 on failure.
 **/
static size_t sys_iconv(void *cd,
                        const char **inbuf, size_t *inbytesleft,
                        char **outbuf, size_t *outbytesleft)
{
#ifdef HAVE_NATIVE_ICONV
        iconv_t handle = (iconv_t)cd;
        size_t converted;
        int saved_errno;

        converted = iconv(handle,
                          CONST_DISCARD(char **, inbuf), inbytesleft,
                          outbuf, outbytesleft);
        if (converted != (size_t)-1) {
                return converted;
        }

        /* drop any pending shift state, but report the original error */
        saved_errno = errno;
        iconv(handle, NULL, NULL, NULL, NULL);
        errno = saved_errno;

        return converted;
#else
        errno = EINVAL;
        return (size_t)-1;
#endif
}
151
152 /**
153  * This is a simple portable iconv() implementaion.
154  *
155  * It only knows about a very small number of character sets - just
156  * enough that Samba works on systems that don't have iconv.
157  **/
158 size_t smb_iconv(smb_iconv_t cd, 
159                  const char **inbuf, size_t *inbytesleft,
160                  char **outbuf, size_t *outbytesleft)
161 {
162         char cvtbuf[2048];
163         char *bufp = cvtbuf;
164         size_t bufsize;
165
166         /* in many cases we can go direct */
167         if (cd->direct) {
168                 return cd->direct(cd->cd_direct, 
169                                   inbuf, inbytesleft, outbuf, outbytesleft);
170         }
171
172
173         /* otherwise we have to do it chunks at a time */
174         while (*inbytesleft > 0) {
175                 bufp = cvtbuf;
176                 bufsize = sizeof(cvtbuf);
177                 
178                 if (cd->pull(cd->cd_pull, 
179                              inbuf, inbytesleft, &bufp, &bufsize) == -1
180                     && errno != E2BIG) return -1;
181
182                 bufp = cvtbuf;
183                 bufsize = sizeof(cvtbuf) - bufsize;
184
185                 if (cd->push(cd->cd_push, 
186                              (const char **)&bufp, &bufsize, 
187                              outbuf, outbytesleft) == -1) return -1;
188         }
189
190         return 0;
191 }
192
193
194 static BOOL is_utf16(const char *name)
195 {
196         return strcasecmp(name, "UCS-2LE") == 0 ||
197                 strcasecmp(name, "UTF-16LE") == 0;
198 }
199
200 /*
201   simple iconv_open() wrapper
202  */
203 smb_iconv_t smb_iconv_open(const char *tocode, const char *fromcode)
204 {
205         smb_iconv_t ret;
206         struct charset_functions *from, *to;
207         
208         lazy_initialize_iconv();
209         from = charsets;
210         to = charsets;
211
212         ret = SMB_MALLOC_P(struct _smb_iconv_t);
213         if (!ret) {
214                 errno = ENOMEM;
215                 return (smb_iconv_t)-1;
216         }
217         memset(ret, 0, sizeof(struct _smb_iconv_t));
218
219         ret->from_name = SMB_STRDUP(fromcode);
220         ret->to_name = SMB_STRDUP(tocode);
221
222         /* check for the simplest null conversion */
223         if (strcasecmp(fromcode, tocode) == 0) {
224                 ret->direct = iconv_copy;
225                 return ret;
226         }
227
228         /* check if we have a builtin function for this conversion */
229         from = find_charset_functions(fromcode);
230         if(from)ret->pull = from->pull;
231         
232         to = find_charset_functions(tocode);
233         if(to)ret->push = to->push;
234
235         /* check if we can use iconv for this conversion */
236 #ifdef HAVE_NATIVE_ICONV
237         if (!ret->pull) {
238                 ret->cd_pull = iconv_open("UTF-16LE", fromcode);
239                 if (ret->cd_pull == (iconv_t)-1)
240                         ret->cd_pull = iconv_open("UCS-2LE", fromcode);
241                 if (ret->cd_pull != (iconv_t)-1)
242                         ret->pull = sys_iconv;
243         }
244
245         if (!ret->push) {
246                 ret->cd_push = iconv_open(tocode, "UTF-16LE");
247                 if (ret->cd_push == (iconv_t)-1)
248                         ret->cd_push = iconv_open(tocode, "UCS-2LE");
249                 if (ret->cd_push != (iconv_t)-1)
250                         ret->push = sys_iconv;
251         }
252 #endif
253         
254         /* check if there is a module available that can do this conversion */
255         if (!ret->pull && NT_STATUS_IS_OK(smb_probe_module("charset", fromcode))) {
256                 if(!(from = find_charset_functions(fromcode)))
257                         DEBUG(0, ("Module %s doesn't provide charset %s!\n", fromcode, fromcode));
258                 else 
259                         ret->pull = from->pull;
260         }
261
262         if (!ret->push && NT_STATUS_IS_OK(smb_probe_module("charset", tocode))) {
263                 if(!(to = find_charset_functions(tocode)))
264                         DEBUG(0, ("Module %s doesn't provide charset %s!\n", tocode, tocode));
265                 else 
266                         ret->push = to->push;
267         }
268
269         if (!ret->push || !ret->pull) {
270                 SAFE_FREE(ret->from_name);
271                 SAFE_FREE(ret->to_name);
272                 SAFE_FREE(ret);
273                 errno = EINVAL;
274                 return (smb_iconv_t)-1;
275         }
276
277         /* check for conversion to/from ucs2 */
278         if (is_utf16(fromcode) && to) {
279                 ret->direct = to->push;
280                 ret->push = ret->pull = NULL;
281                 return ret;
282         }
283
284         if (is_utf16(tocode) && from) {
285                 ret->direct = from->pull;
286                 ret->push = ret->pull = NULL;
287                 return ret;
288         }
289
290         /* Check if we can do the conversion direct */
291 #ifdef HAVE_NATIVE_ICONV
292         if (is_utf16(fromcode)) {
293                 ret->direct = sys_iconv;
294                 ret->cd_direct = ret->cd_push;
295                 ret->cd_push = NULL;
296                 return ret;
297         }
298         if (is_utf16(tocode)) {
299                 ret->direct = sys_iconv;
300                 ret->cd_direct = ret->cd_pull;
301                 ret->cd_pull = NULL;
302                 return ret;
303         }
304 #endif
305
306         return ret;
307 }
308
309 /*
310   simple iconv_close() wrapper
311 */
312 int smb_iconv_close (smb_iconv_t cd)
313 {
314 #ifdef HAVE_NATIVE_ICONV
315         if (cd->cd_direct) iconv_close((iconv_t)cd->cd_direct);
316         if (cd->cd_pull) iconv_close((iconv_t)cd->cd_pull);
317         if (cd->cd_push) iconv_close((iconv_t)cd->cd_push);
318 #endif
319
320         SAFE_FREE(cd->from_name);
321         SAFE_FREE(cd->to_name);
322
323         memset(cd, 0, sizeof(*cd));
324         SAFE_FREE(cd);
325         return 0;
326 }
327
328
329 /**********************************************************************
330  the following functions implement the builtin character sets in Samba
331  and also the "test" character sets that are designed to test
332  multi-byte character set support for english users
333 ***********************************************************************/
334
/**
 * Widen single bytes to two-byte units: every input byte is copied to
 * the first output byte and followed by a zero byte.
 *
 * @return 0 when all input was consumed; (size_t)-1 with errno E2BIG
 *         when input remains because fewer than two output bytes are left.
 **/
static size_t ascii_pull(void *cd, const char **inbuf, size_t *inbytesleft,
                         char **outbuf, size_t *outbytesleft)
{
        const char *src = *inbuf;
        char *dst = *outbuf;
        size_t src_left = *inbytesleft;
        size_t dst_left = *outbytesleft;

        for (; src_left >= 1 && dst_left >= 2; src_left -= 1, dst_left -= 2) {
                *dst++ = *src++;
                *dst++ = 0;
        }

        *inbuf = src;
        *outbuf = dst;
        *inbytesleft = src_left;
        *outbytesleft = dst_left;

        if (src_left > 0) {
                errno = E2BIG;
                return -1;
        }

        return 0;
}
354
/**
 * Narrow two-byte units to 7-bit bytes: the first byte of each unit is
 * masked with 0x7F and stored; units whose second byte is non-zero are
 * counted.
 *
 * @return the number of units whose second byte was non-zero;
 *         (size_t)-1 with errno EINVAL when a single odd input byte is
 *         left over, or errno E2BIG when whole units remain unconverted.
 **/
static size_t ascii_push(void *cd, const char **inbuf, size_t *inbytesleft,
                         char **outbuf, size_t *outbytesleft)
{
        const char *src = *inbuf;
        char *dst = *outbuf;
        size_t src_left = *inbytesleft;
        size_t dst_left = *outbytesleft;
        int lossy = 0;

        for (; src_left >= 2 && dst_left >= 1; src_left -= 2, dst_left -= 1) {
                *dst++ = src[0] & 0x7F;
                if (src[1] != 0) {
                        lossy++;
                }
                src += 2;
        }

        *inbuf = src;
        *outbuf = dst;
        *inbytesleft = src_left;
        *outbytesleft = dst_left;

        if (src_left == 1) {
                errno = EINVAL;
                return -1;
        }
        if (src_left > 1) {
                errno = E2BIG;
                return -1;
        }

        return lossy;
}
381
/**
 * Narrow two-byte units to single bytes: the first byte of each unit
 * is stored unchanged; units whose second byte is non-zero are counted.
 *
 * @return the number of units whose second byte was non-zero;
 *         (size_t)-1 with errno EINVAL when a single odd input byte is
 *         left over, or errno E2BIG when whole units remain unconverted.
 **/
static size_t latin1_push(void *cd, const char **inbuf, size_t *inbytesleft,
                         char **outbuf, size_t *outbytesleft)
{
        const char *src = *inbuf;
        char *dst = *outbuf;
        size_t src_left = *inbytesleft;
        size_t dst_left = *outbytesleft;
        int lossy = 0;

        for (; src_left >= 2 && dst_left >= 1; src_left -= 2, dst_left -= 1) {
                *dst++ = src[0];
                if (src[1] != 0) {
                        lossy++;
                }
                src += 2;
        }

        *inbuf = src;
        *outbuf = dst;
        *inbytesleft = src_left;
        *outbytesleft = dst_left;

        if (src_left == 1) {
                errno = EINVAL;
                return -1;
        }
        if (src_left > 1) {
                errno = E2BIG;
                return -1;
        }

        return lossy;
}
408
/* Value of one ASCII hexadecimal digit, or -1 if ch is not a hex digit. */
static int ucs2hex_digit(char ch)
{
        if (ch >= '0' && ch <= '9') {
                return ch - '0';
        }
        if (ch >= 'a' && ch <= 'f') {
                return ch - 'a' + 10;
        }
        if (ch >= 'A' && ch <= 'F') {
                return ch - 'A' + 10;
        }
        return -1;
}

/**
 * Decode the "UCS2-HEX" test encoding into two-byte units.
 *
 * Any byte other than '@' is copied as-is and followed by a zero byte.
 * An '@' must be followed by exactly four hexadecimal digits giving a
 * 16-bit value, which is stored low byte first.
 *
 * The digits are parsed by hand rather than with sscanf(): the input is
 * a counted buffer that need not be NUL-terminated, and "%04x" would
 * also accept leading whitespace, a sign, a "0x" prefix or fewer than
 * four digits while five input bytes are still consumed.
 *
 * @return 0 when all input was consumed; (size_t)-1 with errno EINVAL
 *         for a truncated escape, EILSEQ for a malformed escape, or
 *         E2BIG when input remains because the output is full.
 **/
static size_t ucs2hex_pull(void *cd, const char **inbuf, size_t *inbytesleft,
                         char **outbuf, size_t *outbytesleft)
{
        while (*inbytesleft >= 1 && *outbytesleft >= 2) {
                unsigned v = 0;
                int i;

                if ((*inbuf)[0] != '@') {
                        /* seven bit ascii case */
                        (*outbuf)[0] = (*inbuf)[0];
                        (*outbuf)[1] = 0;
                        (*inbytesleft)  -= 1;
                        (*outbytesleft) -= 2;
                        (*inbuf)  += 1;
                        (*outbuf) += 2;
                        continue;
                }

                /* it's a hex character: '@' plus four digits */
                if (*inbytesleft < 5) {
                        errno = EINVAL;
                        return -1;
                }

                for (i = 1; i <= 4; i++) {
                        int digit = ucs2hex_digit((*inbuf)[i]);

                        if (digit < 0) {
                                errno = EILSEQ;
                                return -1;
                        }
                        v = (v << 4) | (unsigned)digit;
                }

                (*outbuf)[0] = v & 0xff;
                (*outbuf)[1] = v >> 8;
                (*inbytesleft)  -= 5;
                (*outbytesleft) -= 2;
                (*inbuf)  += 5;
                (*outbuf) += 2;
        }

        if (*inbytesleft > 0) {
                errno = E2BIG;
                return -1;
        }

        return 0;
}
451
/**
 * Encode two-byte units in the "UCS2-HEX" test encoding.
 *
 * A unit whose second byte is zero and whose first byte has the top
 * bit clear and is not '@' is written as that single byte.  Every
 * other unit is written as '@' followed by four lower-case hex digits
 * of the value read with SVAL().
 *
 * @return 0 when all input was consumed; (size_t)-1 with errno E2BIG
 *         when the output cannot hold the next character or whole units
 *         remain, or EINVAL when a single odd input byte is left over.
 **/
static size_t ucs2hex_push(void *cd, const char **inbuf, size_t *inbytesleft,
                           char **outbuf, size_t *outbytesleft)
{
        while (*inbytesleft >= 2 && *outbytesleft >= 1) {
                const char *unit = *inbuf;
                char escaped[6];
                size_t produced;

                if (unit[1] == 0 && (unit[0] & 0x80) == 0 && unit[0] != '@') {
                        /* plain character: one output byte */
                        (*outbuf)[0] = unit[0];
                        produced = 1;
                } else {
                        /* escaped form needs five output bytes */
                        if (*outbytesleft < 5) {
                                errno = E2BIG;
                                return -1;
                        }
                        snprintf(escaped, 6, "@%04x", SVAL(*inbuf, 0));
                        memcpy(*outbuf, escaped, 5);
                        produced = 5;
                }

                *inbuf += 2;
                *inbytesleft -= 2;
                *outbuf += produced;
                *outbytesleft -= produced;
        }

        if (*inbytesleft == 1) {
                errno = EINVAL;
                return -1;
        }
        if (*inbytesleft > 1) {
                errno = E2BIG;
                return -1;
        }

        return 0;
}
492
/**
 * Convert between byte orders of a 16-bit encoding by swapping the two
 * bytes of every unit.
 *
 * As many bytes as fit in both buffers are processed.  If that count is
 * odd, the final output byte is set to zero and the odd input byte is
 * still consumed.
 *
 * The count is kept in a size_t so large buffers are not truncated, and
 * each pair is read completely before it is written, so converting a
 * buffer exactly in place (*outbuf == *inbuf) is well defined.
 *
 * @return 0 when all input was consumed; (size_t)-1 with errno E2BIG
 *         when input remains because the output is full.
 **/
static size_t iconv_swab(void *cd, const char **inbuf, size_t *inbytesleft,
                         char **outbuf, size_t *outbytesleft)
{
        const char *src = *inbuf;
        char *dst = *outbuf;
        size_t n = (*inbytesleft < *outbytesleft) ? *inbytesleft : *outbytesleft;
        size_t i;

        for (i = 0; i + 1 < n; i += 2) {
                char first = src[i];
                char second = src[i + 1];

                dst[i] = second;
                dst[i + 1] = first;
        }
        if (n & 1) {
                dst[n - 1] = 0;
        }

        (*inbytesleft) -= n;
        (*outbytesleft) -= n;
        (*inbuf) += n;
        (*outbuf) += n;

        if (*inbytesleft > 0) {
                errno = E2BIG;
                return -1;
        }

        return 0;
}
517
/**
 * Identity conversion: copy bytes from the input to the output.
 *
 * As many bytes as fit in both buffers are moved with memmove(), so the
 * two buffers may overlap.  The count is kept in a size_t so that large
 * buffers are not truncated.
 *
 * @return 0 when all input was consumed; (size_t)-1 with errno E2BIG
 *         when input remains because the output is full.
 **/
static size_t iconv_copy(void *cd, const char **inbuf, size_t *inbytesleft,
                         char **outbuf, size_t *outbytesleft)
{
        size_t n = (*inbytesleft < *outbytesleft) ? *inbytesleft : *outbytesleft;

        memmove(*outbuf, *inbuf, n);

        (*inbytesleft) -= n;
        (*outbytesleft) -= n;
        (*inbuf) += n;
        (*outbuf) += n;

        if (*inbytesleft > 0) {
                errno = E2BIG;
                return -1;
        }

        return 0;
}
539
/**
 * Decode UTF-8 into little-endian 16-bit units.
 *
 * One-, two- and three-byte sequences produce one unit.  Four-byte
 * sequences produce one unit when the value is below 0x10000 (a
 * non-minimal encoding, accepted and repacked) and a surrogate pair
 * otherwise.  Four-byte sequences encoding a value above 0x10FFFF are
 * rejected: such a value cannot be expressed as a surrogate pair, and
 * encoding it anyway would set bits outside the 0xD8 lead-surrogate
 * prefix.
 *
 * On return the caller's cursors and counters describe how far the
 * conversion got, whether or not it succeeded.
 *
 * @return 0 when all input was consumed; (size_t)-1 with errno EILSEQ
 *         for a truncated or malformed sequence or an out-of-range
 *         value, EINVAL for a lead byte that starts none of the handled
 *         forms, or E2BIG when the output is full.
 **/
static size_t utf8_pull(void *cd, const char **inbuf, size_t *inbytesleft,
                         char **outbuf, size_t *outbytesleft)
{
        size_t in_left = *inbytesleft, out_left = *outbytesleft;
        const unsigned char *c = (const unsigned char *)*inbuf;
        unsigned char *uc = (unsigned char *)*outbuf;
        size_t ret = 0;

        while (in_left >= 1 && out_left >= 2) {
                if ((c[0] & 0x80) == 0) {
                        /* plain seven bit character */
                        uc[0] = c[0];
                        uc[1] = 0;
                        c  += 1;
                        in_left  -= 1;
                        out_left -= 2;
                        uc += 2;
                        continue;
                }

                if ((c[0] & 0xe0) == 0xc0) {
                        /* two byte sequence: 11 bits of payload */
                        if (in_left < 2 ||
                            (c[1] & 0xc0) != 0x80) {
                                errno = EILSEQ;
                                goto error;
                        }
                        uc[1] = (c[0]>>2) & 0x7;
                        uc[0] = (c[0]<<6) | (c[1]&0x3f);
                        c  += 2;
                        in_left  -= 2;
                        out_left -= 2;
                        uc += 2;
                        continue;
                }

                if ((c[0] & 0xf0) == 0xe0) {
                        /* three byte sequence: 16 bits of payload */
                        if (in_left < 3 ||
                            (c[1] & 0xc0) != 0x80 ||
                            (c[2] & 0xc0) != 0x80) {
                                errno = EILSEQ;
                                goto error;
                        }
                        uc[1] = ((c[0]&0xF)<<4) | ((c[1]>>2)&0xF);
                        uc[0] = (c[1]<<6) | (c[2]&0x3f);
                        c  += 3;
                        in_left  -= 3;
                        out_left -= 2;
                        uc += 2;
                        continue;
                }

                if ((c[0] & 0xf8) == 0xf0) {
                        unsigned int codepoint;

                        if (in_left < 4 ||
                            (c[1] & 0xc0) != 0x80 ||
                            (c[2] & 0xc0) != 0x80 ||
                            (c[3] & 0xc0) != 0x80) {
                                errno = EILSEQ;
                                goto error;
                        }
                        codepoint =
                                (c[3]&0x3f) |
                                ((c[2]&0x3f)<<6) |
                                ((c[1]&0x3f)<<12) |
                                ((c[0]&0x7)<<18);
                        if (codepoint < 0x10000) {
                                /* accept UTF-8 characters that are not
                                   minimally packed, but pack the result */
                                uc[0] = (codepoint & 0xFF);
                                uc[1] = (codepoint >> 8);
                                c += 4;
                                in_left -= 4;
                                out_left -= 2;
                                uc += 2;
                                continue;
                        }

                        if (codepoint > 0x10ffff) {
                                /* not representable as a surrogate pair */
                                errno = EILSEQ;
                                goto error;
                        }

                        codepoint -= 0x10000;

                        if (out_left < 4) {
                                errno = E2BIG;
                                goto error;
                        }

                        /* lead surrogate, then trail surrogate, low byte first */
                        uc[0] = (codepoint>>10) & 0xFF;
                        uc[1] = ((codepoint>>18) & 0x3) | 0xd8;
                        uc[2] = codepoint & 0xFF;
                        uc[3] = ((codepoint>>8) & 0x3) | 0xdc;
                        c  += 4;
                        in_left  -= 4;
                        out_left -= 4;
                        uc += 4;
                        continue;
                }

                /* we don't handle 5 byte sequences */
                errno = EINVAL;
                goto error;
        }

        if (in_left > 0) {
                errno = E2BIG;
                goto error;
        }
        goto done;

error:
        ret = (size_t)-1;
done:
        /* report progress to the caller on success and failure alike */
        *inbytesleft = in_left;
        *outbytesleft = out_left;
        *inbuf = (const char *)c;
        *outbuf = (char *)uc;
        return ret;
}
656
/**
 * Encode little-endian 16-bit units as UTF-8.
 *
 * Units below 0x80 become one byte, units below 0x800 two bytes, and
 * other non-surrogate units three bytes.  A lead surrogate followed by
 * a trail surrogate becomes one four-byte sequence.
 *
 * On return the caller's cursors and counters describe how far the
 * conversion got, whether or not it succeeded.
 *
 * @return 0 when all input was consumed; (size_t)-1 with errno E2BIG
 *         when the output cannot hold the next character, EINVAL when
 *         the input ends in the middle of a unit or surrogate pair (or
 *         a trail surrogate appears with fewer than four bytes left),
 *         or EILSEQ for an unpaired surrogate.
 **/
static size_t utf8_push(void *cd, const char **inbuf, size_t *inbytesleft,
                        char **outbuf, size_t *outbytesleft)
{
        size_t in_left = *inbytesleft, out_left = *outbytesleft;
        unsigned char *c = (unsigned char *)*outbuf;
        const unsigned char *uc = (const unsigned char *)*inbuf;
        size_t ret = 0;

        while (in_left >= 2 && out_left >= 1) {
                unsigned int codepoint;

                if (uc[1] == 0 && !(uc[0] & 0x80)) {
                        /* simplest case: seven bit character */
                        c[0] = uc[0];
                        in_left  -= 2;
                        out_left -= 1;
                        uc += 2;
                        c  += 1;
                        continue;
                }

                if ((uc[1]&0xf8) == 0) {
                        /* next simplest case: value below 0x800 */
                        if (out_left < 2) {
                                errno = E2BIG;
                                goto error;
                        }
                        c[0] = 0xc0 | (uc[0]>>6) | (uc[1]<<2);
                        c[1] = 0x80 | (uc[0] & 0x3f);
                        in_left  -= 2;
                        out_left -= 2;
                        uc += 2;
                        c  += 2;
                        continue;
                }

                if ((uc[1] & 0xfc) == 0xdc) {
                        /* its the second part of a 4 byte sequence. Illegal */
                        if (in_left < 4) {
                                errno = EINVAL;
                        } else {
                                errno = EILSEQ;
                        }
                        goto error;
                }

                if ((uc[1] & 0xfc) != 0xd8) {
                        /* ordinary unit outside the surrogate range */
                        codepoint = uc[0] | (uc[1]<<8);
                        if (out_left < 3) {
                                errno = E2BIG;
                                goto error;
                        }
                        c[0] = 0xe0 | (codepoint >> 12);
                        c[1] = 0x80 | ((codepoint >> 6) & 0x3f);
                        c[2] = 0x80 | (codepoint & 0x3f);

                        in_left  -= 2;
                        out_left -= 3;
                        uc  += 2;
                        c   += 3;
                        continue;
                }

                /* its the first part of a 4 byte sequence */
                if (in_left < 4) {
                        errno = EINVAL;
                        goto error;
                }
                if ((uc[3] & 0xfc) != 0xdc) {
                        errno = EILSEQ;
                        goto error;
                }
                codepoint = 0x10000 + (uc[2] | ((uc[3] & 0x3)<<8) |
                                       (uc[0]<<10) | ((uc[1] & 0x3)<<18));

                if (out_left < 4) {
                        errno = E2BIG;
                        goto error;
                }
                c[0] = 0xf0 | (codepoint >> 18);
                c[1] = 0x80 | ((codepoint >> 12) & 0x3f);
                c[2] = 0x80 | ((codepoint >> 6) & 0x3f);
                c[3] = 0x80 | (codepoint & 0x3f);

                in_left  -= 4;
                out_left -= 4;
                uc       += 4;
                c        += 4;
        }

        if (in_left == 1) {
                errno = EINVAL;
                goto error;
        }

        if (in_left > 1) {
                errno = E2BIG;
                goto error;
        }
        goto done;

error:
        ret = (size_t)-1;
done:
        /* report progress to the caller on success and failure alike */
        *inbytesleft = in_left;
        *outbytesleft = out_left;
        *inbuf  = (const char *)uc;
        *outbuf = (char *)c;
        return ret;
}
770