Fix bug 6157
[ira/wip.git] / source3 / lib / iconv.c
index d161975601c5654e0282c11218d64596ea345b53..44500542f2baf8961217855a45a3922eacf610fc 100644 (file)
@@ -6,7 +6,7 @@
    
    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
-   the Free Software Foundation; either version 2 of the License, or
+   the Free Software Foundation; either version 3 of the License, or
    (at your option) any later version.
    
    This program is distributed in the hope that it will be useful,
    GNU General Public License for more details.
    
    You should have received a copy of the GNU General Public License
-   along with this program; if not, write to the Free Software
-   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */
 
 #include "includes.h"
 
+/*
+ * We have to use strcasecmp here as the character conversions
+ * haven't been initialised yet. JRA.
+ */
+
+#undef strcasecmp
 
 /**
  * @file
  * @sa Samba Developers Guide
  **/
 
-static size_t ascii_pull(void *,char **, size_t *, char **, size_t *);
-static size_t ascii_push(void *,char **, size_t *, char **, size_t *);
-static size_t  utf8_pull(void *,char **, size_t *, char **, size_t *);
-static size_t  utf8_push(void *,char **, size_t *, char **, size_t *);
-static size_t ucs2hex_pull(void *,char **, size_t *, char **, size_t *);
-static size_t ucs2hex_push(void *,char **, size_t *, char **, size_t *);
-static size_t iconv_copy(void *,char **, size_t *, char **, size_t *);
+static_decl_charset;
+
+static size_t ascii_pull(void *,const char **, size_t *, char **, size_t *);
+static size_t ascii_push(void *,const char **, size_t *, char **, size_t *);
+static size_t latin1_push(void *,const char **, size_t *, char **, size_t *);
+static size_t  utf8_pull(void *,const char **, size_t *, char **, size_t *);
+static size_t  utf8_push(void *,const char **, size_t *, char **, size_t *);
+static size_t ucs2hex_pull(void *,const char **, size_t *, char **, size_t *);
+static size_t ucs2hex_push(void *,const char **, size_t *, char **, size_t *);
+static size_t iconv_copy(void *,const char **, size_t *, char **, size_t *);
+static size_t iconv_swab  (void *,const char **, size_t *, char **, size_t *);
 
 static struct charset_functions builtin_functions[] = {
+       /* windows is really neither UCS-2 nor UTF-16 */
        {"UCS-2LE",  iconv_copy, iconv_copy},
+       {"UTF-16LE",  iconv_copy, iconv_copy},
+       {"UCS-2BE",  iconv_swab, iconv_swab},
+       {"UTF-16BE",  iconv_swab, iconv_swab},
+
+       /* we include the UTF-8 alias to cope with differing locale settings */
        {"UTF8",   utf8_pull,  utf8_push},
+       {"UTF-8",   utf8_pull,  utf8_push},
        {"ASCII", ascii_pull, ascii_push},
+       {"646", ascii_pull, ascii_push},
+       {"ISO-8859-1", ascii_pull, latin1_push},
        {"UCS2-HEX", ucs2hex_pull, ucs2hex_push},
        {NULL, NULL, NULL}
 };
@@ -66,12 +84,9 @@ static struct charset_functions *charsets = NULL;
 static struct charset_functions *find_charset_functions(const char *name) 
 {
        struct charset_functions *c = charsets;
-       pstring stripped;
 
-       module_path_get_name(name, stripped);
-       
        while(c) {
-               if (strcasecmp(stripped, c->name) == 0) {
+               if (strcasecmp(name, c->name) == 0) {
                        return c;
                }
                c = c->next;
@@ -80,29 +95,28 @@ static struct charset_functions *find_charset_functions(const char *name)
        return NULL;
 }
 
-BOOL smb_register_charset(struct charset_functions *funcs) 
+NTSTATUS smb_register_charset(struct charset_functions *funcs) 
 {
-       struct charset_functions *c = charsets;
+       if (!funcs) {
+               return NT_STATUS_INVALID_PARAMETER;
+       }
 
        DEBUG(5, ("Attempting to register new charset %s\n", funcs->name));
        /* Check whether we already have this charset... */
-       while(c) {
-               if(!strcasecmp(c->name, funcs->name)){ 
-                       DEBUG(2, ("Duplicate charset %s, not registering\n", funcs->name));
-                       return False;
-               }
-               c = c->next;
+       if (find_charset_functions(funcs->name)) {
+               DEBUG(0, ("Duplicate charset %s, not registering\n", funcs->name));
+               return NT_STATUS_OBJECT_NAME_COLLISION;
        }
 
        funcs->next = funcs->prev = NULL;
        DEBUG(5, ("Registered charset %s\n", funcs->name));
        DLIST_ADD(charsets, funcs);
-       return True;
+       return NT_STATUS_OK;
 }
 
-void lazy_initialize_iconv(void)
+static void lazy_initialize_iconv(void)
 {
-       static BOOL initialized;
+       static bool initialized;
        int i;
 
        if (!initialized) {
@@ -113,24 +127,25 @@ void lazy_initialize_iconv(void)
        }
 }
 
+#ifdef HAVE_NATIVE_ICONV
 /* if there was an error then reset the internal state,
    this ensures that we don't have a shift state remaining for
    character sets like SJIS */
 static size_t sys_iconv(void *cd, 
-                       char **inbuf, size_t *inbytesleft,
+                       const char **inbuf, size_t *inbytesleft,
                        char **outbuf, size_t *outbytesleft)
 {
-#ifdef HAVE_NATIVE_ICONV
        size_t ret = iconv((iconv_t)cd, 
-                          inbuf, inbytesleft, 
+                          (void *)inbuf, inbytesleft,
                           outbuf, outbytesleft);
-       if (ret == (size_t)-1) iconv(cd, NULL, NULL, NULL, NULL);
+       if (ret == (size_t)-1) {
+               int saved_errno = errno;
+               iconv(cd, NULL, NULL, NULL, NULL);
+               errno = saved_errno;
+       }
        return ret;
-#else
-       errno = EINVAL;
-       return -1;
-#endif
 }
+#endif
 
 /**
  * This is a simple portable iconv() implementaion.
@@ -149,7 +164,7 @@ size_t smb_iconv(smb_iconv_t cd,
        /* in many cases we can go direct */
        if (cd->direct) {
                return cd->direct(cd->cd_direct, 
-                                 (char **)inbuf, inbytesleft, outbuf, outbytesleft);
+                                 inbuf, inbytesleft, outbuf, outbytesleft);
        }
 
 
@@ -159,20 +174,27 @@ size_t smb_iconv(smb_iconv_t cd,
                bufsize = sizeof(cvtbuf);
                
                if (cd->pull(cd->cd_pull, 
-                            (char **)inbuf, inbytesleft, &bufp, &bufsize) == -1
+                            inbuf, inbytesleft, &bufp, &bufsize) == -1
                    && errno != E2BIG) return -1;
 
                bufp = cvtbuf;
                bufsize = sizeof(cvtbuf) - bufsize;
 
                if (cd->push(cd->cd_push, 
-                            &bufp, &bufsize, 
+                            (const char **)&bufp, &bufsize, 
                             outbuf, outbytesleft) == -1) return -1;
        }
 
        return 0;
 }
 
+
+static bool is_utf16(const char *name)
+{
+       return strcasecmp(name, "UCS-2LE") == 0 ||
+               strcasecmp(name, "UTF-16LE") == 0;
+}
+
 /*
   simple iconv_open() wrapper
  */
@@ -185,15 +207,15 @@ smb_iconv_t smb_iconv_open(const char *tocode, const char *fromcode)
        from = charsets;
        to = charsets;
 
-       ret = (smb_iconv_t)malloc(sizeof(*ret));
+       ret = SMB_MALLOC_P(struct smb_iconv_s);
        if (!ret) {
                errno = ENOMEM;
                return (smb_iconv_t)-1;
        }
-       memset(ret, 0, sizeof(*ret));
+       memset(ret, 0, sizeof(struct smb_iconv_s));
 
-       ret->from_name = strdup(fromcode);
-       ret->to_name = strdup(tocode);
+       ret->from_name = SMB_STRDUP(fromcode);
+       ret->to_name = SMB_STRDUP(tocode);
 
        /* check for the simplest null conversion */
        if (strcasecmp(fromcode, tocode) == 0) {
@@ -211,27 +233,31 @@ smb_iconv_t smb_iconv_open(const char *tocode, const char *fromcode)
        /* check if we can use iconv for this conversion */
 #ifdef HAVE_NATIVE_ICONV
        if (!ret->pull) {
-               ret->cd_pull = iconv_open("UCS-2LE", fromcode);
+               ret->cd_pull = iconv_open("UTF-16LE", fromcode);
+               if (ret->cd_pull == (iconv_t)-1)
+                       ret->cd_pull = iconv_open("UCS-2LE", fromcode);
                if (ret->cd_pull != (iconv_t)-1)
                        ret->pull = sys_iconv;
        }
 
        if (!ret->push) {
-               ret->cd_push = iconv_open(tocode, "UCS-2LE");
+               ret->cd_push = iconv_open(tocode, "UTF-16LE");
+               if (ret->cd_push == (iconv_t)-1)
+                       ret->cd_push = iconv_open(tocode, "UCS-2LE");
                if (ret->cd_push != (iconv_t)-1)
                        ret->push = sys_iconv;
        }
 #endif
        
        /* check if there is a module available that can do this conversion */
-       if (!ret->pull && smb_probe_module("charset", fromcode)) {
+       if (!ret->pull && NT_STATUS_IS_OK(smb_probe_module("charset", fromcode))) {
                if(!(from = find_charset_functions(fromcode)))
                        DEBUG(0, ("Module %s doesn't provide charset %s!\n", fromcode, fromcode));
                else 
                        ret->pull = from->pull;
        }
 
-       if (!ret->push && smb_probe_module("charset", tocode)) {
+       if (!ret->push && NT_STATUS_IS_OK(smb_probe_module("charset", tocode))) {
                if(!(to = find_charset_functions(tocode)))
                        DEBUG(0, ("Module %s doesn't provide charset %s!\n", tocode, tocode));
                else 
@@ -247,13 +273,13 @@ smb_iconv_t smb_iconv_open(const char *tocode, const char *fromcode)
        }
 
        /* check for conversion to/from ucs2 */
-       if (strcasecmp(fromcode, "UCS-2LE") == 0 && to) {
+       if (is_utf16(fromcode) && to) {
                ret->direct = to->push;
                ret->push = ret->pull = NULL;
                return ret;
        }
 
-       if (strcasecmp(tocode, "UCS-2LE") == 0 && from) {
+       if (is_utf16(tocode) && from) {
                ret->direct = from->pull;
                ret->push = ret->pull = NULL;
                return ret;
@@ -261,13 +287,13 @@ smb_iconv_t smb_iconv_open(const char *tocode, const char *fromcode)
 
        /* Check if we can do the conversion direct */
 #ifdef HAVE_NATIVE_ICONV
-       if (strcasecmp(fromcode, "UCS-2LE") == 0) {
+       if (is_utf16(fromcode)) {
                ret->direct = sys_iconv;
                ret->cd_direct = ret->cd_push;
                ret->cd_push = NULL;
                return ret;
        }
-       if (strcasecmp(tocode, "UCS-2LE") == 0) {
+       if (is_utf16(tocode)) {
                ret->direct = sys_iconv;
                ret->cd_direct = ret->cd_pull;
                ret->cd_pull = NULL;
@@ -304,7 +330,7 @@ int smb_iconv_close (smb_iconv_t cd)
  multi-byte character set support for english users
 ***********************************************************************/
 
-static size_t ascii_pull(void *cd, char **inbuf, size_t *inbytesleft,
+static size_t ascii_pull(void *cd, const char **inbuf, size_t *inbytesleft,
                         char **outbuf, size_t *outbytesleft)
 {
        while (*inbytesleft >= 1 && *outbytesleft >= 2) {
@@ -324,7 +350,7 @@ static size_t ascii_pull(void *cd, char **inbuf, size_t *inbytesleft,
        return 0;
 }
 
-static size_t ascii_push(void *cd, char **inbuf, size_t *inbytesleft,
+static size_t ascii_push(void *cd, const char **inbuf, size_t *inbytesleft,
                         char **outbuf, size_t *outbytesleft)
 {
        int ir_count=0;
@@ -351,8 +377,34 @@ static size_t ascii_push(void *cd, char **inbuf, size_t *inbytesleft,
        return ir_count;
 }
 
+static size_t latin1_push(void *cd, const char **inbuf, size_t *inbytesleft,
+                        char **outbuf, size_t *outbytesleft)
+{
+       int ir_count=0;
+
+       while (*inbytesleft >= 2 && *outbytesleft >= 1) {
+               (*outbuf)[0] = (*inbuf)[0];
+               if ((*inbuf)[1]) ir_count++;
+               (*inbytesleft)  -= 2;
+               (*outbytesleft) -= 1;
+               (*inbuf)  += 2;
+               (*outbuf) += 1;
+       }
+
+       if (*inbytesleft == 1) {
+               errno = EINVAL;
+               return -1;
+       }
+
+       if (*inbytesleft > 1) {
+               errno = E2BIG;
+               return -1;
+       }
+       
+       return ir_count;
+}
 
-static size_t ucs2hex_pull(void *cd, char **inbuf, size_t *inbytesleft,
+static size_t ucs2hex_pull(void *cd, const char **inbuf, size_t *inbytesleft,
                         char **outbuf, size_t *outbytesleft)
 {
        while (*inbytesleft >= 1 && *outbytesleft >= 2) {
@@ -395,7 +447,7 @@ static size_t ucs2hex_pull(void *cd, char **inbuf, size_t *inbytesleft,
        return 0;
 }
 
-static size_t ucs2hex_push(void *cd, char **inbuf, size_t *inbytesleft,
+static size_t ucs2hex_push(void *cd, const char **inbuf, size_t *inbytesleft,
                           char **outbuf, size_t *outbytesleft)
 {
        while (*inbytesleft >= 2 && *outbytesleft >= 1) {
@@ -436,8 +488,32 @@ static size_t ucs2hex_push(void *cd, char **inbuf, size_t *inbytesleft,
        return 0;
 }
 
+static size_t iconv_swab(void *cd, const char **inbuf, size_t *inbytesleft,
+                        char **outbuf, size_t *outbytesleft)
+{
+       int n;
+
+       n = MIN(*inbytesleft, *outbytesleft);
+
+       swab(*inbuf, *outbuf, (n&~1));
+       if (n&1) {
+               (*outbuf)[n-1] = 0;
+       }
+
+       (*inbytesleft) -= n;
+       (*outbytesleft) -= n;
+       (*inbuf) += n;
+       (*outbuf) += n;
+
+       if (*inbytesleft > 0) {
+               errno = E2BIG;
+               return -1;
+       }
+
+       return 0;
+}
 
-static size_t iconv_copy(void *cd, char **inbuf, size_t *inbytesleft,
+static size_t iconv_copy(void *cd, const char **inbuf, size_t *inbytesleft,
                         char **outbuf, size_t *outbytesleft)
 {
        int n;
@@ -459,103 +535,241 @@ static size_t iconv_copy(void *cd, char **inbuf, size_t *inbytesleft,
        return 0;
 }
 
-static size_t utf8_pull(void *cd, char **inbuf, size_t *inbytesleft,
+static size_t utf8_pull(void *cd, const char **inbuf, size_t *inbytesleft,
                         char **outbuf, size_t *outbytesleft)
 {
-       while (*inbytesleft >= 1 && *outbytesleft >= 2) {
-               unsigned char *c = (unsigned char *)*inbuf;
-               unsigned char *uc = (unsigned char *)*outbuf;
-               int len = 1;
+       size_t in_left=*inbytesleft, out_left=*outbytesleft;
+       const uint8 *c = (const uint8 *)*inbuf;
+       uint8 *uc = (uint8 *)*outbuf;
+
+       while (in_left >= 1 && out_left >= 2) {
+               unsigned int codepoint;
 
                if ((c[0] & 0x80) == 0) {
                        uc[0] = c[0];
                        uc[1] = 0;
-               } else if ((c[0] & 0xf0) == 0xe0) {
-                       if (*inbytesleft < 3) {
-                               DEBUG(0,("short utf8 char\n"));
-                               goto badseq;
+                       c  += 1;
+                       in_left  -= 1;
+                       out_left -= 2;
+                       uc += 2;
+                       continue;
+               }
+
+               if ((c[0] & 0xe0) == 0xc0) {
+                       if (in_left < 2 ||
+                           (c[1] & 0xc0) != 0x80) {
+                               errno = EILSEQ;
+                               goto error;
                        }
-                       uc[1] = ((c[0]&0xF)<<4) | ((c[1]>>2)&0xF);
-                       uc[0] = (c[1]<<6) | (c[2]&0x3f);
-                       len = 3;
-               } else if ((c[0] & 0xe0) == 0xc0) {
-                       if (*inbytesleft < 2) {
-                               DEBUG(0,("short utf8 char\n"));
-                               goto badseq;
+                       codepoint = (c[1]&0x3f) | ((c[0]&0x1f)<<6);
+                       if (codepoint < 0x80) {
+                               /* don't accept UTF-8 characters that are not minimally packed */
+                               errno = EILSEQ;
+                               goto error;
                        }
-                       uc[1] = (c[0]>>2) & 0x7;
-                       uc[0] = (c[0]<<6) | (c[1]&0x3f);
-                       len = 2;
+                       uc[1] = codepoint >> 8;
+                       uc[0] = codepoint & 0xff;
+                       c  += 2;
+                       in_left  -= 2;
+                       out_left -= 2;
+                       uc += 2;
+                       continue;
                }
 
-               (*inbuf)  += len;
-               (*inbytesleft)  -= len;
-               (*outbytesleft) -= 2;
-               (*outbuf) += 2;
+               if ((c[0] & 0xf0) == 0xe0) {
+                       if (in_left < 3 ||
+                           (c[1] & 0xc0) != 0x80 || 
+                           (c[2] & 0xc0) != 0x80) {
+                               errno = EILSEQ;
+                               goto error;
+                       }
+                       codepoint = (c[2]&0x3f) | ((c[1]&0x3f)<<6) | ((c[0]&0xf)<<12);
+                       if (codepoint < 0x800) {
+                               /* don't accept UTF-8 characters that are not minimally packed */
+                               errno = EILSEQ;
+                               goto error;
+                       }
+                       uc[1] = codepoint >> 8;
+                       uc[0] = codepoint & 0xff;
+                       c  += 3;
+                       in_left  -= 3;
+                       out_left -= 2;
+                       uc += 2;
+                       continue;
+               }
+
+               if ((c[0] & 0xf8) == 0xf0) {
+                       if (in_left < 4 ||
+                           (c[1] & 0xc0) != 0x80 || 
+                           (c[2] & 0xc0) != 0x80 ||
+                           (c[3] & 0xc0) != 0x80) {
+                               errno = EILSEQ;
+                               goto error;
+                       }
+                       codepoint = 
+                               (c[3]&0x3f) | 
+                               ((c[2]&0x3f)<<6) | 
+                               ((c[1]&0x3f)<<12) |
+                               ((c[0]&0x7)<<18);
+                       if (codepoint < 0x10000 || codepoint > 0x10ffff) {
+                               /* don't accept UTF-8 characters that are not minimally packed, or that lie beyond U+10FFFF */
+                               errno = EILSEQ;
+                               goto error;
+                       }
+
+                       codepoint -= 0x10000;
+
+                       if (out_left < 4) {
+                               errno = E2BIG;
+                               goto error;
+                       }
+
+                       uc[0] = (codepoint>>10) & 0xFF;
+                       uc[1] = (codepoint>>18) | 0xd8;
+                       uc[2] = codepoint & 0xFF;
+                       uc[3] = ((codepoint>>8) & 0x3) | 0xdc;
+                       c  += 4;
+                       in_left  -= 4;
+                       out_left -= 4;
+                       uc += 4;
+                       continue;
+               }
+
+               /* a stray continuation byte, or a lead byte of 0xf8 and above (5+ byte sequences): we don't handle those */
+               errno = EINVAL;
+               goto error;
        }
 
-       if (*inbytesleft > 0) {
+       if (in_left > 0) {
                errno = E2BIG;
-               return -1;
+               goto error;
        }
-       
+
+       *inbytesleft = in_left;
+       *outbytesleft = out_left;
+       *inbuf = (char *)c;
+       *outbuf = (char *)uc;   
        return 0;
 
-badseq:
-       errno = EINVAL;
+error:
+       *inbytesleft = in_left;
+       *outbytesleft = out_left;
+       *inbuf = (char *)c;
+       *outbuf = (char *)uc;
        return -1;
 }
 
-static size_t utf8_push(void *cd, char **inbuf, size_t *inbytesleft,
-                        char **outbuf, size_t *outbytesleft)
+static size_t utf8_push(void *cd, const char **inbuf, size_t *inbytesleft,
+                       char **outbuf, size_t *outbytesleft)
 {
-       while (*inbytesleft >= 2 && *outbytesleft >= 1) {
-               unsigned char *c = (unsigned char *)*outbuf;
-               unsigned char *uc = (unsigned char *)*inbuf;
-               int len=1;
-
-               if (uc[1] & 0xf8) {
-                       if (*outbytesleft < 3) {
-                               DEBUG(0,("short utf8 write\n"));
-                               goto toobig;
+       size_t in_left=*inbytesleft, out_left=*outbytesleft;
+       uint8 *c = (uint8 *)*outbuf;
+       const uint8 *uc = (const uint8 *)*inbuf;
+
+       while (in_left >= 2 && out_left >= 1) {
+               unsigned int codepoint;
+
+               if (uc[1] == 0 && !(uc[0] & 0x80)) {
+                       /* simplest case */
+                       c[0] = uc[0];
+                       in_left  -= 2;
+                       out_left -= 1;
+                       uc += 2;
+                       c  += 1;
+                       continue;
+               }
+
+               if ((uc[1]&0xf8) == 0) {
+                       /* next simplest case */
+                       if (out_left < 2) {
+                               errno = E2BIG;
+                               goto error;
                        }
-                       c[0] = 0xe0 | (uc[1]>>4);
-                       c[1] = 0x80 | ((uc[1]&0xF)<<2) | (uc[0]>>6);
-                       c[2] = 0x80 | (uc[0]&0x3f);
-                       len = 3;
-               } else if (uc[1] | (uc[0] & 0x80)) {
-                       if (*outbytesleft < 2) {
-                               DEBUG(0,("short utf8 write\n"));
-                               goto toobig;
+                       c[0] = 0xc0 | (uc[0]>>6) | (uc[1]<<2);
+                       c[1] = 0x80 | (uc[0] & 0x3f);
+                       in_left  -= 2;
+                       out_left -= 2;
+                       uc += 2;
+                       c  += 2;
+                       continue;
+               }
+
+               if ((uc[1] & 0xfc) == 0xdc) {
+                       /* it's the second part (low surrogate) of a 4 byte sequence without its first part. Illegal */
+                       if (in_left < 4) {
+                               errno = EINVAL;
+                       } else {
+                               errno = EILSEQ;
                        }
-                       c[0] = 0xc0 | (uc[1]<<2) | (uc[0]>>6);
-                       c[1] = 0x80 | (uc[0]&0x3f);
-                       len = 2;
-               } else {
-                       c[0] = uc[0];
+                       goto error;
                }
 
+               if ((uc[1] & 0xfc) != 0xd8) {
+                       codepoint = uc[0] | (uc[1]<<8);
+                       if (out_left < 3) {
+                               errno = E2BIG;
+                               goto error;
+                       }
+                       c[0] = 0xe0 | (codepoint >> 12);
+                       c[1] = 0x80 | ((codepoint >> 6) & 0x3f);
+                       c[2] = 0x80 | (codepoint & 0x3f);
+                       
+                       in_left  -= 2;
+                       out_left -= 3;
+                       uc  += 2;
+                       c   += 3;
+                       continue;
+               }
 
-               (*inbytesleft)  -= 2;
-               (*outbytesleft) -= len;
-               (*inbuf)  += 2;
-               (*outbuf) += len;
+               /* it's the first part (high surrogate) of a 4 byte sequence */
+               if (in_left < 4) {
+                       errno = EINVAL;
+                       goto error;
+               }
+               if ((uc[3] & 0xfc) != 0xdc) {
+                       errno = EILSEQ;
+                       goto error;
+               }
+               codepoint = 0x10000 + (uc[2] | ((uc[3] & 0x3)<<8) | 
+                                      (uc[0]<<10) | ((uc[1] & 0x3)<<18));
+               
+               if (out_left < 4) {
+                       errno = E2BIG;
+                       goto error;
+               }
+               c[0] = 0xf0 | (codepoint >> 18);
+               c[1] = 0x80 | ((codepoint >> 12) & 0x3f);
+               c[2] = 0x80 | ((codepoint >> 6) & 0x3f);
+               c[3] = 0x80 | (codepoint & 0x3f);
+               
+               in_left  -= 4;
+               out_left -= 4;
+               uc       += 4;
+               c        += 4;
        }
 
-       if (*inbytesleft == 1) {
+       if (in_left == 1) {
                errno = EINVAL;
-               return -1;
+               goto error;
        }
 
-       if (*inbytesleft > 1) {
+       if (in_left > 1) {
                errno = E2BIG;
-               return -1;
+               goto error;
        }
+
+       *inbytesleft = in_left;
+       *outbytesleft = out_left;
+       *inbuf  = (char *)uc;
+       *outbuf = (char *)c;
        
        return 0;
 
-toobig:
-       errno = E2BIG;
+error:
+       *inbytesleft = in_left;
+       *outbytesleft = out_left;
+       *inbuf  = (char *)uc;
+       *outbuf = (char *)c;
        return -1;
 }