r23572: Ensure we obey Unicode consortium restrictions. Code
author Jeremy Allison <jra@samba.org>
Thu, 21 Jun 2007 17:25:13 +0000 (17:25 +0000)
committer Gerald (Jerry) Carter <jerry@samba.org>
Wed, 10 Oct 2007 17:23:31 +0000 (12:23 -0500)
based on patch from MORIYAMA Masayuki <moriyama@miraclelinux.com>.
Jeremy.

source/lib/iconv.c

index 6e040b77f17ea0e0f4080ef6f4b80d1c6a85409c..90e2faab6fbca7bb921d7bdf2467ffee570ff046 100644 (file)
@@ -544,6 +544,8 @@ static size_t utf8_pull(void *cd, const char **inbuf, size_t *inbytesleft,
        uint8 *uc = (uint8 *)*outbuf;
 
        while (in_left >= 1 && out_left >= 2) {
+               unsigned int codepoint;
+
                if ((c[0] & 0x80) == 0) {
                        uc[0] = c[0];
                        uc[1] = 0;
@@ -560,8 +562,14 @@ static size_t utf8_pull(void *cd, const char **inbuf, size_t *inbytesleft,
                                errno = EILSEQ;
                                goto error;
                        }
-                       uc[1] = (c[0]>>2) & 0x7;
-                       uc[0] = (c[0]<<6) | (c[1]&0x3f);
+                       codepoint = (c[1]&0x3f) | ((c[0]&0x1f)<<6);
+                       if (codepoint < 0x80) {
+                               /* don't accept UTF-8 characters that are not minimally packed */
+                               errno = EILSEQ;
+                               goto error;
+                       }
+                       uc[1] = codepoint >> 8;
+                       uc[0] = codepoint & 0xff;
                        c  += 2;
                        in_left  -= 2;
                        out_left -= 2;
@@ -576,8 +584,14 @@ static size_t utf8_pull(void *cd, const char **inbuf, size_t *inbytesleft,
                                errno = EILSEQ;
                                goto error;
                        }
-                       uc[1] = ((c[0]&0xF)<<4) | ((c[1]>>2)&0xF);
-                       uc[0] = (c[1]<<6) | (c[2]&0x3f);
+                       codepoint = (c[2]&0x3f) | ((c[1]&0x3f)<<6) | ((c[0]&0xf)<<12);
+                       if (codepoint < 0x800) {
+                               /* don't accept UTF-8 characters that are not minimally packed */
+                               errno = EILSEQ;
+                               goto error;
+                       }
+                       uc[1] = codepoint >> 8;
+                       uc[0] = codepoint & 0xff;
                        c  += 3;
                        in_left  -= 3;
                        out_left -= 2;
@@ -586,7 +600,6 @@ static size_t utf8_pull(void *cd, const char **inbuf, size_t *inbytesleft,
                }
 
                if ((c[0] & 0xf8) == 0xf0) {
-                       unsigned int codepoint;
                        if (in_left < 4 ||
                            (c[1] & 0xc0) != 0x80 || 
                            (c[2] & 0xc0) != 0x80 ||
@@ -599,16 +612,10 @@ static size_t utf8_pull(void *cd, const char **inbuf, size_t *inbytesleft,
                                ((c[2]&0x3f)<<6) | 
                                ((c[1]&0x3f)<<12) |
                                ((c[0]&0x7)<<18);
-                       if (codepoint < 0x10000) {
-                               /* accept UTF-8 characters that are not
-                                  minimally packed, but pack the result */
-                               uc[0] = (codepoint & 0xFF);
-                               uc[1] = (codepoint >> 8);
-                               c += 4;
-                               in_left -= 4;
-                               out_left -= 2;
-                               uc += 2;
-                               continue;
+                       if (codepoint < 0x10000 || codepoint > 0x10ffff) {
+                               /* don't accept UTF-8 characters that are not minimally packed */
+                               errno = EILSEQ;
+                               goto error;
                        }
 
                        codepoint -= 0x10000;