From d3ed722566f42d3f614b1221a8e4f19092976531 Mon Sep 17 00:00:00 2001 From: Ulrich Drepper Date: Sun, 8 Jan 2012 07:19:21 -0500 Subject: [PATCH] Simplify char16_t implementation --- ChangeLog | 12 ++ iconv/gconv_builtin.h | 14 -- iconv/gconv_int.h | 4 - iconv/gconv_simple.c | 388 ------------------------------------------ iconv/iconv_prog.c | 12 +- wcsmbs/c16rtomb.c | 97 +---------- wcsmbs/mbrtoc16.c | 75 ++++---- wcsmbs/mbrtowc.c | 9 +- wcsmbs/wcsmbsload.c | 85 +-------- wcsmbs/wcsmbsload.h | 5 - 10 files changed, 59 insertions(+), 642 deletions(-) diff --git a/ChangeLog b/ChangeLog index d33e6554ed..55d8069dc0 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,5 +1,17 @@ 2012-01-08 Ulrich Drepper + * wcsmbs/mbrtoc16.c: Implement using towc function. + * wcsmbs/wcsmbsload.h: No need for toc16 and fromc16 functions. + * wcsmbs/wcsmbsload.c: Likewise. + * iconv/gconv_simple.c: Likewise. + * iconv/gconv_int.h: Likewise. + * iconv/gconv_builtin.h: Likewise. + * iconv/iconv_prog.c: Remove CHAR16 handling. + + * wcsmbs/c16rtomb.c: Remove #if 0'ed code. + + * wcsmbs/mbrtowc.c: Better check for invalid inputs. + * configure.in: Remove --with-elf and --enable-bounded options. Dont set base_machine for ia64. More non-ELF conditions removed. Remove testing and setting of leading underscore information. 
diff --git a/iconv/gconv_builtin.h b/iconv/gconv_builtin.h index 6820f828ec..271a4be87c 100644 --- a/iconv/gconv_builtin.h +++ b/iconv/gconv_builtin.h @@ -122,17 +122,3 @@ BUILTIN_TRANSFORMATION ("INTERNAL", "UNICODEBIG//", 1, __gconv_transform_internal_ucs2reverse, NULL, 4, 4, 2, 2) #endif - - -BUILTIN_TRANSFORMATION ("ANSI_X3.4-1968//", "CHAR16", 1, "=ascii->CHAR16", - __gconv_transform_ascii_char16, NULL, 1, 1, 2, 4) - -BUILTIN_TRANSFORMATION ("CHAR16", "ANSI_X3.4-1968//", 1, "=CHAR16->ascii", - __gconv_transform_char16_ascii, NULL, 2, 4, 1, 1) - - -BUILTIN_TRANSFORMATION ("ISO-10646/UTF8/", "CHAR16", 1, "=utf8->CHAR16", - __gconv_transform_utf8_char16, NULL, 1, 6, 2, 4) - -BUILTIN_TRANSFORMATION ("CHAR16", "ISO-10646/UTF8/", 1, "=CHAR16->utf8", - __gconv_transform_char16_utf8, NULL, 2, 4, 1, 6) diff --git a/iconv/gconv_int.h b/iconv/gconv_int.h index 7508372707..a2fcb93740 100644 --- a/iconv/gconv_int.h +++ b/iconv/gconv_int.h @@ -303,10 +303,6 @@ __BUILTIN_TRANSFORM (__gconv_transform_internal_ucs4le); __BUILTIN_TRANSFORM (__gconv_transform_ucs4le_internal); __BUILTIN_TRANSFORM (__gconv_transform_internal_utf16); __BUILTIN_TRANSFORM (__gconv_transform_utf16_internal); -__BUILTIN_TRANSFORM (__gconv_transform_ascii_char16); -__BUILTIN_TRANSFORM (__gconv_transform_char16_ascii); -__BUILTIN_TRANSFORM (__gconv_transform_utf8_char16); -__BUILTIN_TRANSFORM (__gconv_transform_char16_utf8); # undef __BUITLIN_TRANSFORM /* Specialized conversion function for a single byte to INTERNAL, recognizing diff --git a/iconv/gconv_simple.c b/iconv/gconv_simple.c index 028822c918..67761603f9 100644 --- a/iconv/gconv_simple.c +++ b/iconv/gconv_simple.c @@ -1321,391 +1321,3 @@ ucs4le_internal_loop_single (struct __gconv_step *step, #define LOOP_NEED_FLAGS #include #include - - -/* Convert from ISO 646-IRV to the char16_t format. 
*/ -#define DEFINE_INIT 0 -#define DEFINE_FINI 0 -#define MIN_NEEDED_FROM 1 -#define MIN_NEEDED_TO 2 -#define FROM_DIRECTION 1 -#define FROM_LOOP ascii_char16_loop -#define TO_LOOP ascii_char16_loop /* This is not used. */ -#define FUNCTION_NAME __gconv_transform_ascii_char16 -#define ONE_DIRECTION 1 - -#define MIN_NEEDED_INPUT MIN_NEEDED_FROM -#define MIN_NEEDED_OUTPUT MIN_NEEDED_TO -#define LOOPFCT FROM_LOOP -#define BODY \ - { \ - if (__builtin_expect (*inptr > '\x7f', 0)) \ - { \ - /* The value is too large. We don't try transliteration here since \ - this is not an error because of the lack of possibilities to \ - represent the result. This is a genuine bug in the input since \ - ASCII does not allow such values. */ \ - STANDARD_FROM_LOOP_ERR_HANDLER (1); \ - } \ - else \ - { \ - /* It's an one byte sequence. */ \ - *((uint16_t *) outptr) = *inptr++; \ - outptr += sizeof (uint16_t); \ - } \ - } -#define LOOP_NEED_FLAGS -#include -#include - - -/* Convert from the char16_t format to ISO 646-IRV. */ -#define DEFINE_INIT 0 -#define DEFINE_FINI 0 -#define MIN_NEEDED_FROM 2 -#define MIN_NEEDED_TO 1 -#define FROM_DIRECTION 1 -#define FROM_LOOP char16_ascii_loop -#define TO_LOOP char16_ascii_loop /* This is not used. */ -#define FUNCTION_NAME __gconv_transform_char16_ascii -#define ONE_DIRECTION 1 - -#define MIN_NEEDED_INPUT MIN_NEEDED_FROM -#define MIN_NEEDED_OUTPUT MIN_NEEDED_TO -#define LOOPFCT FROM_LOOP -#define BODY \ - { \ - if (__builtin_expect (*((const uint16_t *) inptr) > 0x7f, 0)) \ - { \ - UNICODE_TAG_HANDLER (*((const uint16_t *) inptr), 2); \ - STANDARD_TO_LOOP_ERR_HANDLER (2); \ - } \ - else \ - { \ - /* It's an one byte sequence. */ \ - *outptr++ = *((const uint16_t *) inptr); \ - inptr += 2; \ - } \ - } -#define LOOP_NEED_FLAGS -#include -#include - - -/* Convert from the char16_t format to UTF-8. 
*/ -#define DEFINE_INIT 0 -#define DEFINE_FINI 0 -#define MIN_NEEDED_FROM 2 -#define MAX_NEEDED_FROM 4 -#define MIN_NEEDED_TO 1 -#define MAX_NEEDED_TO 6 -#define FROM_DIRECTION 1 -#define FROM_LOOP char16_utf8_loop -#define TO_LOOP char16_utf8_loop /* This is not used. */ -#define FUNCTION_NAME __gconv_transform_char16_utf8 -#define ONE_DIRECTION 1 - -#define MIN_NEEDED_INPUT MIN_NEEDED_FROM -#define MAX_NEEDED_INPUT MAX_NEEDED_FROM -#define MIN_NEEDED_OUTPUT MIN_NEEDED_TO -#define MAX_NEEDED_OUTPUT MAX_NEEDED_TO -#define LOOPFCT FROM_LOOP -#define BODY \ - { \ - /* Yes, reading a 16-bit number and storing it as 32-bit is correct. */ \ - uint32_t wc = *((const uint16_t *) inptr); \ - inptr += 2; \ - \ - if (__builtin_expect (wc < 0x80, 1)) \ - /* It's an one byte sequence. */ \ - *outptr++ = (unsigned char) wc; \ - else \ - { \ - size_t step; \ - \ - if (__builtin_expect (wc < 0xd800 || wc > 0xdfff, 1)) \ - step = wc < 0x800 ? 2 : 3; \ - else \ - { \ - if (__builtin_expect (inptr + 2 > inend, 0)) \ - { \ - /* We don't have enough input for another complete input \ - character. */ \ - inptr -= 2; \ - result = __GCONV_INCOMPLETE_INPUT; \ - break; \ - } \ - \ - uint32_t sec = *((const uint16_t *) inptr); \ - if (__builtin_expect (sec < 0xdc00, 0) \ - || __builtin_expect (sec > 0xdfff, 0)) \ - { \ - /* This is no valid second word for a surrogate. */ \ - STANDARD_FROM_LOOP_ERR_HANDLER (2); \ - } \ - inptr += 2; \ - wc = ((wc - 0xd7c0) << 10) + (sec - 0xdc00); \ - \ - step = wc < 0x200000 ? 4 : 5; \ - } \ - \ - if (__builtin_expect (outptr + step > outend, 0)) \ - { \ - /* Too long. */ \ - result = __GCONV_FULL_OUTPUT; \ - inptr -= step >= 4 ? 
4 : 2; \ - break; \ - } \ - \ - unsigned char *start = outptr; \ - *outptr = (unsigned char) (~0xff >> step); \ - outptr += step; \ - do \ - { \ - start[--step] = 0x80 | (wc & 0x3f); \ - wc >>= 6; \ - } \ - while (step > 1); \ - start[0] |= wc; \ - } \ - } -#define LOOP_NEED_FLAGS -#include -#include - - -/* Convert from UTF-8 to the char16_t format. */ -#define DEFINE_INIT 0 -#define DEFINE_FINI 0 -#define MIN_NEEDED_FROM 1 -#define MAX_NEEDED_FROM 6 -#define MIN_NEEDED_TO 2 -#define MAX_NEEDED_TO 4 -#define FROM_DIRECTION 1 -#define FROM_LOOP utf8_char16_loop -#define TO_LOOP utf8_char16_loop /* This is not used. */ -#define FUNCTION_NAME __gconv_transform_utf8_char16 -#define ONE_DIRECTION 1 - -#define MIN_NEEDED_INPUT MIN_NEEDED_FROM -#define MAX_NEEDED_INPUT MAX_NEEDED_FROM -#define MIN_NEEDED_OUTPUT MIN_NEEDED_TO -#define LOOPFCT FROM_LOOP -#define BODY \ - { \ - /* Next input byte. */ \ - uint32_t ch = *inptr; \ - \ - if (__builtin_expect (ch < 0x80, 1)) \ - { \ - /* One byte sequence. */ \ - *((uint16_t *) outptr) = ch; \ - outptr += 2; \ - ++inptr; \ - } \ - else \ - { \ - uint_fast32_t cnt; \ - uint_fast32_t i; \ - \ - if (ch >= 0xc2 && ch < 0xe0) \ - { \ - /* We expect two bytes. The first byte cannot be 0xc0 or 0xc1, \ - otherwise the wide character could have been represented \ - using a single byte. */ \ - cnt = 2; \ - ch &= 0x1f; \ - } \ - else if (__builtin_expect ((ch & 0xf0) == 0xe0, 1)) \ - { \ - /* We expect three bytes. */ \ - cnt = 3; \ - ch &= 0x0f; \ - } \ - else if (__builtin_expect ((ch & 0xf8) == 0xf0, 1)) \ - { \ - /* We expect four bytes. */ \ - cnt = 4; \ - ch &= 0x07; \ - } \ - else if (__builtin_expect ((ch & 0xfc) == 0xf8, 1)) \ - { \ - /* We expect five bytes. */ \ - cnt = 5; \ - ch &= 0x03; \ - } \ - else if (__builtin_expect ((ch & 0xfe) == 0xfc, 1)) \ - { \ - /* We expect six bytes. */ \ - cnt = 6; \ - ch &= 0x01; \ - } \ - else \ - { \ - /* Search the end of this ill-formed UTF-8 character. 
This \ - is the next byte with (x & 0xc0) != 0x80. */ \ - i = 0; \ - do \ - ++i; \ - while (inptr + i < inend \ - && (*(inptr + i) & 0xc0) == 0x80 \ - && i < 5); \ - \ - errout: \ - STANDARD_FROM_LOOP_ERR_HANDLER (i); \ - } \ - \ - if (__builtin_expect (inptr + cnt > inend, 0)) \ - { \ - /* We don't have enough input. But before we report that check \ - that all the bytes are correct. */ \ - for (i = 1; inptr + i < inend; ++i) \ - if ((inptr[i] & 0xc0) != 0x80) \ - break; \ - \ - if (__builtin_expect (inptr + i == inend, 1)) \ - { \ - result = __GCONV_INCOMPLETE_INPUT; \ - break; \ - } \ - \ - goto errout; \ - } \ - \ - /* Read the possible remaining bytes. */ \ - for (i = 1; i < cnt; ++i) \ - { \ - uint32_t byte = inptr[i]; \ - \ - if ((byte & 0xc0) != 0x80) \ - /* This is an illegal encoding. */ \ - break; \ - \ - ch <<= 6; \ - ch |= byte & 0x3f; \ - } \ - \ - /* If i < cnt, some trail byte was not >= 0x80, < 0xc0. \ - If cnt > 2 and ch < 2^(5*cnt-4), the wide character ch could \ - have been represented with fewer than cnt bytes. */ \ - if (i < cnt || (cnt > 2 && (ch >> (5 * cnt - 4)) == 0) \ - /* Do not accept UTF-16 surrogates. */ \ - || (ch >= 0xd800 && ch <= 0xdfff)) \ - { \ - /* This is an illegal encoding. */ \ - goto errout; \ - } \ - \ - /* Now adjust the pointers and store the result. */ \ - if (ch < 0x10000) \ - *((uint16_t *) outptr) = ch; \ - else \ - { \ - if (__builtin_expect (outptr + 4 > outend, 0)) \ - { \ - result = __GCONV_FULL_OUTPUT; \ - break; \ - } \ - \ - *((uint16_t *) outptr) = 0xd7c0 + (ch >> 10); \ - outptr += 2; \ - *((uint16_t *) outptr) = 0xdc00 + (ch & 0x3ff); \ - } \ - \ - outptr += 2; \ - inptr += cnt; \ - } \ - } -#define LOOP_NEED_FLAGS - -#define STORE_REST \ - { \ - /* We store the remaining bytes while converting them into the UCS4 \ - format. We can assume that the first byte in the buffer is \ - correct and that it requires a larger number of bytes than there \ - are in the input buffer. 
*/ \ - wint_t ch = **inptrp; \ - size_t cnt, r; \ - \ - state->__count = inend - *inptrp; \ - \ - assert (ch != 0xc0 && ch != 0xc1); \ - if (ch >= 0xc2 && ch < 0xe0) \ - { \ - /* We expect two bytes. The first byte cannot be 0xc0 or \ - 0xc1, otherwise the wide character could have been \ - represented using a single byte. */ \ - cnt = 2; \ - ch &= 0x1f; \ - } \ - else if (__builtin_expect ((ch & 0xf0) == 0xe0, 1)) \ - { \ - /* We expect three bytes. */ \ - cnt = 3; \ - ch &= 0x0f; \ - } \ - else if (__builtin_expect ((ch & 0xf8) == 0xf0, 1)) \ - { \ - /* We expect four bytes. */ \ - cnt = 4; \ - ch &= 0x07; \ - } \ - else if (__builtin_expect ((ch & 0xfc) == 0xf8, 1)) \ - { \ - /* We expect five bytes. */ \ - cnt = 5; \ - ch &= 0x03; \ - } \ - else \ - { \ - /* We expect six bytes. */ \ - cnt = 6; \ - ch &= 0x01; \ - } \ - \ - /* The first byte is already consumed. */ \ - r = cnt - 1; \ - while (++(*inptrp) < inend) \ - { \ - ch <<= 6; \ - ch |= **inptrp & 0x3f; \ - --r; \ - } \ - \ - /* Shift for the so far missing bytes. */ \ - ch <<= r * 6; \ - \ - /* Store the number of bytes expected for the entire sequence. */ \ - state->__count |= cnt << 8; \ - \ - /* Store the value. 
*/ \ - state->__value.__wch = ch; \ - } - -#define UNPACK_BYTES \ - { \ - static const unsigned char inmask[5] = { 0xc0, 0xe0, 0xf0, 0xf8, 0xfc }; \ - wint_t wch = state->__value.__wch; \ - size_t ntotal = state->__count >> 8; \ - \ - inlen = state->__count & 255; \ - \ - bytebuf[0] = inmask[ntotal - 2]; \ - \ - do \ - { \ - if (--ntotal < inlen) \ - bytebuf[ntotal] = 0x80 | (wch & 0x3f); \ - wch >>= 6; \ - } \ - while (ntotal > 1); \ - \ - bytebuf[0] |= wch; \ - } - -#define CLEAR_STATE \ - state->__count = 0 - - -#include -#include diff --git a/iconv/iconv_prog.c b/iconv/iconv_prog.c index 13facc8235..0d62a07147 100644 --- a/iconv/iconv_prog.c +++ b/iconv/iconv_prog.c @@ -719,12 +719,9 @@ add_known_names (struct gconv_module *node) add_known_names (node->right); do { - if (strcmp (node->from_string, "INTERNAL") != 0 - && strcmp (node->from_string, "CHAR16") != 0) - tsearch (node->from_string, &printlist, - (__compar_fn_t) strverscmp); - if (strcmp (node->to_string, "INTERNAL") != 0 - && strcmp (node->to_string, "CHAR16") != 0) + if (strcmp (node->from_string, "INTERNAL") != 0) + tsearch (node->from_string, &printlist, (__compar_fn_t) strverscmp); + if (strcmp (node->to_string, "INTERNAL") != 0) tsearch (node->to_string, &printlist, (__compar_fn_t) strverscmp); node = node->same; @@ -750,8 +747,7 @@ insert_cache (void) { const char *str = strtab + hashtab[cnt].string_offset; - if (strcmp (str, "INTERNAL") != 0 - && strcmp (str, "CHAR16") != 0) + if (strcmp (str, "INTERNAL") != 0) tsearch (str, &printlist, (__compar_fn_t) strverscmp); } } diff --git a/wcsmbs/c16rtomb.c b/wcsmbs/c16rtomb.c index 3fed0b5d63..5374c755cc 100644 --- a/wcsmbs/c16rtomb.c +++ b/wcsmbs/c16rtomb.c @@ -17,25 +17,8 @@ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. 
*/ -#include -#include -#include -#include -#include #include -#include - -#include - -#ifndef EILSEQ -# define EILSEQ EINVAL -#endif - -#if __STDC__ >= 201000L -# define u(c) U##c -#else -# define u(c) L##c -#endif +#include /* This is the private state used if PS is NULL. */ @@ -44,85 +27,7 @@ static mbstate_t state; size_t c16rtomb (char *s, char16_t c16, mbstate_t *ps) { -#if 1 // XXX The ISO C 11 spec I have does not say anything about handling // XXX surrogates in this interface. return wcrtomb (s, c16, ps ?: &state); -#else - char buf[MB_LEN_MAX]; - struct __gconv_step_data data; - int status; - size_t result; - size_t dummy; - const struct gconv_fcts *fcts; - - /* Set information for this step. */ - data.__invocation_counter = 0; - data.__internal_use = 1; - data.__flags = __GCONV_IS_LAST; - data.__statep = ps ?: &state; - data.__trans = NULL; - - /* A first special case is if S is NULL. This means put PS in the - initial state. */ - if (s == NULL) - { - s = buf; - c16 = u('\0'); - } - - /* Tell where we want to have the result. */ - data.__outbuf = (unsigned char *) s; - data.__outbufend = (unsigned char *) s + MB_CUR_MAX; - - /* Get the conversion functions. */ - fcts = get_gconv_fcts (_NL_CURRENT_DATA (LC_CTYPE)); - __gconv_fct fct = fcts->fromc16->__fct; -#ifdef PTR_DEMANGLE - if (fcts->tomb->__shlib_handle != NULL) - PTR_DEMANGLE (fct); -#endif - - /* If C16 is the NUL character we write into the output buffer - the byte sequence necessary for PS to get into the initial - state, followed by a NUL byte. */ - if (c16 == L'\0') - { - status = DL_CALL_FCT (fct, (fcts->fromc16, &data, NULL, NULL, - NULL, &dummy, 1, 1)); - - if (status == __GCONV_OK || status == __GCONV_EMPTY_INPUT) - *data.__outbuf++ = '\0'; - } - else - { - /* Do a normal conversion. 
*/ - const unsigned char *inbuf = (const unsigned char *) &c16; - - status = DL_CALL_FCT (fct, - (fcts->fromc16, &data, &inbuf, - inbuf + sizeof (char16_t), NULL, &dummy, - 0, 1)); - } - - /* There must not be any problems with the conversion but illegal input - characters. The output buffer must be large enough, otherwise the - definition of MB_CUR_MAX is not correct. All the other possible - errors also must not happen. */ - assert (status == __GCONV_OK || status == __GCONV_EMPTY_INPUT - || status == __GCONV_ILLEGAL_INPUT - || status == __GCONV_INCOMPLETE_INPUT - || status == __GCONV_FULL_OUTPUT); - - if (status == __GCONV_OK || status == __GCONV_EMPTY_INPUT - || status == __GCONV_FULL_OUTPUT) - result = data.__outbuf - (unsigned char *) s; - else - { - result = (size_t) -1; - __set_errno (EILSEQ); - } - - return result; -#endif } diff --git a/wcsmbs/mbrtoc16.c b/wcsmbs/mbrtoc16.c index df970fba4f..f5ed2b4ac9 100644 --- a/wcsmbs/mbrtoc16.c +++ b/wcsmbs/mbrtoc16.c @@ -30,12 +30,6 @@ # define EILSEQ EINVAL #endif -#if __STDC__ >= 201000L -# define U(c) U##c -#else -# define U(c) L##c -#endif - /* This is the private state used if PS is NULL. */ static mbstate_t state; @@ -46,6 +40,11 @@ mbrtoc16 (char16_t *pc16, const char *s, size_t n, mbstate_t *ps) if (ps == NULL) ps = &state; + /* The standard text does not say that S being NULL means the state + is reset even if the second half of a surrogate still has to be + returned. In fact, the error code description indicates + otherwise. Therefore always first try to return a second + half. */ if (ps->__count & 0x80000000) { /* We have to return the second word for a surrogate. 
*/ @@ -55,13 +54,13 @@ mbrtoc16 (char16_t *pc16, const char *s, size_t n, mbstate_t *ps) return (size_t) -3; } - char16_t buf[2]; + wchar_t wc; struct __gconv_step_data data; int status; size_t result; size_t dummy; const unsigned char *inbuf, *endbuf; - unsigned char *outbuf = (unsigned char *) buf; + unsigned char *outbuf = (unsigned char *) &wc; const struct gconv_fcts *fcts; /* Set information for this step. */ @@ -75,14 +74,14 @@ mbrtoc16 (char16_t *pc16, const char *s, size_t n, mbstate_t *ps) initial state. */ if (s == NULL) { - outbuf = (unsigned char *) buf; + pc16 = NULL; s = ""; n = 1; } /* Tell where we want the result. */ data.__outbuf = outbuf; - data.__outbufend = outbuf + sizeof (char16_t); + data.__outbufend = outbuf + sizeof (wchar_t); /* Get the conversion functions. */ fcts = get_gconv_fcts (_NL_CURRENT_DATA (LC_CTYPE)); @@ -91,28 +90,20 @@ mbrtoc16 (char16_t *pc16, const char *s, size_t n, mbstate_t *ps) inbuf = (const unsigned char *) s; endbuf = inbuf + n; if (__builtin_expect (endbuf < inbuf, 0)) - endbuf = (const unsigned char *) ~(uintptr_t) 0; - __gconv_fct fct = fcts->toc16->__fct; + { + endbuf = (const unsigned char *) ~(uintptr_t) 0; + if (endbuf == inbuf) + goto ilseq; + } + __gconv_fct fct = fcts->towc->__fct; #ifdef PTR_DEMANGLE - if (fcts->toc16->__shlib_handle != NULL) + if (fcts->towc->__shlib_handle != NULL) PTR_DEMANGLE (fct); #endif - /* We first have to check whether the character can be represented - without a surrogate. If we immediately pass in a buffer large - enough to hold two char16_t values and the first character does - not require a surrogate the routine will try to convert more - input if N is larger then needed for the first character. 
*/ - status = DL_CALL_FCT (fct, (fcts->toc16, &data, &inbuf, endbuf, + status = DL_CALL_FCT (fct, (fcts->towc, &data, &inbuf, endbuf, NULL, &dummy, 0, 1)); - if (status == __GCONV_FULL_OUTPUT && data.__outbuf == outbuf) - { - data.__outbufend = outbuf + 2 * sizeof (char16_t); - status = DL_CALL_FCT (fct, (fcts->toc16, &data, &inbuf, endbuf, - NULL, &dummy, 0, 1)); - } - /* There must not be any problems with the conversion but illegal input characters. The output buffer must be large enough, otherwise the definition of MB_CUR_MAX is not correct. All the other possible @@ -125,33 +116,35 @@ mbrtoc16 (char16_t *pc16, const char *s, size_t n, mbstate_t *ps) if (status == __GCONV_OK || status == __GCONV_EMPTY_INPUT || status == __GCONV_FULL_OUTPUT) { - if (pc16 != NULL) - *pc16 = buf[0]; + result = inbuf - (const unsigned char *) s; - if (data.__outbuf != outbuf && *(char16_t *) outbuf == U('\0')) + if (wc < 0x10000) { - /* The converted character is the NUL character. */ - assert (__mbsinit (data.__statep)); - result = 0; + if (pc16 != NULL) + *pc16 = wc; + + if (data.__outbuf != outbuf && wc == L'\0') + { + /* The converted character is the NUL character. */ + assert (__mbsinit (data.__statep)); + result = 0; + } } else { - result = inbuf - (const unsigned char *) s; + /* This is a surrogate. */ + if (pc16 != NULL) + *pc16 = 0xd7c0 + (wc >> 10); - if (data.__outbuf != outbuf + 2) - { - /* This is a surrogate. 
*/ - assert (buf[0] >= 0xd800 && buf[0] <= 0xdfff); - assert (buf[1] >= 0xdc00 && buf[1] <= 0xdfff); - ps->__count |= 0x80000000; - ps->__value.__wch = buf[1]; - } + ps->__count |= 0x80000000; + ps->__value.__wch = 0xdc00 + (wc & 0x3ff); } } else if (status == __GCONV_INCOMPLETE_INPUT) result = (size_t) -2; else { + ilseq: result = (size_t) -1; __set_errno (EILSEQ); } diff --git a/wcsmbs/mbrtowc.c b/wcsmbs/mbrtowc.c index 03b8348d30..0c99b7401d 100644 --- a/wcsmbs/mbrtowc.c +++ b/wcsmbs/mbrtowc.c @@ -1,4 +1,4 @@ -/* Copyright (C) 1996, 1997, 1998, 1999, 2000, 2002, 2004, 2005, 2011 +/* Copyright (C) 1996, 1997, 1998, 1999, 2000, 2002, 2004, 2005, 2011, 2012 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Ulrich Drepper , 1996. @@ -73,7 +73,11 @@ __mbrtowc (wchar_t *pwc, const char *s, size_t n, mbstate_t *ps) inbuf = (const unsigned char *) s; endbuf = inbuf + n; if (__builtin_expect (endbuf < inbuf, 0)) - endbuf = (const unsigned char *) ~(uintptr_t) 0; + { + endbuf = (const unsigned char *) ~(uintptr_t) 0; + if (endbuf == inbuf) + goto ilseq; + } __gconv_fct fct = fcts->towc->__fct; #ifdef PTR_DEMANGLE if (fcts->towc->__shlib_handle != NULL) @@ -108,6 +112,7 @@ __mbrtowc (wchar_t *pwc, const char *s, size_t n, mbstate_t *ps) result = (size_t) -2; else { + ilseq: result = (size_t) -1; __set_errno (EILSEQ); } diff --git a/wcsmbs/wcsmbsload.c b/wcsmbs/wcsmbsload.c index 9ce26f1dc0..27ea442d40 100644 --- a/wcsmbs/wcsmbsload.c +++ b/wcsmbs/wcsmbsload.c @@ -68,44 +68,6 @@ static const struct __gconv_step to_mb = .__data = NULL }; -static const struct __gconv_step to_c16 = -{ - .__shlib_handle = NULL, - .__modname = NULL, - .__counter = INT_MAX, - .__from_name = (char *) "ANSI_X3.4-1968//TRANSLIT", - .__to_name = (char *) "UTF-16//", - .__fct = __gconv_transform_ascii_char16, - .__btowc_fct = NULL, - .__init_fct = NULL, - .__end_fct = NULL, - .__min_needed_from = 1, - .__max_needed_from = 1, - .__min_needed_to = 4, - 
.__max_needed_to = 4, - .__stateful = 0, - .__data = NULL -}; - -static const struct __gconv_step from_c16 = -{ - .__shlib_handle = NULL, - .__modname = NULL, - .__counter = INT_MAX, - .__from_name = (char *) "UTF-16//", - .__to_name = (char *) "ANSI_X3.4-1968//TRANSLIT", - .__fct = __gconv_transform_char16_ascii, - .__btowc_fct = NULL, - .__init_fct = NULL, - .__end_fct = NULL, - .__min_needed_from = 4, - .__max_needed_from = 4, - .__min_needed_to = 1, - .__max_needed_to = 1, - .__stateful = 0, - .__data = NULL -}; - /* For the default locale we only have to handle ANSI_X3.4-1968. */ const struct gconv_fcts __wcsmbs_gconv_fcts_c = @@ -114,11 +76,6 @@ const struct gconv_fcts __wcsmbs_gconv_fcts_c = .towc_nsteps = 1, .tomb = (struct __gconv_step *) &to_mb, .tomb_nsteps = 1, - - .toc16 = (struct __gconv_step *) &to_c16, - .toc16_nsteps = 1, - .fromc16 = (struct __gconv_step *) &from_c16, - .fromc16_nsteps = 1, }; @@ -234,24 +191,9 @@ __wcsmbs_load_conv (struct __locale_data *new_category) new_fcts->tomb = __wcsmbs_getfct (complete_name, "INTERNAL", &new_fcts->tomb_nsteps); - if (new_fcts->tomb != NULL) - { - new_fcts->toc16 = __wcsmbs_getfct ("CHAR16", complete_name, - &new_fcts->toc16_nsteps); - - if (new_fcts->toc16 != NULL) - new_fcts->fromc16 = __wcsmbs_getfct (complete_name, "CHAR16", - &new_fcts->fromc16_nsteps); - else - { - __gconv_close_transform (new_fcts->toc16, new_fcts->toc16_nsteps); - new_fcts->toc16 = NULL; - } - } - /* If any of the conversion functions is not available we don't use any since this would mean we cannot convert back and - forth.*/ + forth. NB: NEW_FCTS was allocated with calloc. */ if (new_fcts->tomb == NULL) { if (new_fcts->towc != NULL) @@ -264,12 +206,6 @@ __wcsmbs_load_conv (struct __locale_data *new_category) } else { - // XXX At least for now we live with the CHAR16 not being available. 
- if (new_fcts->toc16 == NULL) - new_fcts->toc16 = __wcsmbs_gconv_fcts_c.toc16; - if (new_fcts->fromc16 == NULL) - new_fcts->fromc16 = __wcsmbs_gconv_fcts_c.fromc16; - new_category->private.ctype = new_fcts; new_category->private.cleanup = &_nl_cleanup_ctype; } @@ -297,10 +233,6 @@ __wcsmbs_clone_conv (struct gconv_fcts *copy) ++copy->towc->__counter; if (copy->tomb->__shlib_handle != NULL) ++copy->tomb->__counter; - if (copy->toc16->__shlib_handle != NULL) - ++copy->toc16->__counter; - if (copy->fromc16->__shlib_handle != NULL) - ++copy->fromc16->__counter; } @@ -320,19 +252,6 @@ __wcsmbs_named_conv (struct gconv_fcts *copy, const char *name) return 1; } - copy->fromc16 = __wcsmbs_getfct (name, "CHAR16", &copy->fromc16_nsteps); - if (copy->fromc16 == NULL) - copy->toc16 = NULL; - else - { - copy->toc16 = __wcsmbs_getfct ("CHAR16", name, &copy->toc16_nsteps); - if (copy->toc16 == NULL) - { - __gconv_close_transform (copy->fromc16, copy->fromc16_nsteps); - copy->fromc16 = NULL; - } - } - return 0; } @@ -348,8 +267,6 @@ _nl_cleanup_ctype (struct __locale_data *locale) /* Free the old conversions. */ __gconv_close_transform (data->tomb, data->tomb_nsteps); __gconv_close_transform (data->towc, data->towc_nsteps); - __gconv_close_transform (data->fromc16, data->fromc16_nsteps); - __gconv_close_transform (data->toc16, data->toc16_nsteps); free ((char *) data); } } diff --git a/wcsmbs/wcsmbsload.h b/wcsmbs/wcsmbsload.h index 064c41c82f..98f53bcc4c 100644 --- a/wcsmbs/wcsmbsload.h +++ b/wcsmbs/wcsmbsload.h @@ -32,11 +32,6 @@ struct gconv_fcts size_t towc_nsteps; struct __gconv_step *tomb; size_t tomb_nsteps; - - struct __gconv_step *toc16; - size_t toc16_nsteps; - struct __gconv_step *fromc16; - size_t fromc16_nsteps; }; /* Set of currently active conversion functions. */ -- 2.34.1