* manual/arith.texi: Document MTASC-safety properties.
[jlayton/glibc.git] / iconvdata / unicode.c
/* Conversion module for Unicode
   Copyright (C) 1999-2014 Free Software Foundation, Inc.
   This file is part of the GNU C Library.
   Contributed by Ulrich Drepper <drepper@cygnus.com>, 1999.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <http://www.gnu.org/licenses/>.  */
19
#include <byteswap.h>
#include <dlfcn.h>
#include <gconv.h>
#include <stddef.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>

/* This is the Byte Order Mark character (BOM) as a 16-bit value.  A
   leading BOM in the input is skipped; one is written in front of
   freshly produced output.  */
#define BOM     0xfeff
/* The same mark with its two bytes exchanged, i.e. what a 16-bit read
   yields when the data was written in the other endian format.  */
#define BOM_OE  0xfffe
32
33
/* Definitions used in the body of the `gconv' function.

   FROM_LOOP and TO_LOOP name the two conversion loops defined further
   down.  DEFINE_INIT and DEFINE_FINI are 0; this file provides
   gconv_init and gconv_end itself.  One UNICODE character occupies
   MIN_NEEDED_FROM (2) bytes, one internal character MIN_NEEDED_TO (4)
   bytes.  */
#define FROM_LOOP               from_unicode_loop
#define TO_LOOP                 to_unicode_loop
#define DEFINE_INIT             0
#define DEFINE_FINI             0
#define MIN_NEEDED_FROM         2
#define MIN_NEEDED_TO           4
#define FROM_DIRECTION          (dir == from_unicode)
/* Per-call setup executed before either loop runs.  The identifiers
   step, data, inptr, inend, inptrp, outbuf and outend are not defined in
   this file; they are expected to be in scope where this macro is
   expanded.

   Decoding (FROM_DIRECTION), first invocation only: at least two input
   bytes are required, otherwise the call returns __GCONV_EMPTY_INPUT
   (no input at all) or __GCONV_INCOMPLETE_INPUT (fewer than two bytes).
   A leading BOM is skipped.  A byte-swapped BOM is skipped as well and
   additionally sets __GCONV_SWAP in data->__flags.  Input that starts
   with neither is left untouched.

   Encoding, first invocation and not for internal use: a BOM is written
   to the output buffer, or __GCONV_FULL_OUTPUT is returned when fewer
   than two bytes are free.

   In every case SWAP is finally loaded from data->__flags; it is handed
   to the loops through EXTRA_LOOP_ARGS.  */
#define PREPARE_LOOP \
  enum direction dir = ((struct unicode_data *) step->__data)->dir;           \
  int swap;                                                                   \
  if (FROM_DIRECTION)                                                         \
    {                                                                         \
      if (data->__invocation_counter == 0)                                    \
        {                                                                     \
          /* We have to find out which byte order the file is encoded in.  */ \
          if (inptr + 2 > inend)                                              \
            return (inptr == inend                                            \
                    ? __GCONV_EMPTY_INPUT : __GCONV_INCOMPLETE_INPUT);        \
                                                                              \
          if (get16u (inptr) == BOM)                                          \
            /* Simply ignore the BOM character.  */                           \
            *inptrp = inptr += 2;                                             \
          else if (get16u (inptr) == BOM_OE)                                  \
            {                                                                 \
              data->__flags |= __GCONV_SWAP;                                  \
              *inptrp = inptr += 2;                                           \
            }                                                                 \
        }                                                                     \
    }                                                                         \
  else if (!data->__internal_use && data->__invocation_counter == 0)          \
    {                                                                         \
      /* Emit the Byte Order Mark.  */                                        \
      if (__builtin_expect (outbuf + 2 > outend, 0))                          \
        return __GCONV_FULL_OUTPUT;                                           \
                                                                              \
      put16u (outbuf, BOM);                                                   \
      outbuf += 2;                                                            \
    }                                                                         \
  swap = data->__flags & __GCONV_SWAP;
#define EXTRA_LOOP_ARGS         , swap
75
76
/* Direction of the transformation.  illegal_dir is the zero value;
   gconv_init always ends up storing one of the other two.  */
enum direction
{
  illegal_dir,
  to_unicode,     /* Internal format -> UNICODE.  */
  from_unicode    /* UNICODE -> internal format.  */
};
84
85 struct unicode_data
86 {
87   enum direction dir;
88 };
89
90
91 extern int gconv_init (struct __gconv_step *step);
92 int
93 gconv_init (struct __gconv_step *step)
94 {
95   /* Determine which direction.  */
96   struct unicode_data *new_data;
97   enum direction dir = illegal_dir;
98   int result;
99
100   if (strcmp (step->__from_name, "UNICODE//") == 0)
101     dir = from_unicode;
102   else
103     dir = to_unicode;
104
105   new_data = (struct unicode_data *) malloc (sizeof (struct unicode_data));
106
107   result = __GCONV_NOMEM;
108   if (new_data != NULL)
109     {
110       new_data->dir = dir;
111       step->__data = new_data;
112
113       if (dir == from_unicode)
114         {
115           step->__min_needed_from = MIN_NEEDED_FROM;
116           step->__max_needed_from = MIN_NEEDED_FROM;
117           step->__min_needed_to = MIN_NEEDED_TO;
118           step->__max_needed_to = MIN_NEEDED_TO;
119         }
120       else
121         {
122           step->__min_needed_from = MIN_NEEDED_TO;
123           step->__max_needed_from = MIN_NEEDED_TO;
124           step->__min_needed_to = MIN_NEEDED_FROM;
125           step->__max_needed_to = MIN_NEEDED_FROM;
126         }
127
128       step->__stateful = 0;
129
130       result = __GCONV_OK;
131     }
132
133   return result;
134 }
135
136
/* Release the private data attached to the conversion step DATA.  The
   pointer is cleared afterwards so that a repeated call is harmless.  */
extern void gconv_end (struct __gconv_step *data);
void
gconv_end (struct __gconv_step *data)
{
  free (data->__data);
  data->__data = NULL;
}
143
144
145 /* Convert from the internal (UCS4-like) format to UCS2.  */
146 #define MIN_NEEDED_INPUT        MIN_NEEDED_TO
147 #define MIN_NEEDED_OUTPUT       MIN_NEEDED_FROM
148 #define LOOPFCT                 TO_LOOP
149 #define BODY \
150   {                                                                           \
151     uint32_t c = get32 (inptr);                                               \
152                                                                               \
153     if (__builtin_expect (c >= 0x10000, 0))                                   \
154       {                                                                       \
155         UNICODE_TAG_HANDLER (c, 4);                                           \
156         STANDARD_TO_LOOP_ERR_HANDLER (4);                                     \
157       }                                                                       \
158     else if (__builtin_expect (c >= 0xd800 && c < 0xe000, 0))                 \
159       {                                                                       \
160         /* Surrogate characters in UCS-4 input are not valid.                 \
161            We must catch this, because the UCS-2 output might be              \
162            interpreted as UTF-16 by other programs.  If we let                \
163            surrogates pass through, attackers could make a security           \
164            hole exploit by synthesizing any desired plane 1-16                \
165            character.  */                                                     \
166         result = __GCONV_ILLEGAL_INPUT;                                       \
167         if (! ignore_errors_p ())                                             \
168           break;                                                              \
169         inptr += 4;                                                           \
170         ++*irreversible;                                                      \
171         continue;                                                             \
172       }                                                                       \
173     else                                                                      \
174       {                                                                       \
175         put16 (outptr, c);                                                    \
176         outptr += 2;                                                          \
177       }                                                                       \
178                                                                               \
179     inptr += 4;                                                               \
180   }
181 #define LOOP_NEED_FLAGS
182 #define EXTRA_LOOP_DECLS \
183         , int swap
184 #include <iconv/loop.c>
185
186
187 /* Convert from UCS2 to the internal (UCS4-like) format.  */
188 #define MIN_NEEDED_INPUT        MIN_NEEDED_FROM
189 #define MIN_NEEDED_OUTPUT       MIN_NEEDED_TO
190 #define LOOPFCT                 FROM_LOOP
191 #define BODY \
192   {                                                                           \
193     uint16_t u1 = get16 (inptr);                                              \
194                                                                               \
195     if (swap)                                                                 \
196       u1 = bswap_16 (u1);                                                     \
197                                                                               \
198     if (__builtin_expect (u1 >= 0xd800 && u1 < 0xe000, 0))                    \
199       {                                                                       \
200         /* Surrogate characters in UCS-2 input are not valid.  Reject         \
201            them.  (Catching this here is not security relevant.)  */          \
202         STANDARD_FROM_LOOP_ERR_HANDLER (2);                                   \
203       }                                                                       \
204                                                                               \
205     put32 (outptr, u1);                                                       \
206                                                                               \
207     inptr += 2;                                                               \
208     outptr += 4;                                                              \
209   }
210 #define LOOP_NEED_FLAGS
211 #define EXTRA_LOOP_DECLS \
212         , int swap
213 #include <iconv/loop.c>
214
215
216 /* Now define the toplevel functions.  */
217 #include <iconv/skeleton.c>