Update copyright notices with scripts/update-copyrights
[jlayton/glibc.git] / iconvdata / euc-jisx0213.c
1 /* Conversion from and to EUC-JISX0213.
2    Copyright (C) 2002-2014 Free Software Foundation, Inc.
3    This file is part of the GNU C Library.
4    Contributed by Bruno Haible <bruno@clisp.org>, 2002.
5
6    The GNU C Library is free software; you can redistribute it and/or
7    modify it under the terms of the GNU Lesser General Public
8    License as published by the Free Software Foundation; either
9    version 2.1 of the License, or (at your option) any later version.
10
11    The GNU C Library is distributed in the hope that it will be useful,
12    but WITHOUT ANY WARRANTY; without even the implied warranty of
13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14    Lesser General Public License for more details.
15
16    You should have received a copy of the GNU Lesser General Public
17    License along with the GNU C Library; if not, see
18    <http://www.gnu.org/licenses/>.  */
19
20 #include <dlfcn.h>
21 #include <stdint.h>
22 #include <gconv.h>
23
24 /* The structure of EUC-JISX0213 is as follows:
25
26    0x00..0x7F: ASCII
27
28    0x8E{A1..FE}: JISX0201 Katakana, with prefix 0x8E, offset by +0x80.
      Only second bytes A1..DF are accepted; the decoder rejects E0..FE.
29
30    0x8F{A1..FE}{A1..FE}: JISX0213 plane 2, with prefix 0x8F, offset by +0x8080.
31
32    0x{A1..FE}{A1..FE}: JISX0213 plane 1, offset by +0x8080.
33
34    Note that some JISX0213 characters are not contained in Unicode 3.2
35    and are therefore best represented as sequences of Unicode characters.
36 */
37
38 #include "jisx0213.h"
39
40 /* Definitions used in the body of the `gconv' function.  */
/* CHARSET_NAME, DEFINE_INIT and DEFINE_FINI are not referenced again in
   this file; presumably they are read by <iconv/skeleton.c>, which is
   included at the end.  FROM_LOOP and TO_LOOP name the two conversion
   loop functions that are generated further down through LOOPFCT.

   The *_NEEDED_* values are byte counts for one conversion step.
   "FROM" is the direction EUC-JISX0213 -> UCS-4: 1 to 3 input bytes and
   4 to 8 output bytes (8 when one code expands to two UCS-4 characters).
   "TO" is the direction UCS-4 -> EUC-JISX0213: always 4 input bytes and
   1 to 3 output bytes.  */
41 #define CHARSET_NAME            "EUC-JISX0213//"
42 #define FROM_LOOP               from_euc_jisx0213
43 #define TO_LOOP                 to_euc_jisx0213
44 #define DEFINE_INIT             1
45 #define DEFINE_FINI             1
46 #define FROM_LOOP_MIN_NEEDED_FROM       1
47 #define FROM_LOOP_MAX_NEEDED_FROM       3
48 #define FROM_LOOP_MIN_NEEDED_TO         4
49 #define FROM_LOOP_MAX_NEEDED_TO         8
50 #define TO_LOOP_MIN_NEEDED_FROM         4
51 #define TO_LOOP_MAX_NEEDED_FROM         4
52 #define TO_LOOP_MIN_NEEDED_TO           1
53 #define TO_LOOP_MAX_NEEDED_TO           3
/* PREPARE_LOOP declares two locals: SAVED_STATE, the backup slot used by
   SAVE_RESET_STATE, and STATEP, a pointer to the `__count' member of the
   conversion state reached through DATA.  DATA is not declared here; it
   has to exist where the macro is expanded.
   EXTRA_LOOP_ARGS appends STATEP to an argument list, presumably the
   calls of the loop functions whose matching parameter is declared by
   EXTRA_LOOP_DECLS.  */
54 #define PREPARE_LOOP \
55   int saved_state;                                                            \
56   int *statep = &data->__statep->__count;
57 #define EXTRA_LOOP_ARGS         , statep
58
58
59
60 /* Since we might have to reset input pointer we must be able to save
61    and restore the state.  */
/* SAVE selects the direction of the copy: a nonzero value stores *STATEP
   in SAVED_STATE, zero writes SAVED_STATE back to *STATEP.  Both
   variables come from PREPARE_LOOP.  The expansion deliberately ends
   without a semicolon; the user of the macro supplies it.  */
62 #define SAVE_RESET_STATE(Save) \
63   if (Save)                                                                   \
64     saved_state = *statep;                                                    \
65   else                                                                        \
66     *statep = saved_state
67
68
69 /* During EUC-JISX0213 to UCS-4 conversion, the COUNT element of the state
70    contains the last UCS-4 character, shifted by 3 bits.
71    During UCS-4 to EUC-JISX0213 conversion, the COUNT element of the state
72    contains the last two bytes to be output, shifted by 3 bits.  */
73
74 /* Since this is a stateful encoding we have to provide code which resets
75    the output state to the initial state.  This has to be done during the
76    flushing.  */
/* Nothing happens when the COUNT element is zero.  Otherwise the pending
   value (COUNT >> 3) is written at OUTBUF and COUNT is cleared:
   - FROM_DIRECTION: one UCS-4 value, stored through a uint32_t pointer
     (host byte order); 4 bytes of room are required.
   - otherwise: the two buffered bytes, high byte first; 2 bytes of room
     are required.
   When the room is missing, STATUS is set to __GCONV_FULL_OUTPUT and the
   pending value stays in COUNT.  DATA, OUTBUF, OUTEND and STATUS are not
   declared here; they have to exist where the macro is expanded.  */
77 #define EMIT_SHIFT_TO_INIT \
78   if (data->__statep->__count != 0)                                           \
79     {                                                                         \
80       if (FROM_DIRECTION)                                                     \
81         {                                                                     \
82           if (__builtin_expect (outbuf + 4 <= outend, 1))                     \
83             {                                                                 \
84               /* Write out the last character.  */                            \
85               *((uint32_t *) outbuf) = data->__statep->__count >> 3;          \
86               outbuf += sizeof (uint32_t);                                    \
87               data->__statep->__count = 0;                                    \
88             }                                                                 \
89           else                                                                \
90             /* We don't have enough room in the output buffer.  */            \
91             status = __GCONV_FULL_OUTPUT;                                     \
92         }                                                                     \
93       else                                                                    \
94         {                                                                     \
95           if (__builtin_expect (outbuf + 2 <= outend, 1))                     \
96             {                                                                 \
97               /* Write out the last character.  */                            \
98               uint32_t lasttwo = data->__statep->__count >> 3;                \
99               *outbuf++ = (lasttwo >> 8) & 0xff;                              \
100               *outbuf++ = lasttwo & 0xff;                                     \
101               data->__statep->__count = 0;                                    \
102             }                                                                 \
103           else                                                                \
104             /* We don't have enough room in the output buffer.  */            \
105             status = __GCONV_FULL_OUTPUT;                                     \
106         }                                                                     \
107     }
108
109
110 /* First define the conversion function from EUC-JISX0213 to UCS-4.  */
/* The generic names below take the FROM_LOOP_* values defined near the
   top of the file, and LOOPFCT takes the function name FROM_LOOP.
   Presumably they are read by <iconv/loop.c>, which is included after
   the BODY definition that follows.  */
111 #define MIN_NEEDED_INPUT        FROM_LOOP_MIN_NEEDED_FROM
112 #define MAX_NEEDED_INPUT        FROM_LOOP_MAX_NEEDED_FROM
113 #define MIN_NEEDED_OUTPUT       FROM_LOOP_MIN_NEEDED_TO
114 #define MAX_NEEDED_OUTPUT       FROM_LOOP_MAX_NEEDED_TO
115 #define LOOPFCT                 FROM_LOOP
/* Conversion step for one EUC-JISX0213 character; expanded by
   <iconv/loop.c>, which is expected to provide INPTR, INEND, OUTPTR,
   OUTEND, RESULT and the enclosing loop that `break' and `continue'
   refer to.

   *STATEP >> 3 holds a queued UCS-4 character, or zero.  A character is
   queued when a JISX0213 code maps to a sequence of two UCS-4 characters
   and only the first one fits into the output buffer; RESULT is then set
   to __GCONV_FULL_OUTPUT.  On a later step the queued character is
   written without consuming input, and the queue is emptied so that the
   character is emitted exactly once.

   Otherwise one character is decoded:
   - 0x00..0x7f: ASCII, copied unchanged.
   - 0x8e followed by 0xa1..0xdf: half-width katakana, U+FF61..U+FF9F.
   - 0x8f followed by two bytes: JISX0213 plane 2.
   - 0xa1..0xfe followed by one byte: JISX0213 plane 1.
   A missing second or third byte sets RESULT to __GCONV_INCOMPLETE_INPUT.
   Invalid lead or second bytes and unmapped codes are reported through
   STANDARD_FROM_LOOP_ERR_HANDLER with a skip length of one byte.
   jisx0213_to_ucs4 returns 0 for an unmapped code and a value below 0x80
   for a two-character sequence; that value minus one indexes
   __jisx0213_to_ucs_combining.  */
#define BODY \
  {                                                                           \
    uint32_t ch;                                                              \
                                                                              \
    /* Determine whether there is a buffered character pending.  */           \
    ch = *statep >> 3;                                                        \
    if (__builtin_expect (ch == 0, 1))                                        \
      {                                                                       \
        /* No - so look at the next input byte.  */                           \
        ch = *inptr;                                                          \
                                                                              \
        if (ch < 0x80)                                                        \
          /* Plain ASCII character.  */                                       \
          ++inptr;                                                            \
        else if ((ch >= 0xa1 && ch <= 0xfe) || ch == 0x8e || ch == 0x8f)      \
          {                                                                   \
            /* Two or three byte character.  */                               \
            uint32_t ch2;                                                     \
                                                                              \
            if (__builtin_expect (inptr + 1 >= inend, 0))                     \
              {                                                               \
                /* The second byte is not available.  */                      \
                result = __GCONV_INCOMPLETE_INPUT;                            \
                break;                                                        \
              }                                                               \
                                                                              \
            ch2 = inptr[1];                                                   \
                                                                              \
            /* The second byte must be >= 0xa1 and <= 0xfe.  */               \
            if (__builtin_expect (ch2 < 0xa1 || ch2 > 0xfe, 0))               \
              {                                                               \
                /* This is an illegal character.  */                          \
                STANDARD_FROM_LOOP_ERR_HANDLER (1);                           \
              }                                                               \
                                                                              \
            if (ch == 0x8e)                                                   \
              {                                                               \
                /* Half-width katakana.  */                                   \
                if (__builtin_expect (ch2 > 0xdf, 0))                         \
                  STANDARD_FROM_LOOP_ERR_HANDLER (1);                         \
                                                                              \
                ch = ch2 + 0xfec0; /* 0xa1 -> U+FF61, 0xdf -> U+FF9F.  */     \
                inptr += 2;                                                   \
              }                                                               \
            else                                                              \
              {                                                               \
                const unsigned char *endp;                                    \
                                                                              \
                if (ch == 0x8f)                                               \
                  {                                                           \
                    /* JISX 0213 plane 2.  */                                 \
                    uint32_t ch3;                                             \
                                                                              \
                    if (__builtin_expect (inptr + 2 >= inend, 0))             \
                      {                                                       \
                        /* The third byte is not available.  */               \
                        result = __GCONV_INCOMPLETE_INPUT;                    \
                        break;                                                \
                      }                                                       \
                                                                              \
                    ch3 = inptr[2];                                           \
                    endp = inptr + 3;                                         \
                                                                              \
                    ch = jisx0213_to_ucs4 (0x200 - 0x80 + ch2, ch3 ^ 0x80);   \
                  }                                                           \
                else                                                          \
                  {                                                           \
                    /* JISX 0213 plane 1.  */                                 \
                    endp = inptr + 2;                                         \
                                                                              \
                    ch = jisx0213_to_ucs4 (0x100 - 0x80 + ch, ch2 ^ 0x80);    \
                  }                                                           \
                                                                              \
                if (ch == 0)                                                  \
                  /* This is an illegal character.  */                        \
                  STANDARD_FROM_LOOP_ERR_HANDLER (1);                         \
                                                                              \
                inptr = endp;                                                 \
                                                                              \
                if (ch < 0x80)                                                \
                  {                                                           \
                    /* It's a combining character.  */                        \
                    uint32_t u1 = __jisx0213_to_ucs_combining[ch - 1][0];     \
                    uint32_t u2 = __jisx0213_to_ucs_combining[ch - 1][1];     \
                                                                              \
                    put32 (outptr, u1);                                       \
                    outptr += 4;                                              \
                                                                              \
                    /* See whether we have room for two characters.  */       \
                    if (outptr + 4 <= outend)                                 \
                      {                                                       \
                        put32 (outptr, u2);                                   \
                        outptr += 4;                                          \
                        continue;                                             \
                      }                                                       \
                                                                              \
                    /* Otherwise store only the first character now, and      \
                       put the second one into the queue.  */                 \
                    *statep = u2 << 3;                                        \
                    /* Tell the caller why we terminate the loop.  */         \
                    result = __GCONV_FULL_OUTPUT;                             \
                    break;                                                    \
                  }                                                           \
              }                                                               \
          }                                                                   \
        else                                                                  \
          {                                                                   \
            /* This is illegal.  */                                           \
            STANDARD_FROM_LOOP_ERR_HANDLER (1);                               \
          }                                                                   \
      }                                                                       \
    else                                                                      \
      /* The queued character is written below.  Empty the queue so that      \
         it is not written again on the next step; the low three bits are     \
         not part of the queued value and stay untouched.  */                 \
      *statep &= 7;                                                           \
                                                                              \
    put32 (outptr, ch);                                                       \
    outptr += 4;                                                              \
  }
/* Settings for the inclusion of <iconv/loop.c> below.  LOOP_NEED_FLAGS is
   defined without a value (NOTE(review): its effect is defined in
   loop.c, not visible here).  EXTRA_LOOP_DECLS appends an `int *statep'
   parameter declaration; it is the counterpart of EXTRA_LOOP_ARGS.  */
231 #define LOOP_NEED_FLAGS
232 #define EXTRA_LOOP_DECLS        , int *statep
/* Function body for converting the single byte C on its own; presumably
   <iconv/loop.c> wraps it in a function (NOTE(review): the signature is
   not visible here).  Bytes below 0x80 are ASCII and are returned
   unchanged; every other byte yields WEOF, since in this encoding such a
   byte is only ever part of a multi-byte character.  */
233 #define ONEBYTE_BODY \
234   {                                                                           \
235     if (c < 0x80)                                                             \
236       return c;                                                               \
237     else                                                                      \
238       return WEOF;                                                            \
239   }
240 #include <iconv/loop.c>
241
242
243 /* Next, define the other direction, from UCS-4 to EUC-JISX0213.  */
244
245 /* Composition tables for each of the relevant combining characters.  */
/* BASE is the two-byte EUC-JISX0213 code of a base character and COMPOSED
   the two-byte code of the precomposed character that replaces the base
   when it is followed by one particular combining character.  The
   entries are grouped by combining character: COMP_TABLE_IDX_xxxx is the
   index of the first entry of the group for U+xxxx and
   COMP_TABLE_LEN_xxxx the number of entries in that group.  Each IDX
   macro is computed from the previous group, so the order of the groups
   must not change.  The comment after each entry reads
   `composed = base combining'; the codes there are apparently JISX0213
   plane 1 codes without the 0x8080 offset used in the table.  */
246 static const struct
247 {
248   uint16_t base;
249   uint16_t composed;
250 } comp_table_data[] =
251 {
252 #define COMP_TABLE_IDX_02E5 0
253 #define COMP_TABLE_LEN_02E5 1
254   { 0xabe4, 0xabe5 }, /* 0x12B65 = 0x12B64 U+02E5 */
255 #define COMP_TABLE_IDX_02E9 (COMP_TABLE_IDX_02E5 + COMP_TABLE_LEN_02E5)
256 #define COMP_TABLE_LEN_02E9 1
257   { 0xabe0, 0xabe6 }, /* 0x12B66 = 0x12B60 U+02E9 */
258 #define COMP_TABLE_IDX_0300 (COMP_TABLE_IDX_02E9 + COMP_TABLE_LEN_02E9)
259 #define COMP_TABLE_LEN_0300 5
260   { 0xa9dc, 0xabc4 }, /* 0x12B44 = 0x1295C U+0300 */
261   { 0xabb8, 0xabc8 }, /* 0x12B48 = 0x12B38 U+0300 */
262   { 0xabb7, 0xabca }, /* 0x12B4A = 0x12B37 U+0300 */
263   { 0xabb0, 0xabcc }, /* 0x12B4C = 0x12B30 U+0300 */
264   { 0xabc3, 0xabce }, /* 0x12B4E = 0x12B43 U+0300 */
265 #define COMP_TABLE_IDX_0301 (COMP_TABLE_IDX_0300 + COMP_TABLE_LEN_0300)
266 #define COMP_TABLE_LEN_0301 4
267   { 0xabb8, 0xabc9 }, /* 0x12B49 = 0x12B38 U+0301 */
268   { 0xabb7, 0xabcb }, /* 0x12B4B = 0x12B37 U+0301 */
269   { 0xabb0, 0xabcd }, /* 0x12B4D = 0x12B30 U+0301 */
270   { 0xabc3, 0xabcf }, /* 0x12B4F = 0x12B43 U+0301 */
271 #define COMP_TABLE_IDX_309A (COMP_TABLE_IDX_0301 + COMP_TABLE_LEN_0301)
272 #define COMP_TABLE_LEN_309A 14
273   { 0xa4ab, 0xa4f7 }, /* 0x12477 = 0x1242B U+309A */
274   { 0xa4ad, 0xa4f8 }, /* 0x12478 = 0x1242D U+309A */
275   { 0xa4af, 0xa4f9 }, /* 0x12479 = 0x1242F U+309A */
276   { 0xa4b1, 0xa4fa }, /* 0x1247A = 0x12431 U+309A */
277   { 0xa4b3, 0xa4fb }, /* 0x1247B = 0x12433 U+309A */
278   { 0xa5ab, 0xa5f7 }, /* 0x12577 = 0x1252B U+309A */
279   { 0xa5ad, 0xa5f8 }, /* 0x12578 = 0x1252D U+309A */
280   { 0xa5af, 0xa5f9 }, /* 0x12579 = 0x1252F U+309A */
281   { 0xa5b1, 0xa5fa }, /* 0x1257A = 0x12531 U+309A */
282   { 0xa5b3, 0xa5fb }, /* 0x1257B = 0x12533 U+309A */
283   { 0xa5bb, 0xa5fc }, /* 0x1257C = 0x1253B U+309A */
284   { 0xa5c4, 0xa5fd }, /* 0x1257D = 0x12544 U+309A */
285   { 0xa5c8, 0xa5fe }, /* 0x1257E = 0x12548 U+309A */
286   { 0xa6f5, 0xa6f8 }, /* 0x12678 = 0x12675 U+309A */
287 };
288
/* Same generic names as for the first loop, now carrying the TO_LOOP_*
   values for the direction UCS-4 -> EUC-JISX0213; LOOPFCT takes the
   function name TO_LOOP.  Presumably they are read by the second
   inclusion of <iconv/loop.c> further down.  */
289 #define MIN_NEEDED_INPUT        TO_LOOP_MIN_NEEDED_FROM
290 #define MAX_NEEDED_INPUT        TO_LOOP_MAX_NEEDED_FROM
291 #define MIN_NEEDED_OUTPUT       TO_LOOP_MIN_NEEDED_TO
292 #define MAX_NEEDED_OUTPUT       TO_LOOP_MAX_NEEDED_TO
293 #define LOOPFCT                 TO_LOOP
/* Conversion step for one UCS-4 character read from INPTR; expanded by
   <iconv/loop.c>, which is expected to provide INPTR, OUTPTR, OUTEND,
   RESULT and the enclosing loop that `break' and `continue' refer to.

   *STATEP >> 3 holds a buffered two-byte EUC code, or zero.  A plane 1
   character is buffered instead of written when the ucs4_to_jisx0213
   result has bit 0x0080 set, which marks it as a possible base of a
   composition listed in comp_table_data.  When the next character is one
   of the combining characters U+02E5, U+02E9, U+0300, U+0301 or U+309A
   and the pair is in the table, the precomposed code is written and
   both characters are consumed.  Otherwise the buffered code is written
   by itself and the current character is left in the input for the
   following step.

   Unbuffered characters are written directly: ASCII as one byte,
   U+FF61..U+FF9F as 0x8e plus one byte, plane 1 as two bytes and plane 2
   (bit 0x8000 set in the ucs4_to_jisx0213 result) as 0x8f plus two
   bytes.  A result of 0 means the character has no mapping; it is then
   handed to UNICODE_TAG_HANDLER and STANDARD_TO_LOOP_ERR_HANDLER.
   RESULT is set to __GCONV_FULL_OUTPUT whenever the bytes to be written
   do not fit before OUTEND.  */
294 #define BODY \
295   {                                                                           \
296     uint32_t ch = get32 (inptr);                                              \
297                                                                               \
298     if ((*statep >> 3) != 0)                                                  \
299       {                                                                       \
300         /* Attempt to combine the last character with this one.  */           \
301         uint16_t lasttwo = *statep >> 3;                                      \
302         unsigned int idx;                                                     \
303         unsigned int len;                                                     \
304                                                                               \
305         if (ch == 0x02e5)                                                     \
306           idx = COMP_TABLE_IDX_02E5, len = COMP_TABLE_LEN_02E5;               \
307         else if (ch == 0x02e9)                                                \
308           idx = COMP_TABLE_IDX_02E9, len = COMP_TABLE_LEN_02E9;               \
309         else if (ch == 0x0300)                                                \
310           idx = COMP_TABLE_IDX_0300, len = COMP_TABLE_LEN_0300;               \
311         else if (ch == 0x0301)                                                \
312           idx = COMP_TABLE_IDX_0301, len = COMP_TABLE_LEN_0301;               \
313         else if (ch == 0x309a)                                                \
314           idx = COMP_TABLE_IDX_309A, len = COMP_TABLE_LEN_309A;               \
315         else                                                                  \
316           goto not_combining;                                                 \
317                                                                               \
318         do                                                                    \
319           if (comp_table_data[idx].base == lasttwo)                           \
320             break;                                                            \
321         while (++idx, --len > 0);                                             \
322                                                                               \
323         if (len > 0)                                                          \
324           {                                                                   \
325             /* Output the combined character.  */                             \
326             if (__builtin_expect (outptr + 1 >= outend, 0))                   \
327               {                                                               \
328                 result = __GCONV_FULL_OUTPUT;                                 \
329                 break;                                                        \
330               }                                                               \
331             lasttwo = comp_table_data[idx].composed;                          \
332             *outptr++ = (lasttwo >> 8) & 0xff;                                \
333             *outptr++ = lasttwo & 0xff;                                       \
334             *statep = 0;                                                      \
335             inptr += 4;                                                       \
336             continue;                                                         \
337           }                                                                   \
338                                                                               \
339       not_combining:                                                          \
340         /* Output the buffered character.  */                                 \
341         if (__builtin_expect (outptr + 1 >= outend, 0))                       \
342           {                                                                   \
343             result = __GCONV_FULL_OUTPUT;                                     \
344             break;                                                            \
345           }                                                                   \
346         *outptr++ = (lasttwo >> 8) & 0xff;                                    \
347         *outptr++ = lasttwo & 0xff;                                           \
348         *statep = 0;                                                          \
349         continue; /* INPTR is left unchanged here.  */                        \
350       }                                                                       \
351                                                                               \
352     if (ch < 0x80)                                                            \
353       /* Plain ASCII character.  */                                           \
354       *outptr++ = ch;                                                         \
355     else if (ch >= 0xff61 && ch <= 0xff9f)                                    \
356       {                                                                       \
357         /* Half-width katakana.  */                                           \
358         if (__builtin_expect (outptr + 1 >= outend, 0))                       \
359           {                                                                   \
360             result = __GCONV_FULL_OUTPUT;                                     \
361             break;                                                            \
362           }                                                                   \
363         *outptr++ = 0x8e;                                                     \
364         *outptr++ = ch - 0xfec0; /* U+FF61 -> 0xa1, U+FF9F -> 0xdf.  */       \
365       }                                                                       \
366     else                                                                      \
367       {                                                                       \
368         uint32_t jch = ucs4_to_jisx0213 (ch);                                 \
369         if (jch == 0)                                                         \
370           {                                                                   \
371             UNICODE_TAG_HANDLER (ch, 4);                                      \
372                                                                               \
373             /* Illegal character.  */                                         \
374             STANDARD_TO_LOOP_ERR_HANDLER (4);                                 \
375           }                                                                   \
376                                                                               \
377         if (jch & 0x0080)                                                     \
378           {                                                                   \
379             /* A possible match in comp_table_data.  We have to buffer it.  */\
380                                                                               \
381             /* We know it's a JISX 0213 plane 1 character.  */                \
382             assert ((jch & 0x8000) == 0);                                     \
383                                                                               \
384             *statep = (jch | 0x8080) << 3;                                    \
385             inptr += 4;                                                       \
386             continue;                                                         \
387           }                                                                   \
388                                                                               \
389         if (jch & 0x8000)                                                     \
390           {                                                                   \
391             /* JISX 0213 plane 2.  */                                         \
392             if (__builtin_expect (outptr + 2 >= outend, 0))                   \
393               {                                                               \
394                 result = __GCONV_FULL_OUTPUT;                                 \
395                 break;                                                        \
396               }                                                               \
397             *outptr++ = 0x8f;                                                 \
398           }                                                                   \
399         else                                                                  \
400           {                                                                   \
401             /* JISX 0213 plane 1.  */                                         \
402             if (__builtin_expect (outptr + 1 >= outend, 0))                   \
403               {                                                               \
404                 result = __GCONV_FULL_OUTPUT;                                 \
405                 break;                                                        \
406               }                                                               \
407           }                                                                   \
408         *outptr++ = (jch >> 8) | 0x80;                                        \
409         *outptr++ = (jch & 0xff) | 0x80;                                      \
410       }                                                                       \
411                                                                               \
412     inptr += 4;                                                               \
413   }
/* Settings for the second inclusion of <iconv/loop.c>, identical to the
   ones given before the first inclusion: LOOP_NEED_FLAGS is defined
   without a value (NOTE(review): its effect is defined in loop.c), and
   EXTRA_LOOP_DECLS appends the `int *statep' parameter declaration.  */
414 #define LOOP_NEED_FLAGS
415 #define EXTRA_LOOP_DECLS        , int *statep
416 #include <iconv/loop.c>
417
418
419 /* Now define the toplevel functions.  */
420 #include <iconv/skeleton.c>