2 static char *rcsid = "$Id: race.c,v 1.1.1.1 2003/06/04 00:26:07 marka Exp $";
6 * Copyright (c) 2000,2001,2002 Japan Network Information Center.
9 * By using this file, you agree to the terms and conditions set forth bellow.
11 * LICENSE TERMS AND CONDITIONS
13 * The following License Terms and Conditions apply, unless a different
14 * license is obtained from Japan Network Information Center ("JPNIC"),
15 * a Japanese association, Kokusai-Kougyou-Kanda Bldg 6F, 2-3-4 Uchi-Kanda,
16 * Chiyoda-ku, Tokyo 101-0047, Japan.
18 * 1. Use, Modification and Redistribution (including distribution of any
19 * modified or derived work) in source and/or binary forms is permitted
20 * under this License Terms and Conditions.
22 * 2. Redistribution of source code must retain the copyright notices as they
23 * appear in each source code file, this License Terms and Conditions.
25 * 3. Redistribution in binary form must reproduce the Copyright Notice,
26 * this License Terms and Conditions, in the documentation and/or other
27 * materials provided with the distribution. For the purposes of binary
28 * distribution the "Copyright Notice" refers to the following language:
29 * "Copyright (c) 2000-2002 Japan Network Information Center. All rights reserved."
31 * 4. The name of JPNIC may not be used to endorse or promote products
32 * derived from this Software without specific prior written approval of
35 * 5. Disclaimer/Limitation of Liability: THIS SOFTWARE IS PROVIDED BY JPNIC
36 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
37 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
38 * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL JPNIC BE LIABLE
39 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
40 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
41 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
42 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
43 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
44 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
45 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
54 #include <idn/result.h>
55 #include <idn/assert.h>
56 #include <idn/logmacro.h>
57 #include <idn/converter.h>
59 #include <idn/debug.h>
63 #ifndef IDN_RACE_PREFIX
64 #define IDN_RACE_PREFIX "bq--"
66 #define RACE_2OCTET_MODE 0xd8
67 #define RACE_ESCAPE 0xff
68 #define RACE_ESCAPE_2ND 0x99
70 #define RACE_BUF_SIZE 128 /* more than enough */
73 * Unicode surrogate pair.
/* Range tests for UTF-16 surrogate code units. Both macros evaluate 'v' twice, so do not pass an expression with side effects. */
/* High half: 0xd800..0xdbff inclusive. */
75 #define IS_SURROGATE_HIGH(v) (0xd800 <= (v) && (v) <= 0xdbff)
/* Low half: 0xdc00..0xdfff inclusive. */
76 #define IS_SURROGATE_LOW(v) (0xdc00 <= (v) && (v) <= 0xdfff)
/* High surrogate for code point 'v': SURROGATE_H_OFF plus the bits of (v - 0x10000) above the low ten. No range check is performed on 'v'. */
77 #define SURROGATE_HIGH(v) (SURROGATE_H_OFF + (((v) - 0x10000) >> 10))
/* Low surrogate for code point 'v': SURROGATE_L_OFF plus the low ten bits of 'v' (subtracting 0x10000 first would not change those bits). */
78 #define SURROGATE_LOW(v) (SURROGATE_L_OFF + ((v) & 0x3ff))
/* Constants used by the macros above and below; being macros, they only need to be defined before the point of expansion. */
/* Value added back when a surrogate pair is recombined. */
79 #define SURROGATE_BASE 0x10000
/* First value of the high-surrogate range. */
80 #define SURROGATE_H_OFF 0xd800
/* First value of the low-surrogate range. */
81 #define SURROGATE_L_OFF 0xdc00
/* Rebuild a code point from high surrogate 'h' and low surrogate 'l': SURROGATE_BASE + ((h - SURROGATE_H_OFF) << 10) + (l - SURROGATE_L_OFF). Neither argument is validated. */
82 #define COMBINE_SURROGATE(h, l) \
83 (SURROGATE_BASE + (((h)-SURROGATE_H_OFF)<<10) + ((l)-SURROGATE_L_OFF))
89 compress_one, /* all characters are in a single row */
90 compress_two, /* row 0 and another row */
91 compress_none /* nope */
94 static idn_result_t race_decode_decompress(const char *from,
97 static idn_result_t race_compress_encode(const unsigned short *p,
99 char *to, size_t tolen);
100 static int get_compress_mode(unsigned short *p);
103 idn__race_decode(idn_converter_t ctx, void *privdata,
104 const char *from, unsigned long *to, size_t tolen) {
105 unsigned short *buf = NULL;
106 size_t prefixlen = strlen(IDN_RACE_PREFIX);
113 TRACE(("idn__race_decode(from=\"%s\", tolen=%d)\n",
114 idn__debug_xstring(from, 50), (int)tolen));
116 if (!idn__util_asciihaveaceprefix(from, IDN_RACE_PREFIX)) {
118 r = idn_ucs4_utf8toucs4(from, to, tolen);
121 r = idn_invalid_encoding;
125 fromlen = strlen(from);
128 * Allocate a sufficiently large buffer.
130 buflen = fromlen + 1;
131 buf = malloc(sizeof(*buf) * buflen);
138 * Decode base32 and decompress.
140 r = race_decode_decompress(from, buf, buflen);
141 if (r != idn_success)
145 * Now 'buf' points to the decompressed string, which must contain
152 r = idn_ucs4_utf16toucs4(buf, to, tolen);
153 if (r != idn_success)
158 if (r == idn_success) {
159 TRACE(("idn__race_decode(): succcess (to=\"%s\")\n",
160 idn__debug_ucs4xstring(to, 50)));
162 TRACE(("idn__race_decode(): %s\n", idn_result_tostring(r)));
168 race_decode_decompress(const char *from, unsigned short *buf, size_t buflen)
170 unsigned short *p = buf;
171 unsigned int bitbuf = 0;
176 while (*from != '\0') {
180 if ('a' <= c && c <= 'z')
182 else if ('A' <= c && c <= 'Z')
184 else if ('2' <= c && c <= '7')
187 return (idn_invalid_encoding);
189 bitbuf = (bitbuf << 5) + x;
192 *p++ = (bitbuf >> (bitlen - 8)) & 0xff;
199 * Now 'buf' holds the decoded string.
205 if (buf[0] == RACE_2OCTET_MODE) {
206 if ((len - 1) % 2 != 0)
207 return (idn_invalid_encoding);
208 for (i = 1, j = 0; i < len; i += 2, j++)
209 buf[j] = (buf[i] << 8) + buf[i + 1];
212 unsigned short c = buf[0] << 8; /* higher octet */
214 for (i = 1, j = 0; i < len; j++) {
215 if (buf[i] == RACE_ESCAPE) {
217 return (idn_invalid_encoding);
218 else if (buf[i + 1] == RACE_ESCAPE_2ND)
224 } else if (buf[i] == 0x99 && c == 0x00) {
226 * The RACE specification says this is an error.
228 return (idn_invalid_encoding);
231 buf[j] = c | buf[i++];
238 return (idn_success);
242 idn__race_encode(idn_converter_t ctx, void *privdata,
243 const unsigned long *from, char *to, size_t tolen) {
245 unsigned short *p, *buf = NULL;
246 size_t prefixlen = strlen(IDN_RACE_PREFIX);
254 TRACE(("idn__race_encode(from=\"%s\", tolen=%d)\n",
255 idn__debug_ucs4xstring(from, 50), (int)tolen));
258 r = idn_ucs4_ucs4toutf8(from, to, tolen);
260 } else if (idn__util_ucs4haveaceprefix(from, IDN_RACE_PREFIX)) {
265 if (tolen < prefixlen) {
266 r = idn_buffer_overflow;
269 memcpy(to, IDN_RACE_PREFIX, prefixlen);
273 fromlen = idn_ucs4_strlen(from);
274 buflen = fromlen * 2 + 2;
278 * Reserve space for one character at the top of the buffer.
281 unsigned short *new_buf;
283 new_buf = realloc(buf, sizeof(*buf) * buflen);
284 if (new_buf == NULL) {
290 r = idn_ucs4_ucs4toutf16(from, buf + 1, buflen - 1);
291 if (r == idn_success)
293 else if (r != idn_buffer_overflow)
296 buflen = fromlen * 2 + 2;
301 * Now 'p' contains a UTF-16 encoded string.
306 * RACE doesn't permit U+0099 in an input string.
308 for (p = buf + 1; *p != '\0'; p++) {
310 r = idn_invalid_encoding;
316 * Compress, encode in base-32 and output.
318 compress_mode = get_compress_mode(buf + 1);
319 r = race_compress_encode(buf, compress_mode, to, tolen);
323 if (r == idn_success) {
324 TRACE(("idn__race_encode(): succcess (to=\"%s\")\n",
325 idn__debug_xstring(to_org, 50)));
327 TRACE(("idn__race_encode(): %s\n", idn_result_tostring(r)));
333 race_compress_encode(const unsigned short *p, int compress_mode,
334 char *to, size_t tolen)
336 unsigned long bitbuf = *p++; /* bit stream buffer */
337 int bitlen = 8; /* # of bits in 'bitbuf' */
339 while (*p != '\0' || bitlen > 0) {
343 /* End of data. Flush. */
344 bitbuf <<= (5 - bitlen);
346 } else if (compress_mode == compress_none) {
347 /* Push 16 bit data. */
348 bitbuf = (bitbuf << 16) | c;
351 } else {/* compress_mode == compress_one/compress_two */
352 /* Push 8 or 16 bit data. */
353 if (compress_mode == compress_two &&
355 /* Upper octet is zero (and not U1). */
356 bitbuf = (bitbuf << 16) | 0xff00 | c;
358 } else if ((c & 0xff) == 0xff) {
359 /* Lower octet is 0xff. */
360 bitbuf = (bitbuf << 16) |
361 (RACE_ESCAPE << 8) | RACE_ESCAPE_2ND;
364 /* Just output lower octet. */
365 bitbuf = (bitbuf << 8) | (c & 0xff);
372 * Output bits in 'bitbuf' in 5-bit unit.
374 while (bitlen >= 5) {
377 /* Get top 5 bits. */
378 x = (bitbuf >> (bitlen - 5)) & 0x1f;
388 return (idn_buffer_overflow);
396 return (idn_buffer_overflow);
399 return (idn_success);
403 get_compress_mode(unsigned short *p) {
405 unsigned int upper = 0;
406 unsigned short *modepos = p - 1;
409 unsigned int hi = *p++ & 0xff00;
413 } else if (hi == upper) {
415 } else if (upper == 0) {
418 *modepos = RACE_2OCTET_MODE;
419 return (compress_none);
422 *modepos = upper >> 8;
423 if (upper > 0 && zero > 0)
424 return (compress_two);
426 return (compress_one);