2 static char *rcsid = "$Id: ucsset.c,v 1.1.1.1 2003/06/04 00:26:15 marka Exp $";
6 * Copyright (c) 2001 Japan Network Information Center. All rights reserved.
8 * By using this file, you agree to the terms and conditions set forth bellow.
10 * LICENSE TERMS AND CONDITIONS
12 * The following License Terms and Conditions apply, unless a different
13 * license is obtained from Japan Network Information Center ("JPNIC"),
14 * a Japanese association, Kokusai-Kougyou-Kanda Bldg 6F, 2-3-4 Uchi-Kanda,
15 * Chiyoda-ku, Tokyo 101-0047, Japan.
17 * 1. Use, Modification and Redistribution (including distribution of any
18 * modified or derived work) in source and/or binary forms is permitted
19 * under this License Terms and Conditions.
21 * 2. Redistribution of source code must retain the copyright notices as they
22 * appear in each source code file, this License Terms and Conditions.
24 * 3. Redistribution in binary form must reproduce the Copyright Notice,
25 * this License Terms and Conditions, in the documentation and/or other
26 * materials provided with the distribution. For the purposes of binary
27 * distribution the "Copyright Notice" refers to the following language:
28 * "Copyright (c) 2000-2002 Japan Network Information Center. All rights reserved."
30 * 4. The name of JPNIC may not be used to endorse or promote products
31 * derived from this Software without specific prior written approval of
34 * 5. Disclaimer/Limitation of Liability: THIS SOFTWARE IS PROVIDED BY JPNIC
35 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
36 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
37 * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL JPNIC BE LIABLE
38 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
39 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
40 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
41 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
42 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
43 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
44 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
53 #include <idn/result.h>
54 #include <idn/assert.h>
55 #include <idn/logmacro.h>
56 #include <idn/ucsset.h>
58 #define UCS_MAX 0x80000000UL
65 * The set of code points is represented by an array of code point ranges.
66 * In the building phase, specified ranges by 'idn_ucsset_add' or
67 * 'idn_ucsset_addrange' are simply appended to the array.
68 * And 'idn_ucsset_fix' sorts the array by the code point value, and also
69 * merges any intersecting ranges. Since the array is sorted, a binary
70 * search can be used for looking up.
80 * To speed up searching further, the entire region of UCS-4 code points
81 * (U+0000 - U+7FFFFFFF) are divided into segments. For each segment,
82 * the first and last element of the range array corresponding to the
83 * segment are computed by 'idn_ucsset_fix'. This narrows down the
84 * (initial) search range.
87 int range_start; /* index of ucsset.ranges */
88 int range_end; /* ditto */
92 * Code point to segment index conversion.
94 * Below is the function that maps a code point to the corresponding segment.
95 * The mapping is non-uniform, so that BMP, the following 16 planes that
96 * comprise Unicode code points together with BMP, and other planes
97 * have different granularity.
99 #define SEG_THLD1 0x10000 /* BMP */
100 #define SEG_THLD2 0x110000 /* Unicode (BMP+16planes) */
101 #define SEG_SFT1 10 /* BMP: 1K code points/segment */
102 #define SEG_SFT2 14 /* following 16 planes: 16K cp/seg */
103 #define SEG_SFT3 24 /* rest: 16M cp/seg */
104 #define SEG_OFF1 (SEG_THLD1 >> SEG_SFT1)
105 #define SEG_OFF2 (((SEG_THLD2 - SEG_THLD1) >> SEG_SFT2) + SEG_OFF1)
106 #define SEG_INDEX(v) \
107 (((v) < SEG_THLD1) ? ((v) >> SEG_SFT1) : \
108 ((v) < SEG_THLD2) ? ((((v) - SEG_THLD1) >> SEG_SFT2) + SEG_OFF1) : \
109 ((((v) - SEG_THLD2) >> SEG_SFT3) + SEG_OFF2))
110 #define SEG_LEN (SEG_INDEX(UCS_MAX - 1) + 1)
113 * Representation of set of UCS code points.
115 typedef struct idn_ucsset {
116 segment_t segments[SEG_LEN];
118 int size; /* allocated size of 'ranges' */
119 int nranges; /* num of ranges */
121 int refcnt; /* reference count */
124 static idn_result_t addrange(idn_ucsset_t ctx, unsigned long from,
125 unsigned long to, char *func_name);
126 static int comp_range(const void *v1, const void *v2);
129 idn_ucsset_create(idn_ucsset_t *ctx) {
134 TRACE(("idn_ucsset_create()\n"));
136 if ((bm = malloc(sizeof(ucsset))) == NULL) {
137 WARNING(("idn_ucsset_create: malloc failed\n"));
140 bm->size = bm->nranges = 0;
145 return (idn_success);
149 idn_ucsset_destroy(idn_ucsset_t ctx) {
150 assert(ctx != NULL && ctx->refcnt > 0);
152 TRACE(("idn_ucsset_destroy()\n"));
154 if (--ctx->refcnt == 0) {
155 if (ctx->ranges != NULL)
162 idn_ucsset_incrref(idn_ucsset_t ctx) {
163 assert(ctx != NULL && ctx->refcnt > 0);
165 TRACE(("idn_ucsset_incrref()\n"));
171 idn_ucsset_add(idn_ucsset_t ctx, unsigned long v) {
172 assert(ctx != NULL && ctx->refcnt > 0);
174 TRACE(("idn_ucsset_add(v=U+%lX)\n", v));
176 return (addrange(ctx, v, v, "idn_ucsset_add"));
180 idn_ucsset_addrange(idn_ucsset_t ctx, unsigned long from,
183 assert(ctx != NULL && ctx->refcnt > 0);
185 TRACE(("idn_ucsset_addrange(from=U+%lX, to=U+%lX)\n",
188 return (addrange(ctx, from, to, "idn_ucsset_addrange"));
192 idn_ucsset_fix(idn_ucsset_t ctx) {
198 assert(ctx != NULL && ctx->refcnt > 0);
200 TRACE(("idn_ucsset_fix()\n"));
202 nranges = ctx->nranges;
203 ranges = ctx->ranges;
204 segments = ctx->segments;
211 /* Initialize segment array */
212 for (i = 0; i < SEG_LEN; i++) {
213 segments[i].range_start = -1;
214 segments[i].range_end = -1;
217 /* If the set is empty, there's nothing to be done. */
222 qsort(ranges, nranges, sizeof(range_t), comp_range);
224 /* Merge overlapped/continuous ranges. */
225 for (i = 0, j = 1; j < nranges; j++) {
226 if (ranges[i].to + 1 >= ranges[j].from) {
228 if (ranges[i].to < ranges[j].to) {
229 ranges[i].to = ranges[j].to;
234 ranges[i] = ranges[j];
237 /* 'i' points the last range in the array. */
238 ctx->nranges = nranges = ++i;
240 /* Create segment array. */
241 for (i = 0; i < nranges; i++) {
242 int fidx = SEG_INDEX(ranges[i].from);
243 int tidx = SEG_INDEX(ranges[i].to);
245 for (j = fidx; j <= tidx; j++) {
246 if (segments[j].range_start < 0)
247 segments[j].range_start = i;
248 segments[j].range_end = i;
254 * Does the standard guarantee realloc() always succeeds
257 /* Shrink malloc'ed space if possible. */
258 ctx->ranges = realloc(ctx->ranges, ctx->nranges * sizeof(range_t));
263 idn_ucsset_lookup(idn_ucsset_t ctx, unsigned long v, int *found) {
267 assert(ctx != NULL && ctx->refcnt > 0 && found != NULL);
269 TRACE(("idn_ucsset_lookup(v=U+%lX)\n", v));
271 /* Make sure it is fixed. */
273 WARNING(("idn_ucsset_lookup: not fixed yet\n"));
274 return (idn_failure);
277 /* Check the given code point. */
279 return (idn_invalid_codepoint);
281 /* Get the segment 'v' belongs to. */
282 segments = ctx->segments;
285 /* Do binary search. */
287 if (segments[idx].range_start >= 0) {
288 int lo = segments[idx].range_start;
289 int hi = segments[idx].range_end;
290 range_t *ranges = ctx->ranges;
293 int mid = (lo + hi) / 2;
294 if (v < ranges[mid].from) {
296 } else if (v > ranges[mid].to) {
304 return (idn_success);
308 addrange(idn_ucsset_t ctx, unsigned long from, unsigned long to,
313 /* Check the given code points. */
314 if (from > UCS_MAX) {
315 WARNING(("%s: code point out of range (U+%lX)\n",
317 return (idn_invalid_codepoint);
318 } else if (to > UCS_MAX) {
319 WARNING(("%s: code point out of range (U+%lX)\n",
321 return (idn_invalid_codepoint);
322 } else if (from > to) {
323 WARNING(("%s: invalid range spec (U+%lX-U+%lX)\n",
324 func_name, from, to));
325 return (idn_invalid_codepoint);
328 /* Make sure it is not fixed yet. */
330 WARNING(("%s: attempt to add to already fixed object\n",
332 return (idn_failure);
335 /* Append the specified range to the 'ranges' array. */
336 if (ctx->nranges >= ctx->size) {
337 /* Make it bigger. */
339 ctx->size = INIT_SIZE;
342 newbuf = realloc(ctx->ranges, ctx->size * sizeof(range_t));
344 return (idn_nomemory);
345 ctx->ranges = newbuf;
347 ctx->ranges[ctx->nranges].from = from;
348 ctx->ranges[ctx->nranges].to = to;
351 return (idn_success);
355 comp_range(const void *v1, const void *v2) {
357 * Range comparation function suitable for qsort().
359 const range_t *r1 = v1;
360 const range_t *r2 = v2;
362 if (r1->from < r2->from)
364 else if (r1->from > r2->from)