1 /* Extended regular expression matching and search library.
2 Copyright (C) 2002 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4 Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
16 You should have received a copy of the GNU Lesser General Public
17 License along with the GNU C Library; if not, write to the Free
18 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
31 # ifndef _RE_DEFINE_LOCALE_FUNCTIONS
32 # define _RE_DEFINE_LOCALE_FUNCTIONS 1
33 # include <locale/localeinfo.h>
34 # include <locale/elem-hash.h>
35 # include <locale/coll-lookup.h>
39 /* This is for other GNU distributions with internationalized messages. */
40 #if HAVE_LIBINTL_H || defined _LIBC
44 # define gettext(msgid) \
45 INTUSE(__dcgettext) (_libc_intl_domainname_internal, msgid, LC_MESSAGES)
48 # define gettext(msgid) (msgid)
52 /* This define is so xgettext can find the internationalizable
54 # define gettext_noop(String) String
58 #include "regex_internal.h"
60 static void re_string_construct_common (const unsigned char *str,
61 int len, re_string_t *pstr);
63 static reg_errcode_t build_wcs_buffer (re_string_t *pstr);
64 static reg_errcode_t build_wcs_upper_buffer (re_string_t *pstr);
65 #endif /* RE_ENABLE_I18N */
66 static reg_errcode_t build_upper_buffer (re_string_t *pstr);
67 static reg_errcode_t re_string_translate_buffer (re_string_t *pstr,
68 RE_TRANSLATE_TYPE trans);
69 static re_dfastate_t *create_newstate_common (re_dfa_t *dfa,
70 const re_node_set *nodes,
72 static reg_errcode_t register_state (re_dfa_t *dfa, re_dfastate_t *newstate,
74 static re_dfastate_t *create_ci_newstate (re_dfa_t *dfa,
75 const re_node_set *nodes,
77 static re_dfastate_t *create_cd_newstate (re_dfa_t *dfa,
78 const re_node_set *nodes,
81 static unsigned int inline calc_state_hash (const re_node_set *nodes,
82 unsigned int context);
84 /* Functions for string operation. */
86 /* Construct string object. */
88 re_string_construct (pstr, str, len, trans)
90 const unsigned char *str;
92 RE_TRANSLATE_TYPE trans;
95 re_string_construct_common (str, len, pstr);
97 if (MB_CUR_MAX >1 && pstr->len > 0)
99 ret = build_wcs_buffer (pstr);
100 if (BE (ret != REG_NOERROR, 0))
103 #endif /* RE_ENABLE_I18N */
104 pstr->mbs_case = str;
107 ret = re_string_translate_buffer (pstr, trans);
108 if (BE (ret != REG_NOERROR, 0))
114 /* Construct string object. We use this function instead of
115 re_string_construct for case insensitive mode. */
118 re_string_construct_toupper (pstr, str, len, trans)
120 const unsigned char *str;
122 RE_TRANSLATE_TYPE trans;
125 /* Set case sensitive buffer. */
126 re_string_construct_common (str, len, pstr);
127 #ifdef RE_ENABLE_I18N
130 if (BE (pstr->len > 0, 1))
132 ret = build_wcs_upper_buffer (pstr);
133 if (BE (ret != REG_NOERROR, 0))
138 #endif /* RE_ENABLE_I18N */
140 if (BE (pstr->len > 0, 1))
142 ret = build_upper_buffer (pstr);
143 if (BE (ret != REG_NOERROR, 0))
147 pstr->mbs_case = str;
150 ret = re_string_translate_buffer (pstr, trans);
151 if (BE (ret != REG_NOERROR, 0))
157 /* Helper functions for re_string_construct_*. */
159 re_string_construct_common (str, len, pstr)
160 const unsigned char *str;
167 #ifdef RE_ENABLE_I18N
170 pstr->mbs_case = NULL;
172 pstr->mbs_case_alloc = 0;
175 #ifdef RE_ENABLE_I18N
177 /* Build wide character buffer for `pstr'.
178 If the byte sequence of the string are:
179 <mb1>(0), <mb1>(1), <mb2>(0), <mb2>(1), <sb3>
180 Then wide character buffer will be:
181 <wc1> , WEOF , <wc2> , WEOF , <wc3>
182 We use WEOF for padding, they indicate that the position isn't
183 a first byte of a multibyte character. */
186 build_wcs_buffer (pstr)
189 mbstate_t state, prev_st;
191 int char_idx, char_len, mbclen;
193 pstr->wcs = re_malloc (wchar_t, pstr->len + 1);
194 if (BE (pstr->wcs == NULL, 0))
197 memset (&state, '\0', sizeof (mbstate_t));
198 char_len = pstr->len;
199 for (char_idx = 0; char_idx < char_len ;)
201 int next_idx, remain_len = char_len - char_idx;
203 mbclen = mbrtowc (&wc, pstr->mbs + char_idx, remain_len, &state);
204 if (BE (mbclen == (size_t) -2 || mbclen == (size_t) -1 || mbclen == 0, 0))
205 /* We treat these cases as a singlebyte character. */
208 wc = (wchar_t) pstr->mbs[char_idx++];
211 /* Write wide character and padding. */
212 pstr->wcs[char_idx++] = wc;
213 for (next_idx = char_idx + mbclen - 1; char_idx < next_idx ;)
214 pstr->wcs[char_idx++] = WEOF;
220 build_wcs_upper_buffer (pstr)
223 mbstate_t state, prev_st;
225 unsigned char *mbs_upper;
226 int char_idx, char_len, mbclen;
228 pstr->wcs = re_malloc (wchar_t, pstr->len + 1);
229 mbs_upper = re_malloc (unsigned char, pstr->len + 1);
230 if (BE (pstr->wcs == NULL || mbs_upper == NULL, 0))
236 memset (&state, '\0', sizeof (mbstate_t));
237 char_len = pstr->len;
238 for (char_idx = 0 ; char_idx < char_len ; char_idx += mbclen)
240 int byte_idx, remain_len = char_len - char_idx;
242 mbclen = mbrtowc (&wc, pstr->mbs + char_idx, remain_len, &state);
245 pstr->wcs[char_idx] = wc;
246 if (islower (pstr->mbs[char_idx]))
247 mbs_upper[char_idx] = toupper (pstr->mbs[char_idx]);
249 mbs_upper[char_idx] = pstr->mbs[char_idx];
251 else if (BE (mbclen == (size_t) -2 || mbclen == (size_t) -1
253 /* We treat these cases as a singlebyte character. */
256 pstr->wcs[char_idx] = (wchar_t) pstr->mbs[char_idx];
257 mbs_upper[char_idx] = pstr->mbs[char_idx];
260 else /* mbclen > 1 */
262 pstr->wcs[char_idx] = wc;
264 wcrtomb (mbs_upper + char_idx, towupper (wc), &prev_st);
266 memcpy (mbs_upper + char_idx, pstr->mbs + char_idx, mbclen);
267 for (byte_idx = 1 ; byte_idx < mbclen ; byte_idx++)
268 pstr->wcs[char_idx + byte_idx] = WEOF;
271 pstr->mbs = mbs_upper;
275 #endif /* RE_ENABLE_I18N */
278 build_upper_buffer (pstr)
281 unsigned char *mbs_upper;
282 int char_idx, char_len;
284 mbs_upper = re_malloc (unsigned char, pstr->len + 1);
285 if (BE (mbs_upper == NULL, 0))
288 char_len = pstr->len;
289 for (char_idx = 0 ; char_idx < char_len ; char_idx ++)
291 if (islower (pstr->mbs[char_idx]))
292 mbs_upper[char_idx] = toupper (pstr->mbs[char_idx]);
294 mbs_upper[char_idx] = pstr->mbs[char_idx];
296 pstr->mbs = mbs_upper;
301 /* Apply TRANS to the buffer in PSTR. We assume that wide char buffer
302 is already constructed if MB_CUR_MAX > 1. */
305 re_string_translate_buffer (pstr, trans)
307 RE_TRANSLATE_TYPE trans;
310 unsigned char *transed_buf, *transed_case_buf;
312 assert (trans != NULL);
316 transed_buf = (unsigned char *) pstr->mbs;
317 transed_case_buf = re_malloc (unsigned char, pstr->len + 1);
318 if (BE (transed_case_buf == NULL, 0))
320 pstr->mbs_case_alloc = 1;
324 transed_buf = re_malloc (unsigned char, pstr->len + 1);
325 if (BE (transed_buf == NULL, 0))
327 transed_case_buf = NULL;
330 for (buf_idx = 0 ; buf_idx < pstr->len ; buf_idx++)
332 #ifdef RE_ENABLE_I18N
333 if (MB_CUR_MAX > 1 && !re_string_is_single_byte_char (pstr, buf_idx))
334 transed_buf[buf_idx] = pstr->mbs[buf_idx];
337 transed_buf[buf_idx] = trans[pstr->mbs[buf_idx]];
338 if (transed_case_buf)
340 #ifdef RE_ENABLE_I18N
341 if (MB_CUR_MAX > 1 && !re_string_is_single_byte_char (pstr, buf_idx))
342 transed_case_buf[buf_idx] = pstr->mbs_case[buf_idx];
345 transed_case_buf[buf_idx] = trans[pstr->mbs_case[buf_idx]];
348 if (pstr->mbs_case_alloc == 1)
350 pstr->mbs = transed_buf;
351 pstr->mbs_case = transed_case_buf;
355 pstr->mbs = transed_buf;
356 pstr->mbs_case = transed_buf;
362 re_string_destruct (pstr)
365 #ifdef RE_ENABLE_I18N
367 #endif /* RE_ENABLE_I18N */
369 re_free ((void *) pstr->mbs);
370 if (pstr->mbs_case_alloc)
371 re_free ((void *) pstr->mbs_case);
374 /* Return the context at IDX in INPUT. */
376 re_string_context_at (input, idx, eflags, newline_anchor)
377 const re_string_t *input;
378 int idx, eflags, newline_anchor;
381 if (idx < 0 || idx == input->len)
383 unsigned int context = 0;
385 context = CONTEXT_BEGBUF;
386 else /* (idx == input->len) */
387 context = CONTEXT_ENDBUF;
389 if ((idx < 0 && !(eflags & REG_NOTBOL))
390 || (idx == input->len && !(eflags & REG_NOTEOL)))
391 return CONTEXT_NEWLINE | context;
395 c = re_string_byte_at (input, idx);
396 if (IS_WORD_CHAR (c))
398 return (newline_anchor && IS_NEWLINE (c)) ? CONTEXT_NEWLINE : 0;
401 /* Functions for set operation. */
404 re_node_set_alloc (set, size)
410 set->elems = re_malloc (int, size);
411 if (BE (set->elems == NULL, 0))
417 re_node_set_init_1 (set, elem)
423 set->elems = re_malloc (int, 1);
424 if (BE (set->elems == NULL, 0))
426 set->elems[0] = elem;
431 re_node_set_init_2 (set, elem1, elem2)
436 set->elems = re_malloc (int, 2);
437 if (BE (set->elems == NULL, 0))
442 set->elems[0] = elem1;
449 set->elems[0] = elem1;
450 set->elems[1] = elem2;
454 set->elems[0] = elem2;
455 set->elems[1] = elem1;
462 re_node_set_init_copy (dest, src)
464 const re_node_set *src;
466 dest->nelem = src->nelem;
469 dest->alloc = dest->nelem;
470 dest->elems = re_malloc (int, dest->alloc);
471 if (BE (dest->elems == NULL, 0))
473 memcpy (dest->elems, src->elems, src->nelem * sizeof (int));
476 re_node_set_init_empty (dest);
480 /* Calculate the intersection of the sets SRC1 and SRC2. And store it in
481 DEST. Return value indicate the error code or REG_NOERROR if succeeded.
482 Note: We assume dest->elems is NULL, when dest->alloc is 0. */
485 re_node_set_intersect (dest, src1, src2)
487 const re_node_set *src1, *src2;
490 if (src1->nelem > 0 && src2->nelem > 0)
492 if (src1->nelem + src2->nelem > dest->alloc)
494 dest->alloc = src1->nelem + src2->nelem;
495 dest->elems = re_realloc (dest->elems, int, dest->alloc);
496 if (BE (dest->elems == NULL, 0))
502 /* The intersection of empty sets is also empty set. */
507 for (i1 = i2 = id = 0; i1 < src1->nelem && i2 < src2->nelem; )
509 if (src1->elems[i1] > src2->elems[i2])
514 /* The intersection must have the elements which are in both of
516 if (src1->elems[i1] == src2->elems[i2])
517 dest->elems[id++] = src2->elems[i2++];
524 /* Calculate the intersection of the sets SRC1 and SRC2. And merge it to
525 DEST. Return value indicate the error code or REG_NOERROR if succeeded.
526 Note: We assume dest->elems is NULL, when dest->alloc is 0. */
529 re_node_set_add_intersect (dest, src1, src2)
531 const re_node_set *src1, *src2;
534 if (src1->nelem > 0 && src2->nelem > 0)
536 if (src1->nelem + src2->nelem + dest->nelem > dest->alloc)
538 dest->alloc = src1->nelem + src2->nelem + dest->nelem;
539 dest->elems = re_realloc (dest->elems, int, dest->alloc);
540 if (BE (dest->elems == NULL, 0))
547 for (i1 = i2 = id = 0 ; i1 < src1->nelem && i2 < src2->nelem ;)
549 if (src1->elems[i1] > src2->elems[i2])
554 if (src1->elems[i1] == src2->elems[i2])
556 while (id < dest->nelem && dest->elems[id] < src2->elems[i2])
558 if (id < dest->nelem && dest->elems[id] == src2->elems[i2])
562 memmove (dest->elems + id + 1, dest->elems + id,
563 sizeof (int) * (dest->nelem - id));
564 dest->elems[id++] = src2->elems[i2++];
573 /* Calculate the union set of the sets SRC1 and SRC2. And store it to
574 DEST. Return value indicate the error code or REG_NOERROR if succeeded. */
577 re_node_set_init_union (dest, src1, src2)
579 const re_node_set *src1, *src2;
582 if (src1 != NULL && src1->nelem > 0 && src2 != NULL && src2->nelem > 0)
584 dest->alloc = src1->nelem + src2->nelem;
585 dest->elems = re_malloc (int, dest->alloc);
586 if (BE (dest->elems == NULL, 0))
591 if (src1 != NULL && src1->nelem > 0)
592 return re_node_set_init_copy (dest, src1);
593 else if (src2 != NULL && src2->nelem > 0)
594 return re_node_set_init_copy (dest, src2);
596 re_node_set_init_empty (dest);
599 for (i1 = i2 = id = 0 ; i1 < src1->nelem && i2 < src2->nelem ;)
601 if (src1->elems[i1] > src2->elems[i2])
603 dest->elems[id++] = src2->elems[i2++];
606 if (src1->elems[i1] == src2->elems[i2])
608 dest->elems[id++] = src1->elems[i1++];
610 if (i1 < src1->nelem)
612 memcpy (dest->elems + id, src1->elems + i1,
613 (src1->nelem - i1) * sizeof (int));
614 id += src1->nelem - i1;
616 else if (i2 < src2->nelem)
618 memcpy (dest->elems + id, src2->elems + i2,
619 (src2->nelem - i2) * sizeof (int));
620 id += src2->nelem - i2;
626 /* Calculate the union set of the sets DEST and SRC. And store it to
627 DEST. Return value indicate the error code or REG_NOERROR if succeeded. */
630 re_node_set_merge (dest, src)
632 const re_node_set *src;
635 if (src == NULL || src->nelem == 0)
637 if (dest->alloc < src->nelem + dest->nelem)
639 dest->alloc = 2 * (src->nelem + dest->alloc);
640 dest->elems = re_realloc (dest->elems, int, dest->alloc);
641 if (BE (dest->elems == NULL, 0))
645 for (si = 0, di = 0 ; si < src->nelem && di < dest->nelem ;)
647 int cp_from, ncp, mid, right, src_elem = src->elems[si];
648 /* Binary search the spot we will add the new element. */
652 mid = (di + right) / 2;
653 if (dest->elems[mid] < src_elem)
658 if (di >= dest->nelem)
661 if (dest->elems[di] == src_elem)
663 /* Skip since, DEST already has the element. */
669 /* Skip the src elements which are less than dest->elems[di]. */
671 while (si < src->nelem && src->elems[si] < dest->elems[di])
673 /* Copy these src elements. */
675 memmove (dest->elems + di + ncp, dest->elems + di,
676 sizeof (int) * (dest->nelem - di));
677 memcpy (dest->elems + di, src->elems + cp_from,
679 /* Update counters. */
684 /* Copy remaining src elements. */
687 memcpy (dest->elems + di, src->elems + si,
688 sizeof (int) * (src->nelem - si));
689 dest->nelem += src->nelem - si;
694 /* Insert the new element ELEM to the re_node_set* SET.
695 return 0 if SET already has ELEM,
696 return -1 if an error is occured, return 1 otherwise. */
699 re_node_set_insert (set, elem)
704 /* In case of the set is empty. */
705 if (set->elems == NULL || set->alloc == 0)
707 if (BE (re_node_set_init_1 (set, elem) == REG_NOERROR, 1))
713 /* Binary search the spot we will add the new element. */
718 mid = (idx + right) / 2;
719 if (set->elems[mid] < elem)
725 /* Realloc if we need. */
726 if (set->alloc < set->nelem + 1)
729 set->alloc = set->alloc * 2;
730 new_array = re_malloc (int, set->alloc);
731 if (BE (new_array == NULL, 0))
733 /* Copy the elements they are followed by the new element. */
735 memcpy (new_array, set->elems, sizeof (int) * (idx));
736 /* Copy the elements which follows the new element. */
737 if (set->nelem - idx > 0)
738 memcpy (new_array + idx + 1, set->elems + idx,
739 sizeof (int) * (set->nelem - idx));
740 set->elems = new_array;
744 /* Move the elements which follows the new element. */
745 if (set->nelem - idx > 0)
746 memmove (set->elems + idx + 1, set->elems + idx,
747 sizeof (int) * (set->nelem - idx));
749 /* Insert the new element. */
750 set->elems[idx] = elem;
755 /* Compare two node sets SET1 and SET2.
756 return 1 if SET1 and SET2 are equivalent, retrun 0 otherwise. */
759 re_node_set_compare (set1, set2)
760 const re_node_set *set1, *set2;
763 if (set1 == NULL || set2 == NULL || set1->nelem != set2->nelem)
765 for (i = 0 ; i < set1->nelem ; i++)
766 if (set1->elems[i] != set2->elems[i])
771 /* Return 1 if SET contains the element ELEM, return 0 otherwise. */
774 re_node_set_contains (set, elem)
775 const re_node_set *set;
782 /* Binary search the element. */
784 right = set->nelem - 1;
787 mid = (idx + right) / 2;
788 if (set->elems[mid] < elem)
793 return set->elems[idx] == elem;
797 re_node_set_remove_at (set, idx)
801 if (idx < 0 || idx >= set->nelem)
803 if (idx < set->nelem - 1)
804 memmove (set->elems + idx, set->elems + idx + 1,
805 sizeof (int) * (set->nelem - idx - 1));
810 /* Add the token TOKEN to dfa->nodes, and return the index of the token.
811 Or return -1, if an error will be occured. */
814 re_dfa_add_node (dfa, token, mode)
819 if (dfa->nodes_len >= dfa->nodes_alloc)
821 re_token_t *new_array;
822 dfa->nodes_alloc *= 2;
823 new_array = re_realloc (dfa->nodes, re_token_t, dfa->nodes_alloc);
824 if (BE (new_array == NULL, 0))
827 dfa->nodes = new_array;
830 int *new_firsts, *new_nexts;
831 re_node_set *new_edests, *new_eclosures, *new_inveclosures;
833 new_firsts = re_realloc (dfa->firsts, int, dfa->nodes_alloc);
834 new_nexts = re_realloc (dfa->nexts, int, dfa->nodes_alloc);
835 new_edests = re_realloc (dfa->edests, re_node_set, dfa->nodes_alloc);
836 new_eclosures = re_realloc (dfa->eclosures, re_node_set,
838 new_inveclosures = re_realloc (dfa->inveclosures, re_node_set,
840 if (BE (new_firsts == NULL || new_nexts == NULL || new_edests == NULL
841 || new_eclosures == NULL || new_inveclosures == NULL, 0))
843 dfa->firsts = new_firsts;
844 dfa->nexts = new_nexts;
845 dfa->edests = new_edests;
846 dfa->eclosures = new_eclosures;
847 dfa->inveclosures = new_inveclosures;
850 dfa->nodes[dfa->nodes_len] = token;
851 dfa->nodes[dfa->nodes_len].duplicated = 0;
852 return dfa->nodes_len++;
855 static unsigned int inline
856 calc_state_hash (nodes, context)
857 const re_node_set *nodes;
858 unsigned int context;
860 unsigned int hash = nodes->nelem + context;
862 for (i = 0 ; i < nodes->nelem ; i++)
863 hash += nodes->elems[i];
867 /* Search for the state whose node_set is equivalent to NODES.
868 Return the pointer to the state, if we found it in the DFA.
869 Otherwise create the new one and return it. In case of an error
870 return NULL and set the error code in ERR.
871 Note: - We assume NULL as the invalid state, then it is possible that
872 return value is NULL and ERR is REG_NOERROR.
873 - We never return non-NULL value in case of any errors, it is for
876 static re_dfastate_t*
877 re_acquire_state (err, dfa, nodes)
880 const re_node_set *nodes;
883 re_dfastate_t *new_state;
884 struct re_state_table_entry *spot;
886 if (BE (nodes->nelem == 0, 0))
891 hash = calc_state_hash (nodes, 0);
892 spot = dfa->state_table + (hash & dfa->state_hash_mask);
894 for (i = 0 ; i < spot->num ; i++)
896 re_dfastate_t *state = spot->array[i];
897 if (hash != state->hash)
899 if (re_node_set_compare (&state->nodes, nodes))
903 /* There are no appropriate state in the dfa, create the new one. */
904 new_state = create_ci_newstate (dfa, nodes, hash);
905 if (BE (new_state != NULL, 1))
914 /* Search for the state whose node_set is equivalent to NODES and
915 whose context is equivalent to CONTEXT.
916 Return the pointer to the state, if we found it in the DFA.
917 Otherwise create the new one and return it. In case of an error
918 return NULL and set the error code in ERR.
919 Note: - We assume NULL as the invalid state, then it is possible that
920 return value is NULL and ERR is REG_NOERROR.
921 - We never return non-NULL value in case of any errors, it is for
924 static re_dfastate_t*
925 re_acquire_state_context (err, dfa, nodes, context)
928 const re_node_set *nodes;
929 unsigned int context;
932 re_dfastate_t *new_state;
933 struct re_state_table_entry *spot;
935 if (nodes->nelem == 0)
940 hash = calc_state_hash (nodes, context);
941 spot = dfa->state_table + (hash & dfa->state_hash_mask);
943 for (i = 0 ; i < spot->num ; i++)
945 re_dfastate_t *state = spot->array[i];
946 if (hash != state->hash)
948 if (re_node_set_compare (state->entrance_nodes, nodes)
949 && state->context == context)
952 /* There are no appropriate state in `dfa', create the new one. */
953 new_state = create_cd_newstate (dfa, nodes, context, hash);
954 if (BE (new_state != NULL, 1))
963 /* Allocate memory for DFA state and initialize common properties.
964 Return the new state if succeeded, otherwise return NULL. */
966 static re_dfastate_t *
967 create_newstate_common (dfa, nodes, hash)
969 const re_node_set *nodes;
972 re_dfastate_t *newstate;
973 newstate = (re_dfastate_t *) calloc (sizeof (re_dfastate_t), 1);
974 if (BE (newstate == NULL, 0))
976 re_node_set_init_copy (&newstate->nodes, nodes);
977 newstate->trtable = NULL;
978 newstate->trtable_search = NULL;
979 newstate->hash = hash;
983 /* Store the new state NEWSTATE whose hash value is HASH in appropriate
984 position. Return value indicate the error code if failed. */
987 register_state (dfa, newstate, hash)
989 re_dfastate_t *newstate;
992 struct re_state_table_entry *spot;
993 spot = dfa->state_table + (hash & dfa->state_hash_mask);
995 if (spot->alloc <= spot->num)
997 spot->alloc = 2 * spot->num + 2;
998 spot->array = re_realloc (spot->array, re_dfastate_t *, spot->alloc);
999 if (BE (spot->array == NULL, 0))
1002 spot->array[spot->num++] = newstate;
1006 /* Create the new state which is independ of contexts.
1007 Return the new state if succeeded, otherwise return NULL. */
1009 static re_dfastate_t *
1010 create_ci_newstate (dfa, nodes, hash)
1012 const re_node_set *nodes;
1017 re_dfastate_t *newstate;
1018 newstate = create_newstate_common (dfa, nodes, hash);
1019 if (BE (newstate == NULL, 0))
1021 newstate->entrance_nodes = &newstate->nodes;
1023 for (i = 0 ; i < nodes->nelem ; i++)
1025 re_token_t *node = dfa->nodes + nodes->elems[i];
1026 re_token_type_t type = node->type;
1027 if (type == CHARACTER)
1030 /* If the state has the halt node, the state is a halt state. */
1031 else if (type == END_OF_RE)
1033 else if (type == COMPLEX_BRACKET
1034 || (type == OP_PERIOD && MB_CUR_MAX > 1))
1035 newstate->accept_mb = 1;
1036 else if (type == OP_BACK_REF)
1037 newstate->has_backref = 1;
1038 else if (type == ANCHOR || OP_CONTEXT_NODE)
1040 newstate->has_constraint = 1;
1041 if (type == OP_CONTEXT_NODE
1042 && dfa->nodes[node->opr.ctx_info->entity].type == END_OF_RE)
1046 err = register_state (dfa, newstate, hash);
1047 return (err != REG_NOERROR) ? NULL : newstate;
1050 /* Create the new state which is depend on the context CONTEXT.
1051 Return the new state if succeeded, otherwise return NULL. */
1053 static re_dfastate_t *
1054 create_cd_newstate (dfa, nodes, context, hash)
1056 const re_node_set *nodes;
1057 unsigned int context, hash;
1059 int i, nctx_nodes = 0;
1061 re_dfastate_t *newstate;
1063 newstate = create_newstate_common (dfa, nodes, hash);
1064 if (BE (newstate == NULL, 0))
1066 newstate->context = context;
1067 newstate->entrance_nodes = &newstate->nodes;
1069 for (i = 0 ; i < nodes->nelem ; i++)
1071 unsigned int constraint = 0;
1072 re_token_t *node = dfa->nodes + nodes->elems[i];
1073 re_token_type_t type = node->type;
1074 if (type == CHARACTER)
1077 /* If the state has the halt node, the state is a halt state. */
1078 else if (type == END_OF_RE)
1080 else if (type == COMPLEX_BRACKET
1081 || (type == OP_PERIOD && MB_CUR_MAX > 1))
1082 newstate->accept_mb = 1;
1083 else if (type == OP_BACK_REF)
1084 newstate->has_backref = 1;
1085 else if (type == ANCHOR)
1086 constraint = node->opr.ctx_type;
1087 else if (type == OP_CONTEXT_NODE)
1089 re_token_type_t ctype = dfa->nodes[node->opr.ctx_info->entity].type;
1090 constraint = node->constraint;
1091 if (ctype == END_OF_RE)
1093 else if (ctype == OP_BACK_REF)
1094 newstate->has_backref = 1;
1095 else if (ctype == COMPLEX_BRACKET
1096 || (type == OP_PERIOD && MB_CUR_MAX > 1))
1097 newstate->accept_mb = 1;
1102 if (newstate->entrance_nodes == &newstate->nodes)
1104 newstate->entrance_nodes = re_malloc (re_node_set, 1);
1105 if (BE (newstate->entrance_nodes == NULL, 0))
1107 re_node_set_init_copy (newstate->entrance_nodes, nodes);
1109 newstate->has_constraint = 1;
1112 if (NOT_SATISFY_PREV_CONSTRAINT (constraint,context))
1114 re_node_set_remove_at (&newstate->nodes, i - nctx_nodes);
1119 err = register_state (dfa, newstate, hash);
1120 return (err != REG_NOERROR) ? NULL : newstate;