l4/pkg/uclibc/lib/contrib/uclibc/libc/misc/regex/regexec.c

   1 /* Extended regular expression matching and search library.
   2    Copyright (C) 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
   3    This file is part of the GNU C Library.
   4    Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
   5
   6    The GNU C Library is free software; you can redistribute it and/or
   7    modify it under the terms of the GNU Lesser General Public
   8    License as published by the Free Software Foundation; either
   9    version 2.1 of the License, or (at your option) any later version.
  10
  11    The GNU C Library is distributed in the hope that it will be useful,
  12    but WITHOUT ANY WARRANTY; without even the implied warranty of
  13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14    Lesser General Public License for more details.
  15
  16    You should have received a copy of the GNU Lesser General Public
  17    License along with the GNU C Library; if not, write to the Free
  18    Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
  19    02111-1307 USA.  */
  20
  21 static reg_errcode_t match_ctx_init (re_match_context_t *cache, int eflags,
  22                                      int n) internal_function;
  23 static void match_ctx_clean (re_match_context_t *mctx) internal_function;
  24 static void match_ctx_free (re_match_context_t *cache) internal_function;
  25 static reg_errcode_t match_ctx_add_entry (re_match_context_t *cache, int node,
  26                                           int str_idx, int from, int to)
  27      internal_function;
  28 static int search_cur_bkref_entry (const re_match_context_t *mctx, int str_idx)
  29      internal_function;
  30 static reg_errcode_t match_ctx_add_subtop (re_match_context_t *mctx, int node,
  31                                            int str_idx) internal_function;
  32 static re_sub_match_last_t * match_ctx_add_sublast (re_sub_match_top_t *subtop,
  33                                                    int node, int str_idx)
  34      internal_function;
  35 static void sift_ctx_init (re_sift_context_t *sctx, re_dfastate_t **sifted_sts,
  36                            re_dfastate_t **limited_sts, int last_node,
  37                            int last_str_idx)
  38      internal_function;
  39 static reg_errcode_t re_search_internal (const regex_t *preg,
  40                                          const char *string, int length,
  41                                          int start, int range, int stop,
  42                                          size_t nmatch, regmatch_t pmatch[],
  43                                          int eflags) internal_function;
  44 static int re_search_2_stub (struct re_pattern_buffer *bufp,
  45                              const char *string1, int length1,
  46                              const char *string2, int length2,
  47                              int start, int range, struct re_registers *regs,
  48                              int stop, int ret_len) internal_function;
  49 static int re_search_stub (struct re_pattern_buffer *bufp,
  50                            const char *string, int length, int start,
  51                            int range, int stop, struct re_registers *regs,
  52                            int ret_len) internal_function;
  53 static unsigned re_copy_regs (struct re_registers *regs, regmatch_t *pmatch,
  54                               int nregs, int regs_allocated) internal_function;
  55 static reg_errcode_t prune_impossible_nodes (re_match_context_t *mctx)
  56      internal_function;
  57 static int check_matching (re_match_context_t *mctx, int fl_longest_match,
  58                            int *p_match_first) internal_function;
  59 static int check_halt_state_context (const re_match_context_t *mctx,
  60                                      const re_dfastate_t *state, int idx)
  61      internal_function;
  62 static void update_regs (const re_dfa_t *dfa, regmatch_t *pmatch,
  63                          regmatch_t *prev_idx_match, int cur_node,
  64                          int cur_idx, int nmatch) internal_function;
  65 static reg_errcode_t push_fail_stack (struct re_fail_stack_t *fs,
  66                                       int str_idx, int dest_node, int nregs,
  67                                       regmatch_t *regs,
  68                                       re_node_set *eps_via_nodes)
  69      internal_function;
  70 static reg_errcode_t set_regs (const regex_t *preg,
  71                                const re_match_context_t *mctx,
  72                                size_t nmatch, regmatch_t *pmatch,
  73                                int fl_backtrack) internal_function;
  74 static reg_errcode_t free_fail_stack_return (struct re_fail_stack_t *fs)
  75      internal_function;
  76
  77 #ifdef RE_ENABLE_I18N
  78 static int sift_states_iter_mb (const re_match_context_t *mctx,
  79                                 re_sift_context_t *sctx,
  80                                 int node_idx, int str_idx, int max_str_idx)
  81      internal_function;
  82 #endif
  83 static reg_errcode_t sift_states_backward (const re_match_context_t *mctx,
  84                                            re_sift_context_t *sctx)
  85      internal_function;
  86 static reg_errcode_t build_sifted_states (const re_match_context_t *mctx,
  87                                           re_sift_context_t *sctx, int str_idx,
  88                                           re_node_set *cur_dest)
  89      internal_function;
  90 static reg_errcode_t update_cur_sifted_state (const re_match_context_t *mctx,
  91                                               re_sift_context_t *sctx,
  92                                               int str_idx,
  93                                               re_node_set *dest_nodes)
  94      internal_function;
  95 static reg_errcode_t add_epsilon_src_nodes (const re_dfa_t *dfa,
  96                                             re_node_set *dest_nodes,
  97                                             const re_node_set *candidates)
  98      internal_function;
  99 static int check_dst_limits (const re_match_context_t *mctx,
 100                              re_node_set *limits,
 101                              int dst_node, int dst_idx, int src_node,
 102                              int src_idx) internal_function;
 103 static int check_dst_limits_calc_pos_1 (const re_match_context_t *mctx,
 104                                         int boundaries, int subexp_idx,
 105                                         int from_node, int bkref_idx)
 106      internal_function;
 107 static int check_dst_limits_calc_pos (const re_match_context_t *mctx,
 108                                       int limit, int subexp_idx,
 109                                       int node, int str_idx,
 110                                       int bkref_idx) internal_function;
 111 static reg_errcode_t check_subexp_limits (const re_dfa_t *dfa,
 112                                           re_node_set *dest_nodes,
 113                                           const re_node_set *candidates,
 114                                           re_node_set *limits,
 115                                           struct re_backref_cache_entry *bkref_ents,
 116                                           int str_idx) internal_function;
 117 static reg_errcode_t sift_states_bkref (const re_match_context_t *mctx,
 118                                         re_sift_context_t *sctx,
 119                                         int str_idx, const re_node_set *candidates)
 120      internal_function;
 121 static reg_errcode_t merge_state_array (const re_dfa_t *dfa,
 122                                         re_dfastate_t **dst,
 123                                         re_dfastate_t **src, int num)
 124      internal_function;
 125 static re_dfastate_t *find_recover_state (reg_errcode_t *err,
 126                                          re_match_context_t *mctx) internal_function;
 127 static re_dfastate_t *transit_state (reg_errcode_t *err,
 128                                      re_match_context_t *mctx,
 129                                      re_dfastate_t *state) internal_function;
 130 static re_dfastate_t *merge_state_with_log (reg_errcode_t *err,
 131                                             re_match_context_t *mctx,
 132                                             re_dfastate_t *next_state)
 133      internal_function;
 134 static reg_errcode_t check_subexp_matching_top (re_match_context_t *mctx,
 135                                                 re_node_set *cur_nodes,
 136                                                 int str_idx) internal_function;
 137 #if 0
 138 static re_dfastate_t *transit_state_sb (reg_errcode_t *err,
 139                                         re_match_context_t *mctx,
 140                                         re_dfastate_t *pstate)
 141      internal_function;
 142 #endif
 143 #ifdef RE_ENABLE_I18N
 144 static reg_errcode_t transit_state_mb (re_match_context_t *mctx,
 145                                        re_dfastate_t *pstate)
 146      internal_function;
 147 #endif
 148 static reg_errcode_t transit_state_bkref (re_match_context_t *mctx,
 149                                           const re_node_set *nodes)
 150      internal_function;
 151 static reg_errcode_t get_subexp (re_match_context_t *mctx,
 152                                  int bkref_node, int bkref_str_idx)
 153      internal_function;
 154 static reg_errcode_t get_subexp_sub (re_match_context_t *mctx,
 155                                      const re_sub_match_top_t *sub_top,
 156                                      re_sub_match_last_t *sub_last,
 157                                      int bkref_node, int bkref_str)
 158      internal_function;
 159 static int find_subexp_node (const re_dfa_t *dfa, const re_node_set *nodes,
 160                              int subexp_idx, int type) internal_function;
 161 static reg_errcode_t check_arrival (re_match_context_t *mctx,
 162                                     state_array_t *path, int top_node,
 163                                     int top_str, int last_node, int last_str,
 164                                     int type) internal_function;
 165 static reg_errcode_t check_arrival_add_next_nodes (re_match_context_t *mctx,
 166                                                    int str_idx,
 167                                                    re_node_set *cur_nodes,
 168                                                    re_node_set *next_nodes)
 169      internal_function;
 170 static reg_errcode_t check_arrival_expand_ecl (const re_dfa_t *dfa,
 171                                                re_node_set *cur_nodes,
 172                                                int ex_subexp, int type)
 173      internal_function;
 174 static reg_errcode_t check_arrival_expand_ecl_sub (const re_dfa_t *dfa,
 175                                                    re_node_set *dst_nodes,
 176                                                    int target, int ex_subexp,
 177                                                    int type) internal_function;
 178 static reg_errcode_t expand_bkref_cache (re_match_context_t *mctx,
 179                                          re_node_set *cur_nodes, int cur_str,
 180                                          int subexp_num, int type)
 181      internal_function;
 182 static int build_trtable (const re_dfa_t *dfa,
 183                           re_dfastate_t *state) internal_function;
 184 #ifdef RE_ENABLE_I18N
 185 static int check_node_accept_bytes (const re_dfa_t *dfa, int node_idx,
 186                                     const re_string_t *input, int idx)
 187      internal_function;
 188 #endif
 189 static int group_nodes_into_DFAstates (const re_dfa_t *dfa,
 190                                        const re_dfastate_t *state,
 191                                        re_node_set *states_node,
 192                                        bitset_t *states_ch) internal_function;
 193 static int check_node_accept (const re_match_context_t *mctx,
 194                               const re_token_t *node, int idx)
 195      internal_function;
 196 static reg_errcode_t extend_buffers (re_match_context_t *mctx)
 197      internal_function;
 198
 199 /* Entry point for POSIX code.  */
 200
 201 /* regexec searches for a given pattern, specified by PREG, in the
 202    string STRING.
 203
 204    If NMATCH is zero or REG_NOSUB was set in the cflags argument to
 205    `regcomp', we ignore PMATCH.  Otherwise, we assume PMATCH has at
 206    least NMATCH elements, and we set them to the offsets of the
 207    corresponding matched substrings.
 208
 209    EFLAGS specifies `execution flags' which affect matching: if
 210    REG_NOTBOL is set, then ^ does not match at the beginning of the
 211    string; if REG_NOTEOL is set, then $ does not match at the end.
 212
 213    We return 0 if we find a match and REG_NOMATCH if not.  */
 214
 215 int
 216 regexec (preg, string, nmatch, pmatch, eflags)
 217     const regex_t *__restrict preg;
 218     const char *__restrict string;
 219     size_t nmatch;
 220     regmatch_t pmatch[];
 221     int eflags;
 222 {
 223   reg_errcode_t err;
 224   int start, length;
 225 #ifndef __UCLIBC__ /* libc_lock_lock does not exist */
 226   re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
 227 #endif
 228
 229   if (eflags & ~(REG_NOTBOL | REG_NOTEOL | REG_STARTEND))
 230     return REG_BADPAT;
 231
 232   if (eflags & REG_STARTEND)
 233     {
 234       start = pmatch[0].rm_so;
 235       length = pmatch[0].rm_eo;
 236     }
 237   else
 238     {
 239       start = 0;
 240       length = strlen (string);
 241     }
 242
 243   __libc_lock_lock (dfa->lock);
 244   if (preg->no_sub)
 245     err = re_search_internal (preg, string, length, start, length - start,
 246                               length, 0, NULL, eflags);
 247   else
 248     err = re_search_internal (preg, string, length, start, length - start,
 249                               length, nmatch, pmatch, eflags);
 250   __libc_lock_unlock (dfa->lock);
 251   return err != REG_NOERROR;
 252 }
 253 libc_hidden_def(regexec)
 254
 255 /* Entry points for GNU code.  */
 256
 257 /* re_match, re_search, re_match_2, re_search_2
 258
 259    The former two functions operate on STRING with length LENGTH,
 260    while the later two operate on concatenation of STRING1 and STRING2
 261    with lengths LENGTH1 and LENGTH2, respectively.
 262
 263    re_match() matches the compiled pattern in BUFP against the string,
 264    starting at index START.
 265
 266    re_search() first tries matching at index START, then it tries to match
 267    starting from index START + 1, and so on.  The last start position tried
 268    is START + RANGE.  (Thus RANGE = 0 forces re_search to operate the same
 269    way as re_match().)
 270
 271    The parameter STOP of re_{match,search}_2 specifies that no match exceeding
 272    the first STOP characters of the concatenation of the strings should be
 273    concerned.
 274
 275    If REGS is not NULL, and BUFP->no_sub is not set, the offsets of the match
 276    and all groups is stroed in REGS.  (For the "_2" variants, the offsets are
 277    computed relative to the concatenation, not relative to the individual
 278    strings.)
 279
 280    On success, re_match* functions return the length of the match, re_search*
 281    return the position of the start of the match.  Return value -1 means no
 282    match was found and -2 indicates an internal error.  */
 283
 284 int
 285 re_match (bufp, string, length, start, regs)
 286     struct re_pattern_buffer *bufp;
 287     const char *string;
 288     int length, start;
 289     struct re_registers *regs;
 290 {
 291   return re_search_stub (bufp, string, length, start, 0, length, regs, 1);
 292 }
 293
 294 int
 295 re_search (bufp, string, length, start, range, regs)
 296     struct re_pattern_buffer *bufp;
 297     const char *string;
 298     int length, start, range;
 299     struct re_registers *regs;
 300 {
 301   return re_search_stub (bufp, string, length, start, range, length, regs, 0);
 302 }
 303 libc_hidden_def(re_search)
 304
 305 int
 306 re_match_2 (bufp, string1, length1, string2, length2, start, regs, stop)
 307     struct re_pattern_buffer *bufp;
 308     const char *string1, *string2;
 309     int length1, length2, start, stop;
 310     struct re_registers *regs;
 311 {
 312   return re_search_2_stub (bufp, string1, length1, string2, length2,
 313                            start, 0, regs, stop, 1);
 314 }
 315
 316 int
 317 re_search_2 (bufp, string1, length1, string2, length2, start, range, regs, stop)
 318     struct re_pattern_buffer *bufp;
 319     const char *string1, *string2;
 320     int length1, length2, start, range, stop;
 321     struct re_registers *regs;
 322 {
 323   return re_search_2_stub (bufp, string1, length1, string2, length2,
 324                            start, range, regs, stop, 0);
 325 }
 326 libc_hidden_def(re_search_2)
 327
 328 static int
 329 re_search_2_stub (bufp, string1, length1, string2, length2, start, range, regs,
 330                   stop, ret_len)
 331     struct re_pattern_buffer *bufp;
 332     const char *string1, *string2;
 333     int length1, length2, start, range, stop, ret_len;
 334     struct re_registers *regs;
 335 {
 336   const char *str;
 337   int rval;
 338   int len = length1 + length2;
 339   int free_str = 0;
 340
 341   if (BE (length1 < 0 || length2 < 0 || stop < 0, 0))
 342     return -2;
 343
 344   /* Concatenate the strings.  */
 345   if (length2 > 0)
 346     if (length1 > 0)
 347       {
 348         char *s = re_malloc (char, len);
 349
 350         if (BE (s == NULL, 0))
 351           return -2;
 352         memcpy (s, string1, length1);
 353         memcpy (s + length1, string2, length2);
 354         str = s;
 355         free_str = 1;
 356       }
 357     else
 358       str = string2;
 359   else
 360     str = string1;
 361
 362   rval = re_search_stub (bufp, str, len, start, range, stop, regs,
 363                          ret_len);
 364   if (free_str)
 365     re_free ((char *) str);
 366   return rval;
 367 }
 368
 369 /* The parameters have the same meaning as those of re_search.
 370    Additional parameters:
 371    If RET_LEN is nonzero the length of the match is returned (re_match style);
 372    otherwise the position of the match is returned.  */
 373
 374 static int
 375 re_search_stub (bufp, string, length, start, range, stop, regs, ret_len)
 376     struct re_pattern_buffer *bufp;
 377     const char *string;
 378     int length, start, range, stop, ret_len;
 379     struct re_registers *regs;
 380 {
 381   reg_errcode_t result;
 382   regmatch_t *pmatch;
 383   int nregs, rval;
 384   int eflags = 0;
 385 #ifndef __UCLIBC__ /* libc_lock_lock does not exist */
 386   re_dfa_t *dfa = (re_dfa_t *) bufp->buffer;
 387 #endif
 388
 389   /* Check for out-of-range.  */
 390   if (BE (start < 0 || start > length, 0))
 391     return -1;
 392   if (BE (start + range > length, 0))
 393     range = length - start;
 394   else if (BE (start + range < 0, 0))
 395     range = -start;
 396
 397   __libc_lock_lock (dfa->lock);
 398
 399   eflags |= (bufp->not_bol) ? REG_NOTBOL : 0;
 400   eflags |= (bufp->not_eol) ? REG_NOTEOL : 0;
 401
 402   /* Compile fastmap if we haven't yet.  */
 403   if (range > 0 && bufp->fastmap != NULL && !bufp->fastmap_accurate)
 404     re_compile_fastmap (bufp);
 405
 406   if (BE (bufp->no_sub, 0))
 407     regs = NULL;
 408
 409   /* We need at least 1 register.  */
 410   if (regs == NULL)
 411     nregs = 1;
 412   else if (BE (bufp->regs_allocated == REGS_FIXED &&
 413                regs->num_regs < bufp->re_nsub + 1, 0))
 414     {
 415       nregs = regs->num_regs;
 416       if (BE (nregs < 1, 0))
 417         {
 418           /* Nothing can be copied to regs.  */
 419           regs = NULL;
 420           nregs = 1;
 421         }
 422     }
 423   else
 424     nregs = bufp->re_nsub + 1;
 425   pmatch = re_malloc (regmatch_t, nregs);
 426   if (BE (pmatch == NULL, 0))
 427     {
 428       rval = -2;
 429       goto out;
 430     }
 431
 432   result = re_search_internal (bufp, string, length, start, range, stop,
 433                                nregs, pmatch, eflags);
 434
 435   rval = 0;
 436
 437   /* I hope we needn't fill ther regs with -1's when no match was found.  */
 438   if (result != REG_NOERROR)
 439     rval = -1;
 440   else if (regs != NULL)
 441     {
 442       /* If caller wants register contents data back, copy them.  */
 443       bufp->regs_allocated = re_copy_regs (regs, pmatch, nregs,
 444                                            bufp->regs_allocated);
 445       if (BE (bufp->regs_allocated == REGS_UNALLOCATED, 0))
 446         rval = -2;
 447     }
 448
 449   if (BE (rval == 0, 1))
 450     {
 451       if (ret_len)
 452         {
 453           assert (pmatch[0].rm_so == start);
 454           rval = pmatch[0].rm_eo - start;
 455         }
 456       else
 457         rval = pmatch[0].rm_so;
 458     }
 459   re_free (pmatch);
 460  out:
 461   __libc_lock_unlock (dfa->lock);
 462   return rval;
 463 }
 464
 465 static unsigned
 466 re_copy_regs (regs, pmatch, nregs, regs_allocated)
 467     struct re_registers *regs;
 468     regmatch_t *pmatch;
 469     int nregs, regs_allocated;
 470 {
 471   int rval = REGS_REALLOCATE;
 472   int i;
 473   int need_regs = nregs + 1;
 474   /* We need one extra element beyond `num_regs' for the `-1' marker GNU code
 475      uses.  */
 476
 477   /* Have the register data arrays been allocated?  */
 478   if (regs_allocated == REGS_UNALLOCATED)
 479     { /* No.  So allocate them with malloc.  */
 480       regs->start = re_malloc (regoff_t, need_regs);
 481       regs->end = re_malloc (regoff_t, need_regs);
 482       if (BE (regs->start == NULL, 0) || BE (regs->end == NULL, 0))
 483         return REGS_UNALLOCATED;
 484       regs->num_regs = need_regs;
 485     }
 486   else if (regs_allocated == REGS_REALLOCATE)
 487     { /* Yes.  If we need more elements than were already
 488          allocated, reallocate them.  If we need fewer, just
 489          leave it alone.  */
 490       if (BE (need_regs > regs->num_regs, 0))
 491         {
 492           regoff_t *new_start = re_realloc (regs->start, regoff_t, need_regs);
 493           regoff_t *new_end = re_realloc (regs->end, regoff_t, need_regs);
 494           if (BE (new_start == NULL, 0) || BE (new_end == NULL, 0))
 495             return REGS_UNALLOCATED;
 496           regs->start = new_start;
 497           regs->end = new_end;
 498           regs->num_regs = need_regs;
 499         }
 500     }
 501   else
 502     {
 503       assert (regs_allocated == REGS_FIXED);
 504       /* This function may not be called with REGS_FIXED and nregs too big.  */
 505       assert (regs->num_regs >= nregs);
 506       rval = REGS_FIXED;
 507     }
 508
 509   /* Copy the regs.  */
 510   for (i = 0; i < nregs; ++i)
 511     {
 512       regs->start[i] = pmatch[i].rm_so;
 513       regs->end[i] = pmatch[i].rm_eo;
 514     }
 515   for ( ; i < regs->num_regs; ++i)
 516     regs->start[i] = regs->end[i] = -1;
 517
 518   return rval;
 519 }
 520
 521 /* Set REGS to hold NUM_REGS registers, storing them in STARTS and
 522    ENDS.  Subsequent matches using PATTERN_BUFFER and REGS will use
 523    this memory for recording register information.  STARTS and ENDS
 524    must be allocated using the malloc library routine, and must each
 525    be at least NUM_REGS * sizeof (regoff_t) bytes long.
 526
 527    If NUM_REGS == 0, then subsequent matches should allocate their own
 528    register data.
 529
 530    Unless this function is called, the first search or match using
 531    PATTERN_BUFFER will allocate its own register data, without
 532    freeing the old data.  */
 533
 534 void
 535 re_set_registers (bufp, regs, num_regs, starts, ends)
 536     struct re_pattern_buffer *bufp;
 537     struct re_registers *regs;
 538     unsigned num_regs;
 539     regoff_t *starts, *ends;
 540 {
 541   if (num_regs)
 542     {
 543       bufp->regs_allocated = REGS_REALLOCATE;
 544       regs->num_regs = num_regs;
 545       regs->start = starts;
 546       regs->end = ends;
 547     }
 548   else
 549     {
 550       bufp->regs_allocated = REGS_UNALLOCATED;
 551       regs->num_regs = 0;
 552       regs->start = regs->end = (regoff_t *) 0;
 553     }
 554 }
 555
 556 /* Entry points compatible with 4.2 BSD regex library.  We don't define
 557    them unless specifically requested.  */
 558
 559 #if defined _REGEX_RE_COMP || defined __UCLIBC__
 560 int
 561 weak_function
 562 re_exec (const char *s)
 563 {
 564   return 0 == regexec (re_comp_buf, s, 0, NULL, 0);
 565 }
 566 #endif
 567
 568 /* Internal entry point.  */
 569
 570 /* Searches for a compiled pattern PREG in the string STRING, whose
 571    length is LENGTH.  NMATCH, PMATCH, and EFLAGS have the same
 572    mingings with regexec.  START, and RANGE have the same meanings
 573    with re_search.
 574    Return REG_NOERROR if we find a match, and REG_NOMATCH if not,
 575    otherwise return the error code.
 576    Note: We assume front end functions already check ranges.
 577    (START + RANGE >= 0 && START + RANGE <= LENGTH)  */
 578
 579 static reg_errcode_t
 580 re_search_internal (preg, string, length, start, range, stop, nmatch, pmatch,
 581                     eflags)
 582     const regex_t *preg;
 583     const char *string;
 584     int length, start, range, stop, eflags;
 585     size_t nmatch;
 586     regmatch_t pmatch[];
 587 {
 588   reg_errcode_t err;
 589   const re_dfa_t *dfa = (const re_dfa_t *) preg->buffer;
 590   int left_lim, right_lim, incr;
 591   int fl_longest_match, match_first, match_kind, match_last = -1;
 592   int extra_nmatch;
 593   int sb, ch;
 594   re_match_context_t mctx;
 595   char *fastmap = (preg->fastmap != NULL && preg->fastmap_accurate
 596                    && range && !preg->can_be_null) ? preg->fastmap : NULL;
 597   RE_TRANSLATE_TYPE t = preg->translate;
 598
 599   memset (&mctx, '\0', sizeof (re_match_context_t));
 600   mctx.dfa = dfa;
 601
 602   extra_nmatch = (nmatch > preg->re_nsub) ? nmatch - (preg->re_nsub + 1) : 0;
 603   nmatch -= extra_nmatch;
 604
 605   /* Check if the DFA haven't been compiled.  */
 606   if (BE (preg->used == 0 || dfa->init_state == NULL
 607           || dfa->init_state_word == NULL || dfa->init_state_nl == NULL
 608           || dfa->init_state_begbuf == NULL, 0))
 609     return REG_NOMATCH;
 610
 611 #ifdef DEBUG
 612   /* We assume front-end functions already check them.  */
 613   assert (start + range >= 0 && start + range <= length);
 614 #endif
 615
 616   /* If initial states with non-begbuf contexts have no elements,
 617      the regex must be anchored.  If preg->newline_anchor is set,
 618      we'll never use init_state_nl, so do not check it.  */
 619   if (dfa->init_state->nodes.nelem == 0
 620       && dfa->init_state_word->nodes.nelem == 0
 621       && (dfa->init_state_nl->nodes.nelem == 0
 622           || !preg->newline_anchor))
 623     {
 624       if (start != 0 && start + range != 0)
 625         return REG_NOMATCH;
 626       start = range = 0;
 627     }
 628
 629   /* We must check the longest matching, if nmatch > 0.  */
 630   fl_longest_match = (nmatch != 0 || dfa->nbackref);
 631
 632   err = re_string_allocate (&mctx.input, string, length, dfa->nodes_len + 1,
 633                             preg->translate, preg->syntax & RE_ICASE, dfa);
 634   if (BE (err != REG_NOERROR, 0))
 635     goto free_return;
 636   mctx.input.stop = stop;
 637   mctx.input.raw_stop = stop;
 638   mctx.input.newline_anchor = preg->newline_anchor;
 639
 640   err = match_ctx_init (&mctx, eflags, dfa->nbackref * 2);
 641   if (BE (err != REG_NOERROR, 0))
 642     goto free_return;
 643
 644   /* We will log all the DFA states through which the dfa pass,
 645      if nmatch > 1, or this dfa has "multibyte node", which is a
 646      back-reference or a node which can accept multibyte character or
 647      multi character collating element.  */
 648   if (nmatch > 1 || dfa->has_mb_node)
 649     {
 650       mctx.state_log = re_malloc (re_dfastate_t *, mctx.input.bufs_len + 1);
 651       if (BE (mctx.state_log == NULL, 0))
 652         {
 653           err = REG_ESPACE;
 654           goto free_return;
 655         }
 656     }
 657   else
 658     mctx.state_log = NULL;
 659
 660   match_first = start;
 661   mctx.input.tip_context = (eflags & REG_NOTBOL) ? CONTEXT_BEGBUF
 662                            : CONTEXT_NEWLINE | CONTEXT_BEGBUF;
 663
 664   /* Check incrementally whether of not the input string match.  */
 665   incr = (range < 0) ? -1 : 1;
 666   left_lim = (range < 0) ? start + range : start;
 667   right_lim = (range < 0) ? start : start + range;
 668   sb = dfa->mb_cur_max == 1;
 669   match_kind =
 670     (fastmap
 671      ? ((sb || !(preg->syntax & RE_ICASE || t) ? 4 : 0)
 672         | (range >= 0 ? 2 : 0)
 673         | (t != NULL ? 1 : 0))
 674      : 8);
 675
 676   for (;; match_first += incr)
 677     {
 678       err = REG_NOMATCH;
 679       if (match_first < left_lim || right_lim < match_first)
 680         goto free_return;
 681
 682       /* Advance as rapidly as possible through the string, until we
 683          find a plausible place to start matching.  This may be done
 684          with varying efficiency, so there are various possibilities:
 685          only the most common of them are specialized, in order to
 686          save on code size.  We use a switch statement for speed.  */
 687       switch (match_kind)
 688         {
 689         case 8:
 690           /* No fastmap.  */
 691           break;
 692
 693         case 7:
 694           /* Fastmap with single-byte translation, match forward.  */
 695           while (BE (match_first < right_lim, 1)
 696                  && !fastmap[t[(unsigned char) string[match_first]]])
 697             ++match_first;
 698           goto forward_match_found_start_or_reached_end;
 699
 700         case 6:
 701           /* Fastmap without translation, match forward.  */
 702           while (BE (match_first < right_lim, 1)
 703                  && !fastmap[(unsigned char) string[match_first]])
 704             ++match_first;
 705
 706         forward_match_found_start_or_reached_end:
 707           if (BE (match_first == right_lim, 0))
 708             {
 709               ch = match_first >= length
 710                        ? 0 : (unsigned char) string[match_first];
 711               if (!fastmap[t ? t[ch] : ch])
 712                 goto free_return;
 713             }
 714           break;
 715
 716         case 4:
 717         case 5:
 718           /* Fastmap without multi-byte translation, match backwards.  */
 719           while (match_first >= left_lim)
 720             {
 721               ch = match_first >= length
 722                        ? 0 : (unsigned char) string[match_first];
 723               if (fastmap[t ? t[ch] : ch])
 724                 break;
 725               --match_first;
 726             }
 727           if (match_first < left_lim)
 728             goto free_return;
 729           break;
 730
 731         default:
 732           /* In this case, we can't determine easily the current byte,
 733              since it might be a component byte of a multibyte
 734              character.  Then we use the constructed buffer instead.  */
 735           for (;;)
 736             {
 737               /* If MATCH_FIRST is out of the valid range, reconstruct the
 738                  buffers.  */
 739               unsigned int offset = match_first - mctx.input.raw_mbs_idx;
 740               if (BE (offset >= (unsigned int) mctx.input.valid_raw_len, 0))
 741                 {
 742                   err = re_string_reconstruct (&mctx.input, match_first,
 743                                                eflags);
 744                   if (BE (err != REG_NOERROR, 0))
 745                     goto free_return;
 746
 747                   offset = match_first - mctx.input.raw_mbs_idx;
 748                 }
 749               /* If MATCH_FIRST is out of the buffer, leave it as '\0'.
 750                  Note that MATCH_FIRST must not be smaller than 0.  */
 751               ch = (match_first >= length
 752                     ? 0 : re_string_byte_at (&mctx.input, offset));
 753               if (fastmap[ch])
 754                 break;
 755               match_first += incr;
 756               if (match_first < left_lim || match_first > right_lim)
 757                 {
 758                   err = REG_NOMATCH;
 759                   goto free_return;
 760                 }
 761             }
 762           break;
 763         }
 764
 765       /* Reconstruct the buffers so that the matcher can assume that
 766          the matching starts from the beginning of the buffer.  */
 767       err = re_string_reconstruct (&mctx.input, match_first, eflags);
 768       if (BE (err != REG_NOERROR, 0))
 769         goto free_return;
 770
 771 #ifdef RE_ENABLE_I18N
 772      /* Don't consider this char as a possible match start if it part,
 773         yet isn't the head, of a multibyte character.  */
 774       if (!sb && !re_string_first_byte (&mctx.input, 0))
 775         continue;
 776 #endif
 777
 778       /* It seems to be appropriate one, then use the matcher.  */
 779       /* We assume that the matching starts from 0.  */
 780       mctx.state_log_top = mctx.nbkref_ents = mctx.max_mb_elem_len = 0;
 781       match_last = check_matching (&mctx, fl_longest_match,
 782                                    range >= 0 ? &match_first : NULL);
 783       if (match_last != -1)
 784         {
 785           if (BE (match_last == -2, 0))
 786             {
 787               err = REG_ESPACE;
 788               goto free_return;
 789             }
 790           else
 791             {
 792               mctx.match_last = match_last;
 793               if ((!preg->no_sub && nmatch > 1) || dfa->nbackref)
 794                 {
 795                   re_dfastate_t *pstate = mctx.state_log[match_last];
 796                   mctx.last_node = check_halt_state_context (&mctx, pstate,
 797                                                              match_last);
 798                 }
 799               if ((!preg->no_sub && nmatch > 1 && dfa->has_plural_match)
 800                   || dfa->nbackref)
 801                 {
 802                   err = prune_impossible_nodes (&mctx);
 803                   if (err == REG_NOERROR)
 804                     break;
 805                   if (BE (err != REG_NOMATCH, 0))
 806                     goto free_return;
 807                   match_last = -1;
 808                 }
 809               else
 810                 break; /* We found a match.  */
 811             }
 812         }
 813
 814       match_ctx_clean (&mctx);
 815     }
 816
 817 #ifdef DEBUG
 818   assert (match_last != -1);
 819   assert (err == REG_NOERROR);
 820 #endif
 821
 822   /* Set pmatch[] if we need.  */
 823   if (nmatch > 0)
 824     {
 825       int reg_idx;
 826
 827       /* Initialize registers.  */
 828       for (reg_idx = 1; reg_idx < nmatch; ++reg_idx)
 829         pmatch[reg_idx].rm_so = pmatch[reg_idx].rm_eo = -1;
 830
 831       /* Set the points where matching start/end.  */
 832       pmatch[0].rm_so = 0;
 833       pmatch[0].rm_eo = mctx.match_last;
 834
 835       if (!preg->no_sub && nmatch > 1)
 836         {
 837           err = set_regs (preg, &mctx, nmatch, pmatch,
 838                           dfa->has_plural_match && dfa->nbackref > 0);
 839           if (BE (err != REG_NOERROR, 0))
 840             goto free_return;
 841         }
 842
 843       /* At last, add the offset to the each registers, since we slided
 844          the buffers so that we could assume that the matching starts
 845          from 0.  */
 846       for (reg_idx = 0; reg_idx < nmatch; ++reg_idx)
 847         if (pmatch[reg_idx].rm_so != -1)
 848           {
 849 #ifdef RE_ENABLE_I18N
 850             if (BE (mctx.input.offsets_needed != 0, 0))
 851               {
 852                 pmatch[reg_idx].rm_so =
 853                   (pmatch[reg_idx].rm_so == mctx.input.valid_len
 854                    ? mctx.input.valid_raw_len
 855                    : mctx.input.offsets[pmatch[reg_idx].rm_so]);
 856                 pmatch[reg_idx].rm_eo =
 857                   (pmatch[reg_idx].rm_eo == mctx.input.valid_len
 858                    ? mctx.input.valid_raw_len
 859                    : mctx.input.offsets[pmatch[reg_idx].rm_eo]);
 860               }
 861 #else
 862             assert (mctx.input.offsets_needed == 0);
 863 #endif
 864             pmatch[reg_idx].rm_so += match_first;
 865             pmatch[reg_idx].rm_eo += match_first;
 866           }
 867       for (reg_idx = 0; reg_idx < extra_nmatch; ++reg_idx)
 868         {
 869           pmatch[nmatch + reg_idx].rm_so = -1;
 870           pmatch[nmatch + reg_idx].rm_eo = -1;
 871         }
 872
 873       if (dfa->subexp_map)
 874         for (reg_idx = 0; reg_idx + 1 < nmatch; reg_idx++)
 875           if (dfa->subexp_map[reg_idx] != reg_idx)
 876             {
 877               pmatch[reg_idx + 1].rm_so
 878                 = pmatch[dfa->subexp_map[reg_idx] + 1].rm_so;
 879               pmatch[reg_idx + 1].rm_eo
 880                 = pmatch[dfa->subexp_map[reg_idx] + 1].rm_eo;
 881             }
 882     }
 883
 884  free_return:
 885   re_free (mctx.state_log);
 886   if (dfa->nbackref)
 887     match_ctx_free (&mctx);
 888   re_string_destruct (&mctx.input);
 889   return err;
 890 }
 891
 892 static reg_errcode_t
 893 prune_impossible_nodes (mctx)
 894      re_match_context_t *mctx;
 895 {
 896   const re_dfa_t *const dfa = mctx->dfa;
 897   int halt_node, match_last;
 898   reg_errcode_t ret;
 899   re_dfastate_t **sifted_states;
 900   re_dfastate_t **lim_states = NULL;
 901   re_sift_context_t sctx;
 902 #ifdef DEBUG
 903   assert (mctx->state_log != NULL);
 904 #endif
 905   match_last = mctx->match_last;
 906   halt_node = mctx->last_node;
 907   sifted_states = re_malloc (re_dfastate_t *, match_last + 1);
 908   if (BE (sifted_states == NULL, 0))
 909     {
 910       ret = REG_ESPACE;
 911       goto free_return;
 912     }
 913   if (dfa->nbackref)
 914     {
 915       lim_states = re_malloc (re_dfastate_t *, match_last + 1);
 916       if (BE (lim_states == NULL, 0))
 917         {
 918           ret = REG_ESPACE;
 919           goto free_return;
 920         }
 921       while (1)
 922         {
 923           memset (lim_states, '\0',
 924                   sizeof (re_dfastate_t *) * (match_last + 1));
 925           sift_ctx_init (&sctx, sifted_states, lim_states, halt_node,
 926                          match_last);
 927           ret = sift_states_backward (mctx, &sctx);
 928           re_node_set_free (&sctx.limits);
 929           if (BE (ret != REG_NOERROR, 0))
 930               goto free_return;
 931           if (sifted_states[0] != NULL || lim_states[0] != NULL)
 932             break;
 933           do
 934             {
 935               --match_last;
 936               if (match_last < 0)
 937                 {
 938                   ret = REG_NOMATCH;
 939                   goto free_return;
 940                 }
 941             } while (mctx->state_log[match_last] == NULL
 942                      || !mctx->state_log[match_last]->halt);
 943           halt_node = check_halt_state_context (mctx,
 944                                                 mctx->state_log[match_last],
 945                                                 match_last);
 946         }
 947       ret = merge_state_array (dfa, sifted_states, lim_states,
 948                                match_last + 1);
 949       re_free (lim_states);
 950       lim_states = NULL;
 951       if (BE (ret != REG_NOERROR, 0))
 952         goto free_return;
 953     }
 954   else
 955     {
 956       sift_ctx_init (&sctx, sifted_states, lim_states, halt_node, match_last);
 957       ret = sift_states_backward (mctx, &sctx);
 958       re_node_set_free (&sctx.limits);
 959       if (BE (ret != REG_NOERROR, 0))
 960         goto free_return;
 961     }
 962   re_free (mctx->state_log);
 963   mctx->state_log = sifted_states;
 964   sifted_states = NULL;
 965   mctx->last_node = halt_node;
 966   mctx->match_last = match_last;
 967   ret = REG_NOERROR;
 968  free_return:
 969   re_free (sifted_states);
 970   re_free (lim_states);
 971   return ret;
 972 }
 973
 974 /* Acquire an initial state and return it.
 975    We must select appropriate initial state depending on the context,
 976    since initial states may have constraints like "\<", "^", etc..  */
 977
 978 static __inline__ re_dfastate_t *
 979 __attribute ((always_inline)) internal_function
 980 acquire_init_state_context (reg_errcode_t *err, const re_match_context_t *mctx,
 981                             int idx)
 982 {
 983   const re_dfa_t *const dfa = mctx->dfa;
 984   if (dfa->init_state->has_constraint)
 985     {
 986       unsigned int context;
 987       context = re_string_context_at (&mctx->input, idx - 1, mctx->eflags);
 988       if (IS_WORD_CONTEXT (context))
 989         return dfa->init_state_word;
 990       else if (IS_ORDINARY_CONTEXT (context))
 991         return dfa->init_state;
 992       else if (IS_BEGBUF_CONTEXT (context) && IS_NEWLINE_CONTEXT (context))
 993         return dfa->init_state_begbuf;
 994       else if (IS_NEWLINE_CONTEXT (context))
 995         return dfa->init_state_nl;
 996       else if (IS_BEGBUF_CONTEXT (context))
 997         {
 998           /* It is relatively rare case, then calculate on demand.  */
 999           return re_acquire_state_context (err, dfa,
1000                                            dfa->init_state->entrance_nodes,
1001                                            context);
1002         }
1003       else
1004         /* Must not happen?  */
1005         return dfa->init_state;
1006     }
1007   else
1008     return dfa->init_state;
1009 }
1010
1011 /* Check whether the regular expression match input string INPUT or not,
1012    and return the index where the matching end, return -1 if not match,
1013    or return -2 in case of an error.
1014    FL_LONGEST_MATCH means we want the POSIX longest matching.
1015    If P_MATCH_FIRST is not NULL, and the match fails, it is set to the
1016    next place where we may want to try matching.
1017    Note that the matcher assume that the maching starts from the current
1018    index of the buffer.  */
1019
1020 static int
1021 internal_function
1022 check_matching (re_match_context_t *mctx, int fl_longest_match,
1023                 int *p_match_first)
1024 {
1025   const re_dfa_t *const dfa = mctx->dfa;
1026   reg_errcode_t err;
1027   int match = 0;
1028   int match_last = -1;
1029   int cur_str_idx = re_string_cur_idx (&mctx->input);
1030   re_dfastate_t *cur_state;
1031   int at_init_state = p_match_first != NULL;
1032   int next_start_idx = cur_str_idx;
1033
1034   err = REG_NOERROR;
1035   cur_state = acquire_init_state_context (&err, mctx, cur_str_idx);
1036   /* An initial state must not be NULL (invalid).  */
1037   if (BE (cur_state == NULL, 0))
1038     {
1039       assert (err == REG_ESPACE);
1040       return -2;
1041     }
1042
1043   if (mctx->state_log != NULL)
1044     {
1045       mctx->state_log[cur_str_idx] = cur_state;
1046
1047       /* Check OP_OPEN_SUBEXP in the initial state in case that we use them
1048          later.  E.g. Processing back references.  */
1049       if (BE (dfa->nbackref, 0))
1050         {
1051           at_init_state = 0;
1052           err = check_subexp_matching_top (mctx, &cur_state->nodes, 0);
1053           if (BE (err != REG_NOERROR, 0))
1054             return err;
1055
1056           if (cur_state->has_backref)
1057             {
1058               err = transit_state_bkref (mctx, &cur_state->nodes);
1059               if (BE (err != REG_NOERROR, 0))
1060                 return err;
1061             }
1062         }
1063     }
1064
1065   /* If the RE accepts NULL string.  */
1066   if (BE (cur_state->halt, 0))
1067     {
1068       if (!cur_state->has_constraint
1069           || check_halt_state_context (mctx, cur_state, cur_str_idx))
1070         {
1071           if (!fl_longest_match)
1072             return cur_str_idx;
1073           else
1074             {
1075               match_last = cur_str_idx;
1076               match = 1;
1077             }
1078         }
1079     }
1080
1081   while (!re_string_eoi (&mctx->input))
1082     {
1083       re_dfastate_t *old_state = cur_state;
1084       int next_char_idx = re_string_cur_idx (&mctx->input) + 1;
1085
1086       if (BE (next_char_idx >= mctx->input.bufs_len, 0)
1087           || (BE (next_char_idx >= mctx->input.valid_len, 0)
1088               && mctx->input.valid_len < mctx->input.len))
1089         {
1090           err = extend_buffers (mctx);
1091           if (BE (err != REG_NOERROR, 0))
1092             {
1093               assert (err == REG_ESPACE);
1094               return -2;
1095             }
1096         }
1097
1098       cur_state = transit_state (&err, mctx, cur_state);
1099       if (mctx->state_log != NULL)
1100         cur_state = merge_state_with_log (&err, mctx, cur_state);
1101
1102       if (cur_state == NULL)
1103         {
1104           /* Reached the invalid state or an error.  Try to recover a valid
1105              state using the state log, if available and if we have not
1106              already found a valid (even if not the longest) match.  */
1107           if (BE (err != REG_NOERROR, 0))
1108             return -2;
1109
1110           if (mctx->state_log == NULL
1111               || (match && !fl_longest_match)
1112               || (cur_state = find_recover_state (&err, mctx)) == NULL)
1113             break;
1114         }
1115
1116       if (BE (at_init_state, 0))
1117         {
1118           if (old_state == cur_state)
1119             next_start_idx = next_char_idx;
1120           else
1121             at_init_state = 0;
1122         }
1123
1124       if (cur_state->halt)
1125         {
1126           /* Reached a halt state.
1127              Check the halt state can satisfy the current context.  */
1128           if (!cur_state->has_constraint
1129               || check_halt_state_context (mctx, cur_state,
1130                                            re_string_cur_idx (&mctx->input)))
1131             {
1132               /* We found an appropriate halt state.  */
1133               match_last = re_string_cur_idx (&mctx->input);
1134               match = 1;
1135
1136               /* We found a match, do not modify match_first below.  */
1137               p_match_first = NULL;
1138               if (!fl_longest_match)
1139                 break;
1140             }
1141         }
1142     }
1143
1144   if (p_match_first)
1145     *p_match_first += next_start_idx;
1146
1147   return match_last;
1148 }
1149
1150 /* Check NODE match the current context.  */
1151
1152 static int
1153 internal_function
1154 check_halt_node_context (const re_dfa_t *dfa, int node, unsigned int context)
1155 {
1156   re_token_type_t type = dfa->nodes[node].type;
1157   unsigned int constraint = dfa->nodes[node].constraint;
1158   if (type != END_OF_RE)
1159     return 0;
1160   if (!constraint)
1161     return 1;
1162   if (NOT_SATISFY_NEXT_CONSTRAINT (constraint, context))
1163     return 0;
1164   return 1;
1165 }
1166
1167 /* Check the halt state STATE match the current context.
1168    Return 0 if not match, if the node, STATE has, is a halt node and
1169    match the context, return the node.  */
1170
1171 static int
1172 internal_function
1173 check_halt_state_context (const re_match_context_t *mctx,
1174                           const re_dfastate_t *state, int idx)
1175 {
1176   int i;
1177   unsigned int context;
1178 #ifdef DEBUG
1179   assert (state->halt);
1180 #endif
1181   context = re_string_context_at (&mctx->input, idx, mctx->eflags);
1182   for (i = 0; i < state->nodes.nelem; ++i)
1183     if (check_halt_node_context (mctx->dfa, state->nodes.elems[i], context))
1184       return state->nodes.elems[i];
1185   return 0;
1186 }
1187
1188 /* Compute the next node to which "NFA" transit from NODE("NFA" is a NFA
1189    corresponding to the DFA).
1190    Return the destination node, and update EPS_VIA_NODES, return -1 in case
1191    of errors.  */
1192
1193 static int
1194 internal_function
1195 proceed_next_node (const re_match_context_t *mctx, int nregs, regmatch_t *regs,
1196                    int *pidx, int node, re_node_set *eps_via_nodes,
1197                    struct re_fail_stack_t *fs)
1198 {
1199   const re_dfa_t *const dfa = mctx->dfa;
1200   int i, err;
1201   if (IS_EPSILON_NODE (dfa->nodes[node].type))
1202     {
1203       re_node_set *cur_nodes = &mctx->state_log[*pidx]->nodes;
1204       re_node_set *edests = &dfa->edests[node];
1205       int dest_node;
1206       err = re_node_set_insert (eps_via_nodes, node);
1207       if (BE (err < 0, 0))
1208         return -2;
1209       /* Pick up a valid destination, or return -1 if none is found.  */
1210       for (dest_node = -1, i = 0; i < edests->nelem; ++i)
1211         {
1212           int candidate = edests->elems[i];
1213           if (!re_node_set_contains (cur_nodes, candidate))
1214             continue;
1215           if (dest_node == -1)
1216             dest_node = candidate;
1217
1218           else
1219             {
1220               /* In order to avoid infinite loop like "(a*)*", return the second
1221                  epsilon-transition if the first was already considered.  */
1222               if (re_node_set_contains (eps_via_nodes, dest_node))
1223                 return candidate;
1224
1225               /* Otherwise, push the second epsilon-transition on the fail stack.  */
1226               else if (fs != NULL
1227                        && push_fail_stack (fs, *pidx, candidate, nregs, regs,
1228                                            eps_via_nodes))
1229                 return -2;
1230
1231               /* We know we are going to exit.  */
1232               break;
1233             }
1234         }
1235       return dest_node;
1236     }
1237   else
1238     {
1239       int naccepted = 0;
1240       re_token_type_t type = dfa->nodes[node].type;
1241
1242 #ifdef RE_ENABLE_I18N
1243       if (dfa->nodes[node].accept_mb)
1244         naccepted = check_node_accept_bytes (dfa, node, &mctx->input, *pidx);
1245       else
1246 #endif /* RE_ENABLE_I18N */
1247       if (type == OP_BACK_REF)
1248         {
1249           int subexp_idx = dfa->nodes[node].opr.idx + 1;
1250           naccepted = regs[subexp_idx].rm_eo - regs[subexp_idx].rm_so;
1251           if (fs != NULL)
1252             {
1253               if (regs[subexp_idx].rm_so == -1 || regs[subexp_idx].rm_eo == -1)
1254                 return -1;
1255               else if (naccepted)
1256                 {
1257                   char *buf = (char *) re_string_get_buffer (&mctx->input);
1258                   if (memcmp (buf + regs[subexp_idx].rm_so, buf + *pidx,
1259                               naccepted) != 0)
1260                     return -1;
1261                 }
1262             }
1263
1264           if (naccepted == 0)
1265             {
1266               int dest_node;
1267               err = re_node_set_insert (eps_via_nodes, node);
1268               if (BE (err < 0, 0))
1269                 return -2;
1270               dest_node = dfa->edests[node].elems[0];
1271               if (re_node_set_contains (&mctx->state_log[*pidx]->nodes,
1272                                         dest_node))
1273                 return dest_node;
1274             }
1275         }
1276
1277       if (naccepted != 0
1278           || check_node_accept (mctx, dfa->nodes + node, *pidx))
1279         {
1280           int dest_node = dfa->nexts[node];
1281           *pidx = (naccepted == 0) ? *pidx + 1 : *pidx + naccepted;
1282           if (fs && (*pidx > mctx->match_last || mctx->state_log[*pidx] == NULL
1283                      || !re_node_set_contains (&mctx->state_log[*pidx]->nodes,
1284                                                dest_node)))
1285             return -1;
1286           re_node_set_empty (eps_via_nodes);
1287           return dest_node;
1288         }
1289     }
1290   return -1;
1291 }
1292
1293 static reg_errcode_t
1294 internal_function
1295 push_fail_stack (struct re_fail_stack_t *fs, int str_idx, int dest_node,
1296                  int nregs, regmatch_t *regs, re_node_set *eps_via_nodes)
1297 {
1298   reg_errcode_t err;
1299   int num = fs->num++;
1300   if (fs->num == fs->alloc)
1301     {
1302       struct re_fail_stack_ent_t *new_array;
1303       new_array = realloc (fs->stack, (sizeof (struct re_fail_stack_ent_t)
1304                                        * fs->alloc * 2));
1305       if (new_array == NULL)
1306         return REG_ESPACE;
1307       fs->alloc *= 2;
1308       fs->stack = new_array;
1309     }
1310   fs->stack[num].idx = str_idx;
1311   fs->stack[num].node = dest_node;
1312   fs->stack[num].regs = re_malloc (regmatch_t, nregs);
1313   if (fs->stack[num].regs == NULL)
1314     return REG_ESPACE;
1315   memcpy (fs->stack[num].regs, regs, sizeof (regmatch_t) * nregs);
1316   err = re_node_set_init_copy (&fs->stack[num].eps_via_nodes, eps_via_nodes);
1317   return err;
1318 }
1319
1320 static int
1321 internal_function
1322 pop_fail_stack (struct re_fail_stack_t *fs, int *pidx, int nregs,
1323                 regmatch_t *regs, re_node_set *eps_via_nodes)
1324 {
1325   int num = --fs->num;
1326   assert (num >= 0);
1327   *pidx = fs->stack[num].idx;
1328   memcpy (regs, fs->stack[num].regs, sizeof (regmatch_t) * nregs);
1329   re_node_set_free (eps_via_nodes);
1330   re_free (fs->stack[num].regs);
1331   *eps_via_nodes = fs->stack[num].eps_via_nodes;
1332   return fs->stack[num].node;
1333 }
1334
1335 /* Set the positions where the subexpressions are starts/ends to registers
1336    PMATCH.
1337    Note: We assume that pmatch[0] is already set, and
1338    pmatch[i].rm_so == pmatch[i].rm_eo == -1 for 0 < i < nmatch.  */
1339
1340 static reg_errcode_t
1341 internal_function
1342 set_regs (const regex_t *preg, const re_match_context_t *mctx, size_t nmatch,
1343           regmatch_t *pmatch, int fl_backtrack)
1344 {
1345   const re_dfa_t *dfa = (const re_dfa_t *) preg->buffer;
1346   int idx, cur_node;
1347   re_node_set eps_via_nodes;
1348   struct re_fail_stack_t *fs;
1349   struct re_fail_stack_t fs_body = { 0, 2, NULL };
1350   regmatch_t *prev_idx_match;
1351   int prev_idx_match_malloced = 0;
1352
1353 #ifdef DEBUG
1354   assert (nmatch > 1);
1355   assert (mctx->state_log != NULL);
1356 #endif
1357   if (fl_backtrack)
1358     {
1359       fs = &fs_body;
1360       fs->stack = re_malloc (struct re_fail_stack_ent_t, fs->alloc);
1361       if (fs->stack == NULL)
1362         return REG_ESPACE;
1363     }
1364   else
1365     fs = NULL;
1366
1367   cur_node = dfa->init_node;
1368   re_node_set_init_empty (&eps_via_nodes);
1369
1370   if (__libc_use_alloca (nmatch * sizeof (regmatch_t)))
1371     prev_idx_match = (regmatch_t *) alloca (nmatch * sizeof (regmatch_t));
1372   else
1373     {
1374       prev_idx_match = re_malloc (regmatch_t, nmatch);
1375       if (prev_idx_match == NULL)
1376         {
1377           free_fail_stack_return (fs);
1378           return REG_ESPACE;
1379         }
1380       prev_idx_match_malloced = 1;
1381     }
1382   memcpy (prev_idx_match, pmatch, sizeof (regmatch_t) * nmatch);
1383
1384   for (idx = pmatch[0].rm_so; idx <= pmatch[0].rm_eo ;)
1385     {
1386       update_regs (dfa, pmatch, prev_idx_match, cur_node, idx, nmatch);
1387
1388       if (idx == pmatch[0].rm_eo && cur_node == mctx->last_node)
1389         {
1390           int reg_idx;
1391           if (fs)
1392             {
1393               for (reg_idx = 0; reg_idx < nmatch; ++reg_idx)
1394                 if (pmatch[reg_idx].rm_so > -1 && pmatch[reg_idx].rm_eo == -1)
1395                   break;
1396               if (reg_idx == nmatch)
1397                 {
1398                   re_node_set_free (&eps_via_nodes);
1399                   if (prev_idx_match_malloced)
1400                     re_free (prev_idx_match);
1401                   return free_fail_stack_return (fs);
1402                 }
1403               cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch,
1404                                          &eps_via_nodes);
1405             }
1406           else
1407             {
1408               re_node_set_free (&eps_via_nodes);
1409               if (prev_idx_match_malloced)
1410                 re_free (prev_idx_match);
1411               return REG_NOERROR;
1412             }
1413         }
1414
1415       /* Proceed to next node.  */
1416       cur_node = proceed_next_node (mctx, nmatch, pmatch, &idx, cur_node,
1417                                     &eps_via_nodes, fs);
1418
1419       if (BE (cur_node < 0, 0))
1420         {
1421           if (BE (cur_node == -2, 0))
1422             {
1423               re_node_set_free (&eps_via_nodes);
1424               if (prev_idx_match_malloced)
1425                 re_free (prev_idx_match);
1426               free_fail_stack_return (fs);
1427               return REG_ESPACE;
1428             }
1429           if (fs)
1430             cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch,
1431                                        &eps_via_nodes);
1432           else
1433             {
1434               re_node_set_free (&eps_via_nodes);
1435               if (prev_idx_match_malloced)
1436                 re_free (prev_idx_match);
1437               return REG_NOMATCH;
1438             }
1439         }
1440     }
1441   re_node_set_free (&eps_via_nodes);
1442   if (prev_idx_match_malloced)
1443     re_free (prev_idx_match);
1444   return free_fail_stack_return (fs);
1445 }
1446
1447 static reg_errcode_t
1448 internal_function
1449 free_fail_stack_return (struct re_fail_stack_t *fs)
1450 {
1451   if (fs)
1452     {
1453       int fs_idx;
1454       for (fs_idx = 0; fs_idx < fs->num; ++fs_idx)
1455         {
1456           re_node_set_free (&fs->stack[fs_idx].eps_via_nodes);
1457           re_free (fs->stack[fs_idx].regs);
1458         }
1459       re_free (fs->stack);
1460     }
1461   return REG_NOERROR;
1462 }
1463
1464 static void
1465 internal_function
1466 update_regs (const re_dfa_t *dfa, regmatch_t *pmatch,
1467              regmatch_t *prev_idx_match, int cur_node, int cur_idx, int nmatch)
1468 {
1469   int type = dfa->nodes[cur_node].type;
1470   if (type == OP_OPEN_SUBEXP)
1471     {
1472       int reg_num = dfa->nodes[cur_node].opr.idx + 1;
1473
1474       /* We are at the first node of this sub expression.  */
1475       if (reg_num < nmatch)
1476         {
1477           pmatch[reg_num].rm_so = cur_idx;
1478           pmatch[reg_num].rm_eo = -1;
1479         }
1480     }
1481   else if (type == OP_CLOSE_SUBEXP)
1482     {
1483       int reg_num = dfa->nodes[cur_node].opr.idx + 1;
1484       if (reg_num < nmatch)
1485         {
1486           /* We are at the last node of this sub expression.  */
1487           if (pmatch[reg_num].rm_so < cur_idx)
1488             {
1489               pmatch[reg_num].rm_eo = cur_idx;
1490               /* This is a non-empty match or we are not inside an optional
1491                  subexpression.  Accept this right away.  */
1492               memcpy (prev_idx_match, pmatch, sizeof (regmatch_t) * nmatch);
1493             }
1494           else
1495             {
1496               if (dfa->nodes[cur_node].opt_subexp
1497                   && prev_idx_match[reg_num].rm_so != -1)
1498                 /* We transited through an empty match for an optional
1499                    subexpression, like (a?)*, and this is not the subexp's
1500                    first match.  Copy back the old content of the registers
1501                    so that matches of an inner subexpression are undone as
1502                    well, like in ((a?))*.  */
1503                 memcpy (pmatch, prev_idx_match, sizeof (regmatch_t) * nmatch);
1504               else
1505                 /* We completed a subexpression, but it may be part of
1506                    an optional one, so do not update PREV_IDX_MATCH.  */
1507                 pmatch[reg_num].rm_eo = cur_idx;
1508             }
1509         }
1510     }
1511 }
1512
1513 /* This function checks the STATE_LOG from the SCTX->last_str_idx to 0
1514    and sift the nodes in each states according to the following rules.
1515    Updated state_log will be wrote to STATE_LOG.
1516
1517    Rules: We throw away the Node `a' in the STATE_LOG[STR_IDX] if...
1518      1. When STR_IDX == MATCH_LAST(the last index in the state_log):
1519         If `a' isn't the LAST_NODE and `a' can't epsilon transit to
1520         the LAST_NODE, we throw away the node `a'.
1521      2. When 0 <= STR_IDX < MATCH_LAST and `a' accepts
1522         string `s' and transit to `b':
1523         i. If 'b' isn't in the STATE_LOG[STR_IDX+strlen('s')], we throw
1524            away the node `a'.
1525         ii. If 'b' is in the STATE_LOG[STR_IDX+strlen('s')] but 'b' is
1526             thrown away, we throw away the node `a'.
1527      3. When 0 <= STR_IDX < MATCH_LAST and 'a' epsilon transit to 'b':
1528         i. If 'b' isn't in the STATE_LOG[STR_IDX], we throw away the
1529            node `a'.
1530         ii. If 'b' is in the STATE_LOG[STR_IDX] but 'b' is thrown away,
1531             we throw away the node `a'.  */
1532
1533 #define STATE_NODE_CONTAINS(state,node) \
1534   ((state) != NULL && re_node_set_contains (&(state)->nodes, node))
1535
1536 static reg_errcode_t
1537 internal_function
1538 sift_states_backward (const re_match_context_t *mctx, re_sift_context_t *sctx)
1539 {
1540   reg_errcode_t err;
1541   int null_cnt = 0;
1542   int str_idx = sctx->last_str_idx;
1543   re_node_set cur_dest;
1544
1545 #ifdef DEBUG
1546   assert (mctx->state_log != NULL && mctx->state_log[str_idx] != NULL);
1547 #endif
1548
1549   /* Build sifted state_log[str_idx].  It has the nodes which can epsilon
1550      transit to the last_node and the last_node itself.  */
1551   err = re_node_set_init_1 (&cur_dest, sctx->last_node);
1552   if (BE (err != REG_NOERROR, 0))
1553     return err;
1554   err = update_cur_sifted_state (mctx, sctx, str_idx, &cur_dest);
1555   if (BE (err != REG_NOERROR, 0))
1556     goto free_return;
1557
1558   /* Then check each states in the state_log.  */
1559   while (str_idx > 0)
1560     {
1561       /* Update counters.  */
1562       null_cnt = (sctx->sifted_states[str_idx] == NULL) ? null_cnt + 1 : 0;
1563       if (null_cnt > mctx->max_mb_elem_len)
1564         {
1565           memset (sctx->sifted_states, '\0',
1566                   sizeof (re_dfastate_t *) * str_idx);
1567           re_node_set_free (&cur_dest);
1568           return REG_NOERROR;
1569         }
1570       re_node_set_empty (&cur_dest);
1571       --str_idx;
1572
1573       if (mctx->state_log[str_idx])
1574         {
1575           err = build_sifted_states (mctx, sctx, str_idx, &cur_dest);
1576           if (BE (err != REG_NOERROR, 0))
1577             goto free_return;
1578         }
1579
1580       /* Add all the nodes which satisfy the following conditions:
1581          - It can epsilon transit to a node in CUR_DEST.
1582          - It is in CUR_SRC.
1583          And update state_log.  */
1584       err = update_cur_sifted_state (mctx, sctx, str_idx, &cur_dest);
1585       if (BE (err != REG_NOERROR, 0))
1586         goto free_return;
1587     }
1588   err = REG_NOERROR;
1589  free_return:
1590   re_node_set_free (&cur_dest);
1591   return err;
1592 }
1593
1594 static reg_errcode_t
1595 internal_function
1596 build_sifted_states (const re_match_context_t *mctx, re_sift_context_t *sctx,
1597                      int str_idx, re_node_set *cur_dest)
1598 {
1599   const re_dfa_t *const dfa = mctx->dfa;
1600   const re_node_set *cur_src = &mctx->state_log[str_idx]->non_eps_nodes;
1601   int i;
1602
1603   /* Then build the next sifted state.
1604      We build the next sifted state on `cur_dest', and update
1605      `sifted_states[str_idx]' with `cur_dest'.
1606      Note:
1607      `cur_dest' is the sifted state from `state_log[str_idx + 1]'.
1608      `cur_src' points the node_set of the old `state_log[str_idx]'
1609      (with the epsilon nodes pre-filtered out).  */
1610   for (i = 0; i < cur_src->nelem; i++)
1611     {
1612       int prev_node = cur_src->elems[i];
1613       int naccepted = 0;
1614       int ret;
1615
1616 #ifdef DEBUG
1617       re_token_type_t type = dfa->nodes[prev_node].type;
1618       assert (!IS_EPSILON_NODE (type));
1619 #endif
1620 #ifdef RE_ENABLE_I18N
1621       /* If the node may accept `multi byte'.  */
1622       if (dfa->nodes[prev_node].accept_mb)
1623         naccepted = sift_states_iter_mb (mctx, sctx, prev_node,
1624                                          str_idx, sctx->last_str_idx);
1625 #endif /* RE_ENABLE_I18N */
1626
1627       /* We don't check backreferences here.
1628          See update_cur_sifted_state().  */
1629       if (!naccepted
1630           && check_node_accept (mctx, dfa->nodes + prev_node, str_idx)
1631           && STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + 1],
1632                                   dfa->nexts[prev_node]))
1633         naccepted = 1;
1634
1635       if (naccepted == 0)
1636         continue;
1637
1638       if (sctx->limits.nelem)
1639         {
1640           int to_idx = str_idx + naccepted;
1641           if (check_dst_limits (mctx, &sctx->limits,
1642                                 dfa->nexts[prev_node], to_idx,
1643                                 prev_node, str_idx))
1644             continue;
1645         }
1646       ret = re_node_set_insert (cur_dest, prev_node);
1647       if (BE (ret == -1, 0))
1648         return REG_ESPACE;
1649     }
1650
1651   return REG_NOERROR;
1652 }
1653
1654 /* Helper functions.  */
1655
1656 static reg_errcode_t
1657 internal_function
1658 clean_state_log_if_needed (re_match_context_t *mctx, int next_state_log_idx)
1659 {
1660   int top = mctx->state_log_top;
1661
1662   if (next_state_log_idx >= mctx->input.bufs_len
1663       || (next_state_log_idx >= mctx->input.valid_len
1664           && mctx->input.valid_len < mctx->input.len))
1665     {
1666       reg_errcode_t err;
1667       err = extend_buffers (mctx);
1668       if (BE (err != REG_NOERROR, 0))
1669         return err;
1670     }
1671
1672   if (top < next_state_log_idx)
1673     {
1674       memset (mctx->state_log + top + 1, '\0',
1675               sizeof (re_dfastate_t *) * (next_state_log_idx - top));
1676       mctx->state_log_top = next_state_log_idx;
1677     }
1678   return REG_NOERROR;
1679 }
1680
1681 static reg_errcode_t
1682 internal_function
1683 merge_state_array (const re_dfa_t *dfa, re_dfastate_t **dst,
1684                    re_dfastate_t **src, int num)
1685 {
1686   int st_idx;
1687   reg_errcode_t err;
1688   for (st_idx = 0; st_idx < num; ++st_idx)
1689     {
1690       if (dst[st_idx] == NULL)
1691         dst[st_idx] = src[st_idx];
1692       else if (src[st_idx] != NULL)
1693         {
1694           re_node_set merged_set;
1695           err = re_node_set_init_union (&merged_set, &dst[st_idx]->nodes,
1696                                         &src[st_idx]->nodes);
1697           if (BE (err != REG_NOERROR, 0))
1698             return err;
1699           dst[st_idx] = re_acquire_state (&err, dfa, &merged_set);
1700           re_node_set_free (&merged_set);
1701           if (BE (err != REG_NOERROR, 0))
1702             return err;
1703         }
1704     }
1705   return REG_NOERROR;
1706 }
1707
1708 static reg_errcode_t
1709 internal_function
1710 update_cur_sifted_state (const re_match_context_t *mctx,
1711                          re_sift_context_t *sctx, int str_idx,
1712                          re_node_set *dest_nodes)
1713 {
1714   const re_dfa_t *const dfa = mctx->dfa;
1715   reg_errcode_t err = REG_NOERROR;
1716   const re_node_set *candidates;
1717   candidates = ((mctx->state_log[str_idx] == NULL) ? NULL
1718                 : &mctx->state_log[str_idx]->nodes);
1719
1720   if (dest_nodes->nelem == 0)
1721     sctx->sifted_states[str_idx] = NULL;
1722   else
1723     {
1724       if (candidates)
1725         {
1726           /* At first, add the nodes which can epsilon transit to a node in
1727              DEST_NODE.  */
1728           err = add_epsilon_src_nodes (dfa, dest_nodes, candidates);
1729           if (BE (err != REG_NOERROR, 0))
1730             return err;
1731
1732           /* Then, check the limitations in the current sift_context.  */
1733           if (sctx->limits.nelem)
1734             {
1735               err = check_subexp_limits (dfa, dest_nodes, candidates, &sctx->limits,
1736                                          mctx->bkref_ents, str_idx);
1737               if (BE (err != REG_NOERROR, 0))
1738                 return err;
1739             }
1740         }
1741
1742       sctx->sifted_states[str_idx] = re_acquire_state (&err, dfa, dest_nodes);
1743       if (BE (err != REG_NOERROR, 0))
1744         return err;
1745     }
1746
1747   if (candidates && mctx->state_log[str_idx]->has_backref)
1748     {
1749       err = sift_states_bkref (mctx, sctx, str_idx, candidates);
1750       if (BE (err != REG_NOERROR, 0))
1751         return err;
1752     }
1753   return REG_NOERROR;
1754 }
1755
1756 static reg_errcode_t
1757 internal_function
1758 add_epsilon_src_nodes (const re_dfa_t *dfa, re_node_set *dest_nodes,
1759                        const re_node_set *candidates)
1760 {
1761   reg_errcode_t err = REG_NOERROR;
1762   int i;
1763
1764   re_dfastate_t *state = re_acquire_state (&err, dfa, dest_nodes);
1765   if (BE (err != REG_NOERROR, 0))
1766     return err;
1767
1768   if (!state->inveclosure.alloc)
1769     {
1770       err = re_node_set_alloc (&state->inveclosure, dest_nodes->nelem);
1771       if (BE (err != REG_NOERROR, 0))
1772         return REG_ESPACE;
1773       for (i = 0; i < dest_nodes->nelem; i++)
1774         re_node_set_merge (&state->inveclosure,
1775                            dfa->inveclosures + dest_nodes->elems[i]);
1776     }
1777   return re_node_set_add_intersect (dest_nodes, candidates,
1778                                     &state->inveclosure);
1779 }
1780
1781 static reg_errcode_t
1782 internal_function
1783 sub_epsilon_src_nodes (const re_dfa_t *dfa, int node, re_node_set *dest_nodes,
1784                        const re_node_set *candidates)
1785 {
1786     int ecl_idx;
1787     reg_errcode_t err;
1788     re_node_set *inv_eclosure = dfa->inveclosures + node;
1789     re_node_set except_nodes;
1790     re_node_set_init_empty (&except_nodes);
1791     for (ecl_idx = 0; ecl_idx < inv_eclosure->nelem; ++ecl_idx)
1792       {
1793         int cur_node = inv_eclosure->elems[ecl_idx];
1794         if (cur_node == node)
1795           continue;
1796         if (IS_EPSILON_NODE (dfa->nodes[cur_node].type))
1797           {
1798             int edst1 = dfa->edests[cur_node].elems[0];
1799             int edst2 = ((dfa->edests[cur_node].nelem > 1)
1800                          ? dfa->edests[cur_node].elems[1] : -1);
1801             if ((!re_node_set_contains (inv_eclosure, edst1)
1802                  && re_node_set_contains (dest_nodes, edst1))
1803                 || (edst2 > 0
1804                     && !re_node_set_contains (inv_eclosure, edst2)
1805                     && re_node_set_contains (dest_nodes, edst2)))
1806               {
1807                 err = re_node_set_add_intersect (&except_nodes, candidates,
1808                                                  dfa->inveclosures + cur_node);
1809                 if (BE (err != REG_NOERROR, 0))
1810                   {
1811                     re_node_set_free (&except_nodes);
1812                     return err;
1813                   }
1814               }
1815           }
1816       }
1817     for (ecl_idx = 0; ecl_idx < inv_eclosure->nelem; ++ecl_idx)
1818       {
1819         int cur_node = inv_eclosure->elems[ecl_idx];
1820         if (!re_node_set_contains (&except_nodes, cur_node))
1821           {
1822             int idx = re_node_set_contains (dest_nodes, cur_node) - 1;
1823             re_node_set_remove_at (dest_nodes, idx);
1824           }
1825       }
1826     re_node_set_free (&except_nodes);
1827     return REG_NOERROR;
1828 }
1829
1830 static int
1831 internal_function
1832 check_dst_limits (const re_match_context_t *mctx, re_node_set *limits,
1833                   int dst_node, int dst_idx, int src_node, int src_idx)
1834 {
1835   const re_dfa_t *const dfa = mctx->dfa;
1836   int lim_idx, src_pos, dst_pos;
1837
1838   int dst_bkref_idx = search_cur_bkref_entry (mctx, dst_idx);
1839   int src_bkref_idx = search_cur_bkref_entry (mctx, src_idx);
1840   for (lim_idx = 0; lim_idx < limits->nelem; ++lim_idx)
1841     {
1842       int subexp_idx;
1843       struct re_backref_cache_entry *ent;
1844       ent = mctx->bkref_ents + limits->elems[lim_idx];
1845       subexp_idx = dfa->nodes[ent->node].opr.idx;
1846
1847       dst_pos = check_dst_limits_calc_pos (mctx, limits->elems[lim_idx],
1848                                            subexp_idx, dst_node, dst_idx,
1849                                            dst_bkref_idx);
1850       src_pos = check_dst_limits_calc_pos (mctx, limits->elems[lim_idx],
1851                                            subexp_idx, src_node, src_idx,
1852                                            src_bkref_idx);
1853
1854       /* In case of:
1855          <src> <dst> ( <subexp> )
1856          ( <subexp> ) <src> <dst>
1857          ( <subexp1> <src> <subexp2> <dst> <subexp3> )  */
1858       if (src_pos == dst_pos)
1859         continue; /* This is unrelated limitation.  */
1860       else
1861         return 1;
1862     }
1863   return 0;
1864 }
1865
1866 static int
1867 internal_function
1868 check_dst_limits_calc_pos_1 (const re_match_context_t *mctx, int boundaries,
1869                              int subexp_idx, int from_node, int bkref_idx)
1870 {
1871   const re_dfa_t *const dfa = mctx->dfa;
1872   const re_node_set *eclosures = dfa->eclosures + from_node;
1873   int node_idx;
1874
1875   /* Else, we are on the boundary: examine the nodes on the epsilon
1876      closure.  */
1877   for (node_idx = 0; node_idx < eclosures->nelem; ++node_idx)
1878     {
1879       int node = eclosures->elems[node_idx];
1880       switch (dfa->nodes[node].type)
1881         {
1882         case OP_BACK_REF:
1883           if (bkref_idx != -1)
1884             {
1885               struct re_backref_cache_entry *ent = mctx->bkref_ents + bkref_idx;
1886               do
1887                 {
1888                   int dst, cpos;
1889
1890                   if (ent->node != node)
1891                     continue;
1892
1893                   if (subexp_idx < BITSET_WORD_BITS
1894                       && !(ent->eps_reachable_subexps_map
1895                            & ((bitset_word_t) 1 << subexp_idx)))
1896                     continue;
1897
1898                   /* Recurse trying to reach the OP_OPEN_SUBEXP and
1899                      OP_CLOSE_SUBEXP cases below.  But, if the
1900                      destination node is the same node as the source
1901                      node, don't recurse because it would cause an
1902                      infinite loop: a regex that exhibits this behavior
1903                      is ()\1*\1*  */
1904                   dst = dfa->edests[node].elems[0];
1905                   if (dst == from_node)
1906                     {
1907                       if (boundaries & 1)
1908                         return -1;
1909                       else /* if (boundaries & 2) */
1910                         return 0;
1911                     }
1912
1913                   cpos =
1914                     check_dst_limits_calc_pos_1 (mctx, boundaries, subexp_idx,
1915                                                  dst, bkref_idx);
1916                   if (cpos == -1 /* && (boundaries & 1) */)
1917                     return -1;
1918                   if (cpos == 0 && (boundaries & 2))
1919                     return 0;
1920
1921                   if (subexp_idx < BITSET_WORD_BITS)
1922                     ent->eps_reachable_subexps_map
1923                       &= ~((bitset_word_t) 1 << subexp_idx);
1924                 }
1925               while (ent++->more);
1926             }
1927           break;
1928
1929         case OP_OPEN_SUBEXP:
1930           if ((boundaries & 1) && subexp_idx == dfa->nodes[node].opr.idx)
1931             return -1;
1932           break;
1933
1934         case OP_CLOSE_SUBEXP:
1935           if ((boundaries & 2) && subexp_idx == dfa->nodes[node].opr.idx)
1936             return 0;
1937           break;
1938
1939         default:
1940             break;
1941         }
1942     }
1943
1944   return (boundaries & 2) ? 1 : 0;
1945 }
1946
1947 static int
1948 internal_function
1949 check_dst_limits_calc_pos (const re_match_context_t *mctx, int limit,
1950                            int subexp_idx, int from_node, int str_idx,
1951                            int bkref_idx)
1952 {
1953   struct re_backref_cache_entry *lim = mctx->bkref_ents + limit;
1954   int boundaries;
1955
1956   /* If we are outside the range of the subexpression, return -1 or 1.  */
1957   if (str_idx < lim->subexp_from)
1958     return -1;
1959
1960   if (lim->subexp_to < str_idx)
1961     return 1;
1962
1963   /* If we are within the subexpression, return 0.  */
1964   boundaries = (str_idx == lim->subexp_from);
1965   boundaries |= (str_idx == lim->subexp_to) << 1;
1966   if (boundaries == 0)
1967     return 0;
1968
1969   /* Else, examine epsilon closure.  */
1970   return check_dst_limits_calc_pos_1 (mctx, boundaries, subexp_idx,
1971                                       from_node, bkref_idx);
1972 }
1973
1974 /* Check the limitations of sub expressions LIMITS, and remove the nodes
1975    which are against limitations from DEST_NODES. */
1976
1977 static reg_errcode_t
1978 internal_function
1979 check_subexp_limits (const re_dfa_t *dfa, re_node_set *dest_nodes,
1980                      const re_node_set *candidates, re_node_set *limits,
1981                      struct re_backref_cache_entry *bkref_ents, int str_idx)
1982 {
1983   reg_errcode_t err;
1984   int node_idx, lim_idx;
1985
1986   for (lim_idx = 0; lim_idx < limits->nelem; ++lim_idx)
1987     {
1988       int subexp_idx;
1989       struct re_backref_cache_entry *ent;
1990       ent = bkref_ents + limits->elems[lim_idx];
1991
1992       if (str_idx <= ent->subexp_from || ent->str_idx < str_idx)
1993         continue; /* This is unrelated limitation.  */
1994
1995       subexp_idx = dfa->nodes[ent->node].opr.idx;
1996       if (ent->subexp_to == str_idx)
1997         {
1998           int ops_node = -1;
1999           int cls_node = -1;
2000           for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
2001             {
2002               int node = dest_nodes->elems[node_idx];
2003               re_token_type_t type = dfa->nodes[node].type;
2004               if (type == OP_OPEN_SUBEXP
2005                   && subexp_idx == dfa->nodes[node].opr.idx)
2006                 ops_node = node;
2007               else if (type == OP_CLOSE_SUBEXP
2008                        && subexp_idx == dfa->nodes[node].opr.idx)
2009                 cls_node = node;
2010             }
2011
2012           /* Check the limitation of the open subexpression.  */
2013           /* Note that (ent->subexp_to = str_idx != ent->subexp_from).  */
2014           if (ops_node >= 0)
2015             {
2016               err = sub_epsilon_src_nodes (dfa, ops_node, dest_nodes,
2017                                            candidates);
2018               if (BE (err != REG_NOERROR, 0))
2019                 return err;
2020             }
2021
2022           /* Check the limitation of the close subexpression.  */
2023           if (cls_node >= 0)
2024             for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
2025               {
2026                 int node = dest_nodes->elems[node_idx];
2027                 if (!re_node_set_contains (dfa->inveclosures + node,
2028                                            cls_node)
2029                     && !re_node_set_contains (dfa->eclosures + node,
2030                                               cls_node))
2031                   {
2032                     /* It is against this limitation.
2033                        Remove it form the current sifted state.  */
2034                     err = sub_epsilon_src_nodes (dfa, node, dest_nodes,
2035                                                  candidates);
2036                     if (BE (err != REG_NOERROR, 0))
2037                       return err;
2038                     --node_idx;
2039                   }
2040               }
2041         }
2042       else /* (ent->subexp_to != str_idx)  */
2043         {
2044           for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
2045             {
2046               int node = dest_nodes->elems[node_idx];
2047               re_token_type_t type = dfa->nodes[node].type;
2048               if (type == OP_CLOSE_SUBEXP || type == OP_OPEN_SUBEXP)
2049                 {
2050                   if (subexp_idx != dfa->nodes[node].opr.idx)
2051                     continue;
2052                   /* It is against this limitation.
2053                      Remove it form the current sifted state.  */
2054                   err = sub_epsilon_src_nodes (dfa, node, dest_nodes,
2055                                                candidates);
2056                   if (BE (err != REG_NOERROR, 0))
2057                     return err;
2058                 }
2059             }
2060         }
2061     }
2062   return REG_NOERROR;
2063 }
2064
2065 static reg_errcode_t
2066 internal_function
2067 sift_states_bkref (const re_match_context_t *mctx, re_sift_context_t *sctx,
2068                    int str_idx, const re_node_set *candidates)
2069 {
2070   const re_dfa_t *const dfa = mctx->dfa;
2071   reg_errcode_t err;
2072   int node_idx, node;
2073   re_sift_context_t local_sctx;
2074   int first_idx = search_cur_bkref_entry (mctx, str_idx);
2075
2076   if (first_idx == -1)
2077     return REG_NOERROR;
2078
2079   local_sctx.sifted_states = NULL; /* Mark that it hasn't been initialized.  */
2080
2081   for (node_idx = 0; node_idx < candidates->nelem; ++node_idx)
2082     {
2083       int enabled_idx;
2084       re_token_type_t type;
2085       struct re_backref_cache_entry *entry;
2086       node = candidates->elems[node_idx];
2087       type = dfa->nodes[node].type;
2088       /* Avoid infinite loop for the REs like "()\1+".  */
2089       if (node == sctx->last_node && str_idx == sctx->last_str_idx)
2090         continue;
2091       if (type != OP_BACK_REF)
2092         continue;
2093
2094       entry = mctx->bkref_ents + first_idx;
2095       enabled_idx = first_idx;
2096       do
2097         {
2098           int subexp_len;
2099           int to_idx;
2100           int dst_node;
2101           int ret;
2102           re_dfastate_t *cur_state;
2103
2104           if (entry->node != node)
2105             continue;
2106           subexp_len = entry->subexp_to - entry->subexp_from;
2107           to_idx = str_idx + subexp_len;
2108           dst_node = (subexp_len ? dfa->nexts[node]
2109                       : dfa->edests[node].elems[0]);
2110
2111           if (to_idx > sctx->last_str_idx
2112               || sctx->sifted_states[to_idx] == NULL
2113               || !STATE_NODE_CONTAINS (sctx->sifted_states[to_idx], dst_node)
2114               || check_dst_limits (mctx, &sctx->limits, node,
2115                                    str_idx, dst_node, to_idx))
2116             continue;
2117
2118           if (local_sctx.sifted_states == NULL)
2119             {
2120               local_sctx = *sctx;
2121               err = re_node_set_init_copy (&local_sctx.limits, &sctx->limits);
2122               if (BE (err != REG_NOERROR, 0))
2123                 goto free_return;
2124             }
2125           local_sctx.last_node = node;
2126           local_sctx.last_str_idx = str_idx;
2127           ret = re_node_set_insert (&local_sctx.limits, enabled_idx);
2128           if (BE (ret < 0, 0))
2129             {
2130               err = REG_ESPACE;
2131               goto free_return;
2132             }
2133           cur_state = local_sctx.sifted_states[str_idx];
2134           err = sift_states_backward (mctx, &local_sctx);
2135           if (BE (err != REG_NOERROR, 0))
2136             goto free_return;
2137           if (sctx->limited_states != NULL)
2138             {
2139               err = merge_state_array (dfa, sctx->limited_states,
2140                                        local_sctx.sifted_states,
2141                                        str_idx + 1);
2142               if (BE (err != REG_NOERROR, 0))
2143                 goto free_return;
2144             }
2145           local_sctx.sifted_states[str_idx] = cur_state;
2146           re_node_set_remove (&local_sctx.limits, enabled_idx);
2147
2148           /* mctx->bkref_ents may have changed, reload the pointer.  */
2149           entry = mctx->bkref_ents + enabled_idx;
2150         }
2151       while (enabled_idx++, entry++->more);
2152     }
2153   err = REG_NOERROR;
2154  free_return:
2155   if (local_sctx.sifted_states != NULL)
2156     {
2157       re_node_set_free (&local_sctx.limits);
2158     }
2159
2160   return err;
2161 }
2162
2163
2164 #ifdef RE_ENABLE_I18N
2165 static int
2166 internal_function
2167 sift_states_iter_mb (const re_match_context_t *mctx, re_sift_context_t *sctx,
2168                      int node_idx, int str_idx, int max_str_idx)
2169 {
2170   const re_dfa_t *const dfa = mctx->dfa;
2171   int naccepted;
2172   /* Check the node can accept `multi byte'.  */
2173   naccepted = check_node_accept_bytes (dfa, node_idx, &mctx->input, str_idx);
2174   if (naccepted > 0 && str_idx + naccepted <= max_str_idx &&
2175       !STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + naccepted],
2176                             dfa->nexts[node_idx]))
2177     /* The node can't accept the `multi byte', or the
2178        destination was already thrown away, then the node
2179        could't accept the current input `multi byte'.   */
2180     naccepted = 0;
2181   /* Otherwise, it is sure that the node could accept
2182      `naccepted' bytes input.  */
2183   return naccepted;
2184 }
2185 #endif /* RE_ENABLE_I18N */
2186
2187
2188 /* Functions for state transition.  */
2189
2190 /* Return the next state to which the current state STATE will transit by
2191    accepting the current input byte, and update STATE_LOG if necessary.
2192    If STATE can accept a multibyte char/collating element/back reference
2193    update the destination of STATE_LOG.  */
2194
2195 static re_dfastate_t *
2196 internal_function
2197 transit_state (reg_errcode_t *err, re_match_context_t *mctx,
2198                re_dfastate_t *state)
2199 {
2200   re_dfastate_t **trtable;
2201   unsigned char ch;
2202
2203 #ifdef RE_ENABLE_I18N
2204   /* If the current state can accept multibyte.  */
2205   if (BE (state->accept_mb, 0))
2206     {
2207       *err = transit_state_mb (mctx, state);
2208       if (BE (*err != REG_NOERROR, 0))
2209         return NULL;
2210     }
2211 #endif /* RE_ENABLE_I18N */
2212
2213   /* Then decide the next state with the single byte.  */
2214 #if 0
2215   if (0)
2216     /* don't use transition table  */
2217     return transit_state_sb (err, mctx, state);
2218 #endif
2219
2220   /* Use transition table  */
2221   ch = re_string_fetch_byte (&mctx->input);
2222   for (;;)
2223     {
2224       trtable = state->trtable;
2225       if (BE (trtable != NULL, 1))
2226         return trtable[ch];
2227
2228       trtable = state->word_trtable;
2229       if (BE (trtable != NULL, 1))
2230         {
2231           unsigned int context;
2232           context
2233             = re_string_context_at (&mctx->input,
2234                                     re_string_cur_idx (&mctx->input) - 1,
2235                                     mctx->eflags);
2236           if (IS_WORD_CONTEXT (context))
2237             return trtable[ch + SBC_MAX];
2238           else
2239             return trtable[ch];
2240         }
2241
2242       if (!build_trtable (mctx->dfa, state))
2243         {
2244           *err = REG_ESPACE;
2245           return NULL;
2246         }
2247
2248       /* Retry, we now have a transition table.  */
2249     }
2250 }
2251
2252 /* Update the state_log if we need */
2253 re_dfastate_t *
2254 internal_function
2255 merge_state_with_log (reg_errcode_t *err, re_match_context_t *mctx,
2256                       re_dfastate_t *next_state)
2257 {
2258   const re_dfa_t *const dfa = mctx->dfa;
2259   int cur_idx = re_string_cur_idx (&mctx->input);
2260
2261   if (cur_idx > mctx->state_log_top)
2262     {
2263       mctx->state_log[cur_idx] = next_state;
2264       mctx->state_log_top = cur_idx;
2265     }
2266   else if (mctx->state_log[cur_idx] == 0)
2267     {
2268       mctx->state_log[cur_idx] = next_state;
2269     }
2270   else
2271     {
2272       re_dfastate_t *pstate;
2273       unsigned int context;
2274       re_node_set next_nodes, *log_nodes, *table_nodes = NULL;
2275       /* If (state_log[cur_idx] != 0), it implies that cur_idx is
2276          the destination of a multibyte char/collating element/
2277          back reference.  Then the next state is the union set of
2278          these destinations and the results of the transition table.  */
2279       pstate = mctx->state_log[cur_idx];
2280       log_nodes = pstate->entrance_nodes;
2281       if (next_state != NULL)
2282         {
2283           table_nodes = next_state->entrance_nodes;
2284           *err = re_node_set_init_union (&next_nodes, table_nodes,
2285                                              log_nodes);
2286           if (BE (*err != REG_NOERROR, 0))
2287             return NULL;
2288         }
2289       else
2290         next_nodes = *log_nodes;
2291       /* Note: We already add the nodes of the initial state,
2292          then we don't need to add them here.  */
2293
2294       context = re_string_context_at (&mctx->input,
2295                                       re_string_cur_idx (&mctx->input) - 1,
2296                                       mctx->eflags);
2297       next_state = mctx->state_log[cur_idx]
2298         = re_acquire_state_context (err, dfa, &next_nodes, context);
2299       /* We don't need to check errors here, since the return value of
2300          this function is next_state and ERR is already set.  */
2301
2302       if (table_nodes != NULL)
2303         re_node_set_free (&next_nodes);
2304     }
2305
2306   if (BE (dfa->nbackref, 0) && next_state != NULL)
2307     {
2308       /* Check OP_OPEN_SUBEXP in the current state in case that we use them
2309          later.  We must check them here, since the back references in the
2310          next state might use them.  */
2311       *err = check_subexp_matching_top (mctx, &next_state->nodes,
2312                                         cur_idx);
2313       if (BE (*err != REG_NOERROR, 0))
2314         return NULL;
2315
2316       /* If the next state has back references.  */
2317       if (next_state->has_backref)
2318         {
2319           *err = transit_state_bkref (mctx, &next_state->nodes);
2320           if (BE (*err != REG_NOERROR, 0))
2321             return NULL;
2322           next_state = mctx->state_log[cur_idx];
2323         }
2324     }
2325
2326   return next_state;
2327 }
2328
2329 /* Skip bytes in the input that correspond to part of a
2330    multi-byte match, then look in the log for a state
2331    from which to restart matching.  */
2332 re_dfastate_t *
2333 internal_function
2334 find_recover_state (reg_errcode_t *err, re_match_context_t *mctx)
2335 {
2336   re_dfastate_t *cur_state;
2337   do
2338     {
2339       int max = mctx->state_log_top;
2340       int cur_str_idx = re_string_cur_idx (&mctx->input);
2341
2342       do
2343         {
2344           if (++cur_str_idx > max)
2345             return NULL;
2346           re_string_skip_bytes (&mctx->input, 1);
2347         }
2348       while (mctx->state_log[cur_str_idx] == NULL);
2349
2350       cur_state = merge_state_with_log (err, mctx, NULL);
2351     }
2352   while (*err == REG_NOERROR && cur_state == NULL);
2353   return cur_state;
2354 }
2355
2356 /* Helper functions for transit_state.  */
2357
2358 /* From the node set CUR_NODES, pick up the nodes whose types are
2359    OP_OPEN_SUBEXP and which have corresponding back references in the regular
2360    expression. And register them to use them later for evaluating the
2361    correspoding back references.  */
2362
2363 static reg_errcode_t
2364 internal_function
2365 check_subexp_matching_top (re_match_context_t *mctx, re_node_set *cur_nodes,
2366                            int str_idx)
2367 {
2368   const re_dfa_t *const dfa = mctx->dfa;
2369   int node_idx;
2370   reg_errcode_t err;
2371
2372   /* TODO: This isn't efficient.
2373            Because there might be more than one nodes whose types are
2374            OP_OPEN_SUBEXP and whose index is SUBEXP_IDX, we must check all
2375            nodes.
2376            E.g. RE: (a){2}  */
2377   for (node_idx = 0; node_idx < cur_nodes->nelem; ++node_idx)
2378     {
2379       int node = cur_nodes->elems[node_idx];
2380       if (dfa->nodes[node].type == OP_OPEN_SUBEXP
2381           && dfa->nodes[node].opr.idx < BITSET_WORD_BITS
2382           && (dfa->used_bkref_map
2383               & ((bitset_word_t) 1 << dfa->nodes[node].opr.idx)))
2384         {
2385           err = match_ctx_add_subtop (mctx, node, str_idx);
2386           if (BE (err != REG_NOERROR, 0))
2387             return err;
2388         }
2389     }
2390   return REG_NOERROR;
2391 }
2392
2393 #if 0
2394 /* Return the next state to which the current state STATE will transit by
2395    accepting the current input byte.  */
2396
2397 static re_dfastate_t *
2398 transit_state_sb (reg_errcode_t *err, re_match_context_t *mctx,
2399                   re_dfastate_t *state)
2400 {
2401   const re_dfa_t *const dfa = mctx->dfa;
2402   re_node_set next_nodes;
2403   re_dfastate_t *next_state;
2404   int node_cnt, cur_str_idx = re_string_cur_idx (&mctx->input);
2405   unsigned int context;
2406
2407   *err = re_node_set_alloc (&next_nodes, state->nodes.nelem + 1);
2408   if (BE (*err != REG_NOERROR, 0))
2409     return NULL;
2410   for (node_cnt = 0; node_cnt < state->nodes.nelem; ++node_cnt)
2411     {
2412       int cur_node = state->nodes.elems[node_cnt];
2413       if (check_node_accept (mctx, dfa->nodes + cur_node, cur_str_idx))
2414         {
2415           *err = re_node_set_merge (&next_nodes,
2416                                     dfa->eclosures + dfa->nexts[cur_node]);
2417           if (BE (*err != REG_NOERROR, 0))
2418             {
2419               re_node_set_free (&next_nodes);
2420               return NULL;
2421             }
2422         }
2423     }
2424   context = re_string_context_at (&mctx->input, cur_str_idx, mctx->eflags);
2425   next_state = re_acquire_state_context (err, dfa, &next_nodes, context);
2426   /* We don't need to check errors here, since the return value of
2427      this function is next_state and ERR is already set.  */
2428
2429   re_node_set_free (&next_nodes);
2430   re_string_skip_bytes (&mctx->input, 1);
2431   return next_state;
2432 }
2433 #endif
2434
2435 #ifdef RE_ENABLE_I18N
2436 static reg_errcode_t
2437 internal_function
2438 transit_state_mb (re_match_context_t *mctx, re_dfastate_t *pstate)
2439 {
2440   const re_dfa_t *const dfa = mctx->dfa;
2441   reg_errcode_t err;
2442   int i;
2443
2444   for (i = 0; i < pstate->nodes.nelem; ++i)
2445     {
2446       re_node_set dest_nodes, *new_nodes;
2447       int cur_node_idx = pstate->nodes.elems[i];
2448       int naccepted, dest_idx;
2449       unsigned int context;
2450       re_dfastate_t *dest_state;
2451
2452       if (!dfa->nodes[cur_node_idx].accept_mb)
2453         continue;
2454
2455       if (dfa->nodes[cur_node_idx].constraint)
2456         {
2457           context = re_string_context_at (&mctx->input,
2458                                           re_string_cur_idx (&mctx->input),
2459                                           mctx->eflags);
2460           if (NOT_SATISFY_NEXT_CONSTRAINT (dfa->nodes[cur_node_idx].constraint,
2461                                            context))
2462             continue;
2463         }
2464
2465       /* How many bytes the node can accept?  */
2466       naccepted = check_node_accept_bytes (dfa, cur_node_idx, &mctx->input,
2467                                            re_string_cur_idx (&mctx->input));
2468       if (naccepted == 0)
2469         continue;
2470
2471       /* The node can accepts `naccepted' bytes.  */
2472       dest_idx = re_string_cur_idx (&mctx->input) + naccepted;
2473       mctx->max_mb_elem_len = ((mctx->max_mb_elem_len < naccepted) ? naccepted
2474                                : mctx->max_mb_elem_len);
2475       err = clean_state_log_if_needed (mctx, dest_idx);
2476       if (BE (err != REG_NOERROR, 0))
2477         return err;
2478 #ifdef DEBUG
2479       assert (dfa->nexts[cur_node_idx] != -1);
2480 #endif
2481       new_nodes = dfa->eclosures + dfa->nexts[cur_node_idx];
2482
2483       dest_state = mctx->state_log[dest_idx];
2484       if (dest_state == NULL)
2485         dest_nodes = *new_nodes;
2486       else
2487         {
2488           err = re_node_set_init_union (&dest_nodes,
2489                                         dest_state->entrance_nodes, new_nodes);
2490           if (BE (err != REG_NOERROR, 0))
2491             return err;
2492         }
2493       context = re_string_context_at (&mctx->input, dest_idx - 1,
2494                                       mctx->eflags);
2495       mctx->state_log[dest_idx]
2496         = re_acquire_state_context (&err, dfa, &dest_nodes, context);
2497       if (dest_state != NULL)
2498         re_node_set_free (&dest_nodes);
2499       if (BE (mctx->state_log[dest_idx] == NULL && err != REG_NOERROR, 0))
2500         return err;
2501     }
2502   return REG_NOERROR;
2503 }
2504 #endif /* RE_ENABLE_I18N */
2505
2506 static reg_errcode_t
2507 internal_function
2508 transit_state_bkref (re_match_context_t *mctx, const re_node_set *nodes)
2509 {
2510   const re_dfa_t *const dfa = mctx->dfa;
2511   reg_errcode_t err;
2512   int i;
2513   int cur_str_idx = re_string_cur_idx (&mctx->input);
2514
2515   for (i = 0; i < nodes->nelem; ++i)
2516     {
2517       int dest_str_idx, prev_nelem, bkc_idx;
2518       int node_idx = nodes->elems[i];
2519       unsigned int context;
2520       const re_token_t *node = dfa->nodes + node_idx;
2521       re_node_set *new_dest_nodes;
2522
2523       /* Check whether `node' is a backreference or not.  */
2524       if (node->type != OP_BACK_REF)
2525         continue;
2526
2527       if (node->constraint)
2528         {
2529           context = re_string_context_at (&mctx->input, cur_str_idx,
2530                                           mctx->eflags);
2531           if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context))
2532             continue;
2533         }
2534
2535       /* `node' is a backreference.
2536          Check the substring which the substring matched.  */
2537       bkc_idx = mctx->nbkref_ents;
2538       err = get_subexp (mctx, node_idx, cur_str_idx);
2539       if (BE (err != REG_NOERROR, 0))
2540         goto free_return;
2541
2542       /* And add the epsilon closures (which is `new_dest_nodes') of
2543          the backreference to appropriate state_log.  */
2544 #ifdef DEBUG
2545       assert (dfa->nexts[node_idx] != -1);
2546 #endif
2547       for (; bkc_idx < mctx->nbkref_ents; ++bkc_idx)
2548         {
2549           int subexp_len;
2550           re_dfastate_t *dest_state;
2551           struct re_backref_cache_entry *bkref_ent;
2552           bkref_ent = mctx->bkref_ents + bkc_idx;
2553           if (bkref_ent->node != node_idx || bkref_ent->str_idx != cur_str_idx)
2554             continue;
2555           subexp_len = bkref_ent->subexp_to - bkref_ent->subexp_from;
2556           new_dest_nodes = (subexp_len == 0
2557                             ? dfa->eclosures + dfa->edests[node_idx].elems[0]
2558                             : dfa->eclosures + dfa->nexts[node_idx]);
2559           dest_str_idx = (cur_str_idx + bkref_ent->subexp_to
2560                           - bkref_ent->subexp_from);
2561           context = re_string_context_at (&mctx->input, dest_str_idx - 1,
2562                                           mctx->eflags);
2563           dest_state = mctx->state_log[dest_str_idx];
2564           prev_nelem = ((mctx->state_log[cur_str_idx] == NULL) ? 0
2565                         : mctx->state_log[cur_str_idx]->nodes.nelem);
2566           /* Add `new_dest_node' to state_log.  */
2567           if (dest_state == NULL)
2568             {
2569               mctx->state_log[dest_str_idx]
2570                 = re_acquire_state_context (&err, dfa, new_dest_nodes,
2571                                             context);
2572               if (BE (mctx->state_log[dest_str_idx] == NULL
2573                       && err != REG_NOERROR, 0))
2574                 goto free_return;
2575             }
2576           else
2577             {
2578               re_node_set dest_nodes;
2579               err = re_node_set_init_union (&dest_nodes,
2580                                             dest_state->entrance_nodes,
2581                                             new_dest_nodes);
2582               if (BE (err != REG_NOERROR, 0))
2583                 {
2584                   re_node_set_free (&dest_nodes);
2585                   goto free_return;
2586                 }
2587               mctx->state_log[dest_str_idx]
2588                 = re_acquire_state_context (&err, dfa, &dest_nodes, context);
2589               re_node_set_free (&dest_nodes);
2590               if (BE (mctx->state_log[dest_str_idx] == NULL
2591                       && err != REG_NOERROR, 0))
2592                 goto free_return;
2593             }
2594           /* We need to check recursively if the backreference can epsilon
2595              transit.  */
2596           if (subexp_len == 0
2597               && mctx->state_log[cur_str_idx]->nodes.nelem > prev_nelem)
2598             {
2599               err = check_subexp_matching_top (mctx, new_dest_nodes,
2600                                                cur_str_idx);
2601               if (BE (err != REG_NOERROR, 0))
2602                 goto free_return;
2603               err = transit_state_bkref (mctx, new_dest_nodes);
2604               if (BE (err != REG_NOERROR, 0))
2605                 goto free_return;
2606             }
2607         }
2608     }
2609   err = REG_NOERROR;
2610  free_return:
2611   return err;
2612 }
2613
2614 /* Enumerate all the candidates which the backreference BKREF_NODE can match
2615    at BKREF_STR_IDX, and register them by match_ctx_add_entry().
2616    Note that we might collect inappropriate candidates here.
2617    However, the cost of checking them strictly here is too high, then we
2618    delay these checking for prune_impossible_nodes().  */
2619
2620 static reg_errcode_t
2621 internal_function
2622 get_subexp (re_match_context_t *mctx, int bkref_node, int bkref_str_idx)
2623 {
2624   const re_dfa_t *const dfa = mctx->dfa;
2625   int subexp_num, sub_top_idx;
2626   const char *buf = (const char *) re_string_get_buffer (&mctx->input);
2627   /* Return if we have already checked BKREF_NODE at BKREF_STR_IDX.  */
2628   int cache_idx = search_cur_bkref_entry (mctx, bkref_str_idx);
2629   if (cache_idx != -1)
2630     {
2631       const struct re_backref_cache_entry *entry
2632         = mctx->bkref_ents + cache_idx;
2633       do
2634         if (entry->node == bkref_node)
2635           return REG_NOERROR; /* We already checked it.  */
2636       while (entry++->more);
2637     }
2638
2639   subexp_num = dfa->nodes[bkref_node].opr.idx;
2640
2641   /* For each sub expression  */
2642   for (sub_top_idx = 0; sub_top_idx < mctx->nsub_tops; ++sub_top_idx)
2643     {
2644       reg_errcode_t err;
2645       re_sub_match_top_t *sub_top = mctx->sub_tops[sub_top_idx];
2646       re_sub_match_last_t *sub_last;
2647       int sub_last_idx, sl_str, bkref_str_off;
2648
2649       if (dfa->nodes[sub_top->node].opr.idx != subexp_num)
2650         continue; /* It isn't related.  */
2651
2652       sl_str = sub_top->str_idx;
2653       bkref_str_off = bkref_str_idx;
2654       /* At first, check the last node of sub expressions we already
2655          evaluated.  */
2656       for (sub_last_idx = 0; sub_last_idx < sub_top->nlasts; ++sub_last_idx)
2657         {
2658           int sl_str_diff;
2659           sub_last = sub_top->lasts[sub_last_idx];
2660           sl_str_diff = sub_last->str_idx - sl_str;
2661           /* The matched string by the sub expression match with the substring
2662              at the back reference?  */
2663           if (sl_str_diff > 0)
2664             {
2665               if (BE (bkref_str_off + sl_str_diff > mctx->input.valid_len, 0))
2666                 {
2667                   /* Not enough chars for a successful match.  */
2668                   if (bkref_str_off + sl_str_diff > mctx->input.len)
2669                     break;
2670
2671                   err = clean_state_log_if_needed (mctx,
2672                                                    bkref_str_off
2673                                                    + sl_str_diff);
2674                   if (BE (err != REG_NOERROR, 0))
2675                     return err;
2676                   buf = (const char *) re_string_get_buffer (&mctx->input);
2677                 }
2678               if (memcmp (buf + bkref_str_off, buf + sl_str, sl_str_diff) != 0)
2679                 /* We don't need to search this sub expression any more.  */
2680                 break;
2681             }
2682           bkref_str_off += sl_str_diff;
2683           sl_str += sl_str_diff;
2684           err = get_subexp_sub (mctx, sub_top, sub_last, bkref_node,
2685                                 bkref_str_idx);
2686
2687           /* Reload buf, since the preceding call might have reallocated
2688              the buffer.  */
2689           buf = (const char *) re_string_get_buffer (&mctx->input);
2690
2691           if (err == REG_NOMATCH)
2692             continue;
2693           if (BE (err != REG_NOERROR, 0))
2694             return err;
2695         }
2696
2697       if (sub_last_idx < sub_top->nlasts)
2698         continue;
2699       if (sub_last_idx > 0)
2700         ++sl_str;
2701       /* Then, search for the other last nodes of the sub expression.  */
2702       for (; sl_str <= bkref_str_idx; ++sl_str)
2703         {
2704           int cls_node, sl_str_off;
2705           const re_node_set *nodes;
2706           sl_str_off = sl_str - sub_top->str_idx;
2707           /* The matched string by the sub expression match with the substring
2708              at the back reference?  */
2709           if (sl_str_off > 0)
2710             {
2711               if (BE (bkref_str_off >= mctx->input.valid_len, 0))
2712                 {
2713                   /* If we are at the end of the input, we cannot match.  */
2714                   if (bkref_str_off >= mctx->input.len)
2715                     break;
2716
2717                   err = extend_buffers (mctx);
2718                   if (BE (err != REG_NOERROR, 0))
2719                     return err;
2720
2721                   buf = (const char *) re_string_get_buffer (&mctx->input);
2722                 }
2723               if (buf [bkref_str_off++] != buf[sl_str - 1])
2724                 break; /* We don't need to search this sub expression
2725                           any more.  */
2726             }
2727           if (mctx->state_log[sl_str] == NULL)
2728             continue;
2729           /* Does this state have a ')' of the sub expression?  */
2730           nodes = &mctx->state_log[sl_str]->nodes;
2731           cls_node = find_subexp_node (dfa, nodes, subexp_num,
2732                                        OP_CLOSE_SUBEXP);
2733           if (cls_node == -1)
2734             continue; /* No.  */
2735           if (sub_top->path == NULL)
2736             {
2737               sub_top->path = calloc (sizeof (state_array_t),
2738                                       sl_str - sub_top->str_idx + 1);
2739               if (sub_top->path == NULL)
2740                 return REG_ESPACE;
2741             }
2742           /* Can the OP_OPEN_SUBEXP node arrive the OP_CLOSE_SUBEXP node
2743              in the current context?  */
2744           err = check_arrival (mctx, sub_top->path, sub_top->node,
2745                                sub_top->str_idx, cls_node, sl_str,
2746                                OP_CLOSE_SUBEXP);
2747           if (err == REG_NOMATCH)
2748               continue;
2749           if (BE (err != REG_NOERROR, 0))
2750               return err;
2751           sub_last = match_ctx_add_sublast (sub_top, cls_node, sl_str);
2752           if (BE (sub_last == NULL, 0))
2753             return REG_ESPACE;
2754           err = get_subexp_sub (mctx, sub_top, sub_last, bkref_node,
2755                                 bkref_str_idx);
2756           if (err == REG_NOMATCH)
2757             continue;
2758         }
2759     }
2760   return REG_NOERROR;
2761 }
2762
2763 /* Helper functions for get_subexp().  */
2764
2765 /* Check SUB_LAST can arrive to the back reference BKREF_NODE at BKREF_STR.
2766    If it can arrive, register the sub expression expressed with SUB_TOP
2767    and SUB_LAST.  */
2768
2769 static reg_errcode_t
2770 internal_function
2771 get_subexp_sub (re_match_context_t *mctx, const re_sub_match_top_t *sub_top,
2772                 re_sub_match_last_t *sub_last, int bkref_node, int bkref_str)
2773 {
2774   reg_errcode_t err;
2775   int to_idx;
2776   /* Can the subexpression arrive the back reference?  */
2777   err = check_arrival (mctx, &sub_last->path, sub_last->node,
2778                        sub_last->str_idx, bkref_node, bkref_str,
2779                        OP_OPEN_SUBEXP);
2780   if (err != REG_NOERROR)
2781     return err;
2782   err = match_ctx_add_entry (mctx, bkref_node, bkref_str, sub_top->str_idx,
2783                              sub_last->str_idx);
2784   if (BE (err != REG_NOERROR, 0))
2785     return err;
2786   to_idx = bkref_str + sub_last->str_idx - sub_top->str_idx;
2787   return clean_state_log_if_needed (mctx, to_idx);
2788 }
2789
2790 /* Find the first node which is '(' or ')' and whose index is SUBEXP_IDX.
2791    Search '(' if FL_OPEN, or search ')' otherwise.
2792    TODO: This function isn't efficient...
2793          Because there might be more than one nodes whose types are
2794          OP_OPEN_SUBEXP and whose index is SUBEXP_IDX, we must check all
2795          nodes.
2796          E.g. RE: (a){2}  */
2797
2798 static int
2799 internal_function
2800 find_subexp_node (const re_dfa_t *dfa, const re_node_set *nodes,
2801                   int subexp_idx, int type)
2802 {
2803   int cls_idx;
2804   for (cls_idx = 0; cls_idx < nodes->nelem; ++cls_idx)
2805     {
2806       int cls_node = nodes->elems[cls_idx];
2807       const re_token_t *node = dfa->nodes + cls_node;
2808       if (node->type == type
2809           && node->opr.idx == subexp_idx)
2810         return cls_node;
2811     }
2812   return -1;
2813 }
2814
2815 /* Check whether the node TOP_NODE at TOP_STR can arrive to the node
2816    LAST_NODE at LAST_STR.  We record the path onto PATH since it will be
2817    heavily reused.
2818    Return REG_NOERROR if it can arrive, or REG_NOMATCH otherwise.  */
2819
2820 static reg_errcode_t
2821 internal_function
2822 check_arrival (re_match_context_t *mctx, state_array_t *path, int top_node,
2823                int top_str, int last_node, int last_str, int type)
2824 {
2825   const re_dfa_t *const dfa = mctx->dfa;
2826   reg_errcode_t err = REG_NOERROR;
2827   int subexp_num, backup_cur_idx, str_idx, null_cnt;
2828   re_dfastate_t *cur_state = NULL;
2829   re_node_set *cur_nodes, next_nodes;
2830   re_dfastate_t **backup_state_log;
2831   unsigned int context;
2832
2833   subexp_num = dfa->nodes[top_node].opr.idx;
2834   /* Extend the buffer if we need.  */
2835   if (BE (path->alloc < last_str + mctx->max_mb_elem_len + 1, 0))
2836     {
2837       re_dfastate_t **new_array;
2838       int old_alloc = path->alloc;
2839       path->alloc += last_str + mctx->max_mb_elem_len + 1;
2840       new_array = re_realloc (path->array, re_dfastate_t *, path->alloc);
2841       if (BE (new_array == NULL, 0))
2842         {
2843           path->alloc = old_alloc;
2844           return REG_ESPACE;
2845         }
2846       path->array = new_array;
2847       memset (new_array + old_alloc, '\0',
2848               sizeof (re_dfastate_t *) * (path->alloc - old_alloc));
2849     }
2850
2851   str_idx = path->next_idx ?: top_str;
2852
2853   /* Temporary modify MCTX.  */
2854   backup_state_log = mctx->state_log;
2855   backup_cur_idx = mctx->input.cur_idx;
2856   mctx->state_log = path->array;
2857   mctx->input.cur_idx = str_idx;
2858
2859   /* Setup initial node set.  */
2860   context = re_string_context_at (&mctx->input, str_idx - 1, mctx->eflags);
2861   if (str_idx == top_str)
2862     {
2863       err = re_node_set_init_1 (&next_nodes, top_node);
2864       if (BE (err != REG_NOERROR, 0))
2865         return err;
2866       err = check_arrival_expand_ecl (dfa, &next_nodes, subexp_num, type);
2867       if (BE (err != REG_NOERROR, 0))
2868         {
2869           re_node_set_free (&next_nodes);
2870           return err;
2871         }
2872     }
2873   else
2874     {
2875       cur_state = mctx->state_log[str_idx];
2876       if (cur_state && cur_state->has_backref)
2877         {
2878           err = re_node_set_init_copy (&next_nodes, &cur_state->nodes);
2879           if (BE (err != REG_NOERROR, 0))
2880             return err;
2881         }
2882       else
2883         re_node_set_init_empty (&next_nodes);
2884     }
2885   if (str_idx == top_str || (cur_state && cur_state->has_backref))
2886     {
2887       if (next_nodes.nelem)
2888         {
2889           err = expand_bkref_cache (mctx, &next_nodes, str_idx,
2890                                     subexp_num, type);
2891           if (BE (err != REG_NOERROR, 0))
2892             {
2893               re_node_set_free (&next_nodes);
2894               return err;
2895             }
2896         }
2897       cur_state = re_acquire_state_context (&err, dfa, &next_nodes, context);
2898       if (BE (cur_state == NULL && err != REG_NOERROR, 0))
2899         {
2900           re_node_set_free (&next_nodes);
2901           return err;
2902         }
2903       mctx->state_log[str_idx] = cur_state;
2904     }
2905
2906   for (null_cnt = 0; str_idx < last_str && null_cnt <= mctx->max_mb_elem_len;)
2907     {
2908       re_node_set_empty (&next_nodes);
2909       if (mctx->state_log[str_idx + 1])
2910         {
2911           err = re_node_set_merge (&next_nodes,
2912                                    &mctx->state_log[str_idx + 1]->nodes);
2913           if (BE (err != REG_NOERROR, 0))
2914             {
2915               re_node_set_free (&next_nodes);
2916               return err;
2917             }
2918         }
2919       if (cur_state)
2920         {
2921           err = check_arrival_add_next_nodes (mctx, str_idx,
2922                                               &cur_state->non_eps_nodes,
2923                                               &next_nodes);
2924           if (BE (err != REG_NOERROR, 0))
2925             {
2926               re_node_set_free (&next_nodes);
2927               return err;
2928             }
2929         }
2930       ++str_idx;
2931       if (next_nodes.nelem)
2932         {
2933           err = check_arrival_expand_ecl (dfa, &next_nodes, subexp_num, type);
2934           if (BE (err != REG_NOERROR, 0))
2935             {
2936               re_node_set_free (&next_nodes);
2937               return err;
2938             }
2939           err = expand_bkref_cache (mctx, &next_nodes, str_idx,
2940                                     subexp_num, type);
2941           if (BE (err != REG_NOERROR, 0))
2942             {
2943               re_node_set_free (&next_nodes);
2944               return err;
2945             }
2946         }
2947       context = re_string_context_at (&mctx->input, str_idx - 1, mctx->eflags);
2948       cur_state = re_acquire_state_context (&err, dfa, &next_nodes, context);
2949       if (BE (cur_state == NULL && err != REG_NOERROR, 0))
2950         {
2951           re_node_set_free (&next_nodes);
2952           return err;
2953         }
2954       mctx->state_log[str_idx] = cur_state;
2955       null_cnt = cur_state == NULL ? null_cnt + 1 : 0;
2956     }
2957   re_node_set_free (&next_nodes);
2958   cur_nodes = (mctx->state_log[last_str] == NULL ? NULL
2959                : &mctx->state_log[last_str]->nodes);
2960   path->next_idx = str_idx;
2961
2962   /* Fix MCTX.  */
2963   mctx->state_log = backup_state_log;
2964   mctx->input.cur_idx = backup_cur_idx;
2965
2966   /* Then check the current node set has the node LAST_NODE.  */
2967   if (cur_nodes != NULL && re_node_set_contains (cur_nodes, last_node))
2968     return REG_NOERROR;
2969
2970   return REG_NOMATCH;
2971 }
2972
2973 /* Helper functions for check_arrival.  */
2974
2975 /* Calculate the destination nodes of CUR_NODES at STR_IDX, and append them
2976    to NEXT_NODES.
2977    TODO: This function is similar to the functions transit_state*(),
2978          however this function has many additional works.
2979          Can't we unify them?  */
2980
2981 static reg_errcode_t
2982 internal_function
2983 check_arrival_add_next_nodes (re_match_context_t *mctx, int str_idx,
2984                               re_node_set *cur_nodes, re_node_set *next_nodes)
2985 {
2986   const re_dfa_t *const dfa = mctx->dfa;
2987   int result;
2988   int cur_idx;
2989 #ifdef RE_ENABLE_I18N
2990   reg_errcode_t err = REG_NOERROR;
2991 #endif
2992   re_node_set union_set;
2993   re_node_set_init_empty (&union_set);
2994   for (cur_idx = 0; cur_idx < cur_nodes->nelem; ++cur_idx)
2995     {
2996       int naccepted = 0;
2997       int cur_node = cur_nodes->elems[cur_idx];
2998 #ifdef DEBUG
2999       re_token_type_t type = dfa->nodes[cur_node].type;
3000       assert (!IS_EPSILON_NODE (type));
3001 #endif
3002 #ifdef RE_ENABLE_I18N
3003       /* If the node may accept `multi byte'.  */
3004       if (dfa->nodes[cur_node].accept_mb)
3005         {
3006           naccepted = check_node_accept_bytes (dfa, cur_node, &mctx->input,
3007                                                str_idx);
3008           if (naccepted > 1)
3009             {
3010               re_dfastate_t *dest_state;
3011               int next_node = dfa->nexts[cur_node];
3012               int next_idx = str_idx + naccepted;
3013               dest_state = mctx->state_log[next_idx];
3014               re_node_set_empty (&union_set);
3015               if (dest_state)
3016                 {
3017                   err = re_node_set_merge (&union_set, &dest_state->nodes);
3018                   if (BE (err != REG_NOERROR, 0))
3019                     {
3020                       re_node_set_free (&union_set);
3021                       return err;
3022                     }
3023                 }
3024               result = re_node_set_insert (&union_set, next_node);
3025               if (BE (result < 0, 0))
3026                 {
3027                   re_node_set_free (&union_set);
3028                   return REG_ESPACE;
3029                 }
3030               mctx->state_log[next_idx] = re_acquire_state (&err, dfa,
3031                                                             &union_set);
3032               if (BE (mctx->state_log[next_idx] == NULL
3033                       && err != REG_NOERROR, 0))
3034                 {
3035                   re_node_set_free (&union_set);
3036                   return err;
3037                 }
3038             }
3039         }
3040 #endif /* RE_ENABLE_I18N */
3041       if (naccepted
3042           || check_node_accept (mctx, dfa->nodes + cur_node, str_idx))
3043         {
3044           result = re_node_set_insert (next_nodes, dfa->nexts[cur_node]);
3045           if (BE (result < 0, 0))
3046             {
3047               re_node_set_free (&union_set);
3048               return REG_ESPACE;
3049             }
3050         }
3051     }
3052   re_node_set_free (&union_set);
3053   return REG_NOERROR;
3054 }
3055
3056 /* For all the nodes in CUR_NODES, add the epsilon closures of them to
3057    CUR_NODES, however exclude the nodes which are:
3058     - inside the sub expression whose number is EX_SUBEXP, if FL_OPEN.
3059     - out of the sub expression whose number is EX_SUBEXP, if !FL_OPEN.
3060 */
3061
3062 static reg_errcode_t
3063 internal_function
3064 check_arrival_expand_ecl (const re_dfa_t *dfa, re_node_set *cur_nodes,
3065                           int ex_subexp, int type)
3066 {
3067   reg_errcode_t err;
3068   int idx, outside_node;
3069   re_node_set new_nodes;
3070 #ifdef DEBUG
3071   assert (cur_nodes->nelem);
3072 #endif
3073   err = re_node_set_alloc (&new_nodes, cur_nodes->nelem);
3074   if (BE (err != REG_NOERROR, 0))
3075     return err;
3076   /* Create a new node set NEW_NODES with the nodes which are epsilon
3077      closures of the node in CUR_NODES.  */
3078
3079   for (idx = 0; idx < cur_nodes->nelem; ++idx)
3080     {
3081       int cur_node = cur_nodes->elems[idx];
3082       const re_node_set *eclosure = dfa->eclosures + cur_node;
3083       outside_node = find_subexp_node (dfa, eclosure, ex_subexp, type);
3084       if (outside_node == -1)
3085         {
3086           /* There are no problematic nodes, just merge them.  */
3087           err = re_node_set_merge (&new_nodes, eclosure);
3088           if (BE (err != REG_NOERROR, 0))
3089             {
3090               re_node_set_free (&new_nodes);
3091               return err;
3092             }
3093         }
3094       else
3095         {
3096           /* There are problematic nodes, re-calculate incrementally.  */
3097           err = check_arrival_expand_ecl_sub (dfa, &new_nodes, cur_node,
3098                                               ex_subexp, type);
3099           if (BE (err != REG_NOERROR, 0))
3100             {
3101               re_node_set_free (&new_nodes);
3102               return err;
3103             }
3104         }
3105     }
3106   re_node_set_free (cur_nodes);
3107   *cur_nodes = new_nodes;
3108   return REG_NOERROR;
3109 }
3110
3111 /* Helper function for check_arrival_expand_ecl.
3112    Check incrementally the epsilon closure of TARGET, and if it isn't
3113    problematic append it to DST_NODES.  */
3114
3115 static reg_errcode_t
3116 internal_function
3117 check_arrival_expand_ecl_sub (const re_dfa_t *dfa, re_node_set *dst_nodes,
3118                               int target, int ex_subexp, int type)
3119 {
3120   int cur_node;
3121   for (cur_node = target; !re_node_set_contains (dst_nodes, cur_node);)
3122     {
3123       int err;
3124
3125       if (dfa->nodes[cur_node].type == type
3126           && dfa->nodes[cur_node].opr.idx == ex_subexp)
3127         {
3128           if (type == OP_CLOSE_SUBEXP)
3129             {
3130               err = re_node_set_insert (dst_nodes, cur_node);
3131               if (BE (err == -1, 0))
3132                 return REG_ESPACE;
3133             }
3134           break;
3135         }
3136       err = re_node_set_insert (dst_nodes, cur_node);
3137       if (BE (err == -1, 0))
3138         return REG_ESPACE;
3139       if (dfa->edests[cur_node].nelem == 0)
3140         break;
3141       if (dfa->edests[cur_node].nelem == 2)
3142         {
3143           err = check_arrival_expand_ecl_sub (dfa, dst_nodes,
3144                                               dfa->edests[cur_node].elems[1],
3145                                               ex_subexp, type);
3146           if (BE (err != REG_NOERROR, 0))
3147             return err;
3148         }
3149       cur_node = dfa->edests[cur_node].elems[0];
3150     }
3151   return REG_NOERROR;
3152 }
3153
3154
3155 /* For all the back references in the current state, calculate the
3156    destination of the back references by the appropriate entry
3157    in MCTX->BKREF_ENTS.  */
3158
3159 static reg_errcode_t
3160 internal_function
3161 expand_bkref_cache (re_match_context_t *mctx, re_node_set *cur_nodes,
3162                     int cur_str, int subexp_num, int type)
3163 {
3164   const re_dfa_t *const dfa = mctx->dfa;
3165   reg_errcode_t err;
3166   int cache_idx_start = search_cur_bkref_entry (mctx, cur_str);
3167   struct re_backref_cache_entry *ent;
3168
3169   if (cache_idx_start == -1)
3170     return REG_NOERROR;
3171
3172  restart:
3173   ent = mctx->bkref_ents + cache_idx_start;
3174   do
3175     {
3176       int to_idx, next_node;
3177
3178       /* Is this entry ENT is appropriate?  */
3179       if (!re_node_set_contains (cur_nodes, ent->node))
3180         continue; /* No.  */
3181
3182       to_idx = cur_str + ent->subexp_to - ent->subexp_from;
3183       /* Calculate the destination of the back reference, and append it
3184          to MCTX->STATE_LOG.  */
3185       if (to_idx == cur_str)
3186         {
3187           /* The backreference did epsilon transit, we must re-check all the
3188              node in the current state.  */
3189           re_node_set new_dests;
3190           reg_errcode_t err2, err3;
3191           next_node = dfa->edests[ent->node].elems[0];
3192           if (re_node_set_contains (cur_nodes, next_node))
3193             continue;
3194           err = re_node_set_init_1 (&new_dests, next_node);
3195           err2 = check_arrival_expand_ecl (dfa, &new_dests, subexp_num, type);
3196           err3 = re_node_set_merge (cur_nodes, &new_dests);
3197           re_node_set_free (&new_dests);
3198           if (BE (err != REG_NOERROR || err2 != REG_NOERROR
3199                   || err3 != REG_NOERROR, 0))
3200             {
3201               err = (err != REG_NOERROR ? err
3202                      : (err2 != REG_NOERROR ? err2 : err3));
3203               return err;
3204             }
3205           /* TODO: It is still inefficient...  */
3206           goto restart;
3207         }
3208       else
3209         {
3210           re_node_set union_set;
3211           next_node = dfa->nexts[ent->node];
3212           if (mctx->state_log[to_idx])
3213             {
3214               int ret;
3215               if (re_node_set_contains (&mctx->state_log[to_idx]->nodes,
3216                                         next_node))
3217                 continue;
3218               err = re_node_set_init_copy (&union_set,
3219                                            &mctx->state_log[to_idx]->nodes);
3220               ret = re_node_set_insert (&union_set, next_node);
3221               if (BE (err != REG_NOERROR || ret < 0, 0))
3222                 {
3223                   re_node_set_free (&union_set);
3224                   err = err != REG_NOERROR ? err : REG_ESPACE;
3225                   return err;
3226                 }
3227             }
3228           else
3229             {
3230               err = re_node_set_init_1 (&union_set, next_node);
3231               if (BE (err != REG_NOERROR, 0))
3232                 return err;
3233             }
3234           mctx->state_log[to_idx] = re_acquire_state (&err, dfa, &union_set);
3235           re_node_set_free (&union_set);
3236           if (BE (mctx->state_log[to_idx] == NULL
3237                   && err != REG_NOERROR, 0))
3238             return err;
3239         }
3240     }
3241   while (ent++->more);
3242   return REG_NOERROR;
3243 }
3244
3245 /* Build transition table for the state.
3246    Return 1 if succeeded, otherwise return NULL.  */
3247
3248 static int
3249 internal_function
3250 build_trtable (const re_dfa_t *dfa, re_dfastate_t *state)
3251 {
3252   reg_errcode_t err;
3253   int i, j, ch, need_word_trtable = 0;
3254   bitset_word_t elem, mask;
3255   bool dests_node_malloced = false;
3256   bool dest_states_malloced = false;
3257   int ndests; /* Number of the destination states from `state'.  */
3258   re_dfastate_t **trtable;
3259   re_dfastate_t **dest_states = NULL, **dest_states_word, **dest_states_nl;
3260   re_node_set follows, *dests_node;
3261   bitset_t *dests_ch;
3262   bitset_t acceptable;
3263
3264   struct dests_alloc
3265   {
3266     re_node_set dests_node[SBC_MAX];
3267     bitset_t dests_ch[SBC_MAX];
3268   } *dests_alloc;
3269
3270   /* We build DFA states which corresponds to the destination nodes
3271      from `state'.  `dests_node[i]' represents the nodes which i-th
3272      destination state contains, and `dests_ch[i]' represents the
3273      characters which i-th destination state accepts.  */
3274   if (__libc_use_alloca (sizeof (struct dests_alloc)))
3275     dests_alloc = (struct dests_alloc *) alloca (sizeof (struct dests_alloc));
3276   else
3277     {
3278       dests_alloc = re_malloc (struct dests_alloc, 1);
3279       if (BE (dests_alloc == NULL, 0))
3280         return 0;
3281       dests_node_malloced = true;
3282     }
3283   dests_node = dests_alloc->dests_node;
3284   dests_ch = dests_alloc->dests_ch;
3285
3286   /* Initialize transiton table.  */
3287   state->word_trtable = state->trtable = NULL;
3288
3289   /* At first, group all nodes belonging to `state' into several
3290      destinations.  */
3291   ndests = group_nodes_into_DFAstates (dfa, state, dests_node, dests_ch);
3292   if (BE (ndests <= 0, 0))
3293     {
3294       if (dests_node_malloced)
3295         free (dests_alloc);
3296       /* Return 0 in case of an error, 1 otherwise.  */
3297       if (ndests == 0)
3298         {
3299           state->trtable = (re_dfastate_t **)
3300             calloc (sizeof (re_dfastate_t *), SBC_MAX);
3301           return 1;
3302         }
3303       return 0;
3304     }
3305
3306   err = re_node_set_alloc (&follows, ndests + 1);
3307   if (BE (err != REG_NOERROR, 0))
3308     goto out_free;
3309
3310   if (__libc_use_alloca ((sizeof (re_node_set) + sizeof (bitset_t)) * SBC_MAX
3311                          + ndests * 3 * sizeof (re_dfastate_t *)))
3312     dest_states = (re_dfastate_t **)
3313       alloca (ndests * 3 * sizeof (re_dfastate_t *));
3314   else
3315     {
3316       dest_states = (re_dfastate_t **)
3317         malloc (ndests * 3 * sizeof (re_dfastate_t *));
3318       if (BE (dest_states == NULL, 0))
3319         {
3320 out_free:
3321           if (dest_states_malloced)
3322             free (dest_states);
3323           re_node_set_free (&follows);
3324           for (i = 0; i < ndests; ++i)
3325             re_node_set_free (dests_node + i);
3326           if (dests_node_malloced)
3327             free (dests_alloc);
3328           return 0;
3329         }
3330       dest_states_malloced = true;
3331     }
3332   dest_states_word = dest_states + ndests;
3333   dest_states_nl = dest_states_word + ndests;
3334   bitset_empty (acceptable);
3335
3336   /* Then build the states for all destinations.  */
3337   for (i = 0; i < ndests; ++i)
3338     {
3339       int next_node;
3340       re_node_set_empty (&follows);
3341       /* Merge the follows of this destination states.  */
3342       for (j = 0; j < dests_node[i].nelem; ++j)
3343         {
3344           next_node = dfa->nexts[dests_node[i].elems[j]];
3345           if (next_node != -1)
3346             {
3347               err = re_node_set_merge (&follows, dfa->eclosures + next_node);
3348               if (BE (err != REG_NOERROR, 0))
3349                 goto out_free;
3350             }
3351         }
3352       dest_states[i] = re_acquire_state_context (&err, dfa, &follows, 0);
3353       if (BE (dest_states[i] == NULL && err != REG_NOERROR, 0))
3354         goto out_free;
3355       /* If the new state has context constraint,
3356          build appropriate states for these contexts.  */
3357       if (dest_states[i]->has_constraint)
3358         {
3359           dest_states_word[i] = re_acquire_state_context (&err, dfa, &follows,
3360                                                           CONTEXT_WORD);
3361           if (BE (dest_states_word[i] == NULL && err != REG_NOERROR, 0))
3362             goto out_free;
3363
3364           if (dest_states[i] != dest_states_word[i] && dfa->mb_cur_max > 1)
3365             need_word_trtable = 1;
3366
3367           dest_states_nl[i] = re_acquire_state_context (&err, dfa, &follows,
3368                                                         CONTEXT_NEWLINE);
3369           if (BE (dest_states_nl[i] == NULL && err != REG_NOERROR, 0))
3370             goto out_free;
3371         }
3372       else
3373         {
3374           dest_states_word[i] = dest_states[i];
3375           dest_states_nl[i] = dest_states[i];
3376         }
3377       bitset_merge (acceptable, dests_ch[i]);
3378     }
3379
3380   if (!BE (need_word_trtable, 0))
3381     {
3382       /* We don't care about whether the following character is a word
3383          character, or we are in a single-byte character set so we can
3384          discern by looking at the character code: allocate a
3385          256-entry transition table.  */
3386       trtable = state->trtable = calloc (sizeof (re_dfastate_t *), SBC_MAX);
3387       if (BE (trtable == NULL, 0))
3388         goto out_free;
3389
3390       /* For all characters ch...:  */
3391       for (i = 0; i < BITSET_WORDS; ++i)
3392         for (ch = i * BITSET_WORD_BITS, elem = acceptable[i], mask = 1;
3393              elem;
3394              mask <<= 1, elem >>= 1, ++ch)
3395           if (BE (elem & 1, 0))
3396             {
3397               /* There must be exactly one destination which accepts
3398                  character ch.  See group_nodes_into_DFAstates.  */
3399               for (j = 0; (dests_ch[j][i] & mask) == 0; ++j)
3400                 ;
3401
3402               /* j-th destination accepts the word character ch.  */
3403               if (dfa->word_char[i] & mask)
3404                 trtable[ch] = dest_states_word[j];
3405               else
3406                 trtable[ch] = dest_states[j];
3407             }
3408     }
3409   else
3410     {
3411       /* We care about whether the following character is a word
3412          character, and we are in a multi-byte character set: discern
3413          by looking at the character code: build two 256-entry
3414          transition tables, one starting at trtable[0] and one
3415          starting at trtable[SBC_MAX].  */
3416       trtable = state->word_trtable = calloc (sizeof (re_dfastate_t *), 2 * SBC_MAX);
3417       if (BE (trtable == NULL, 0))
3418         goto out_free;
3419
3420       /* For all characters ch...:  */
3421       for (i = 0; i < BITSET_WORDS; ++i)
3422         for (ch = i * BITSET_WORD_BITS, elem = acceptable[i], mask = 1;
3423              elem;
3424              mask <<= 1, elem >>= 1, ++ch)
3425           if (BE (elem & 1, 0))
3426             {
3427               /* There must be exactly one destination which accepts
3428                  character ch.  See group_nodes_into_DFAstates.  */
3429               for (j = 0; (dests_ch[j][i] & mask) == 0; ++j)
3430                 ;
3431
3432               /* j-th destination accepts the word character ch.  */
3433               trtable[ch] = dest_states[j];
3434               trtable[ch + SBC_MAX] = dest_states_word[j];
3435             }
3436     }
3437
3438   /* new line */
3439   if (bitset_contain (acceptable, NEWLINE_CHAR))
3440     {
3441       /* The current state accepts newline character.  */
3442       for (j = 0; j < ndests; ++j)
3443         if (bitset_contain (dests_ch[j], NEWLINE_CHAR))
3444           {
3445             /* k-th destination accepts newline character.  */
3446             trtable[NEWLINE_CHAR] = dest_states_nl[j];
3447             if (need_word_trtable)
3448               trtable[NEWLINE_CHAR + SBC_MAX] = dest_states_nl[j];
3449             /* There must be only one destination which accepts
3450                newline.  See group_nodes_into_DFAstates.  */
3451             break;
3452           }
3453     }
3454
3455   if (dest_states_malloced)
3456     free (dest_states);
3457
3458   re_node_set_free (&follows);
3459   for (i = 0; i < ndests; ++i)
3460     re_node_set_free (dests_node + i);
3461
3462   if (dests_node_malloced)
3463     free (dests_alloc);
3464
3465   return 1;
3466 }
3467
3468 /* Group all nodes belonging to STATE into several destinations.
3469    Then for all destinations, set the nodes belonging to the destination
3470    to DESTS_NODE[i] and set the characters accepted by the destination
3471    to DEST_CH[i].  This function return the number of destinations.  */
3472
3473 static int
3474 internal_function
3475 group_nodes_into_DFAstates (const re_dfa_t *dfa, const re_dfastate_t *state,
3476                             re_node_set *dests_node, bitset_t *dests_ch)
3477 {
3478   reg_errcode_t err;
3479   int result;
3480   int i, j, k;
3481   int ndests; /* Number of the destinations from `state'.  */
3482   bitset_t accepts; /* Characters a node can accept.  */
3483   const re_node_set *cur_nodes = &state->nodes;
3484   bitset_empty (accepts);
3485   ndests = 0;
3486
3487   /* For all the nodes belonging to `state',  */
3488   for (i = 0; i < cur_nodes->nelem; ++i)
3489     {
3490       re_token_t *node = &dfa->nodes[cur_nodes->elems[i]];
3491       re_token_type_t type = node->type;
3492       unsigned int constraint = node->constraint;
3493
3494       /* Enumerate all single byte character this node can accept.  */
3495       if (type == CHARACTER)
3496         bitset_set (accepts, node->opr.c);
3497       else if (type == SIMPLE_BRACKET)
3498         {
3499           bitset_merge (accepts, node->opr.sbcset);
3500         }
3501       else if (type == OP_PERIOD)
3502         {
3503 #ifdef RE_ENABLE_I18N
3504           if (dfa->mb_cur_max > 1)
3505             bitset_merge (accepts, dfa->sb_char);
3506           else
3507 #endif
3508             bitset_set_all (accepts);
3509           if (!(dfa->syntax & RE_DOT_NEWLINE))
3510             bitset_clear (accepts, '\n');
3511           if (dfa->syntax & RE_DOT_NOT_NULL)
3512             bitset_clear (accepts, '\0');
3513         }
3514 #ifdef RE_ENABLE_I18N
3515       else if (type == OP_UTF8_PERIOD)
3516         {
3517           memset (accepts, '\xff', sizeof (bitset_t) / 2);
3518           if (!(dfa->syntax & RE_DOT_NEWLINE))
3519             bitset_clear (accepts, '\n');
3520           if (dfa->syntax & RE_DOT_NOT_NULL)
3521             bitset_clear (accepts, '\0');
3522         }
3523 #endif
3524       else
3525         continue;
3526
3527       /* Check the `accepts' and sift the characters which are not
3528          match it the context.  */
3529       if (constraint)
3530         {
3531           if (constraint & NEXT_NEWLINE_CONSTRAINT)
3532             {
3533               bool accepts_newline = bitset_contain (accepts, NEWLINE_CHAR);
3534               bitset_empty (accepts);
3535               if (accepts_newline)
3536                 bitset_set (accepts, NEWLINE_CHAR);
3537               else
3538                 continue;
3539             }
3540           if (constraint & NEXT_ENDBUF_CONSTRAINT)
3541             {
3542               bitset_empty (accepts);
3543               continue;
3544             }
3545
3546           if (constraint & NEXT_WORD_CONSTRAINT)
3547             {
3548               bitset_word_t any_set = 0;
3549               if (type == CHARACTER && !node->word_char)
3550                 {
3551                   bitset_empty (accepts);
3552                   continue;
3553                 }
3554 #ifdef RE_ENABLE_I18N
3555               if (dfa->mb_cur_max > 1)
3556                 for (j = 0; j < BITSET_WORDS; ++j)
3557                   any_set |= (accepts[j] &= (dfa->word_char[j] | ~dfa->sb_char[j]));
3558               else
3559 #endif
3560                 for (j = 0; j < BITSET_WORDS; ++j)
3561                   any_set |= (accepts[j] &= dfa->word_char[j]);
3562               if (!any_set)
3563                 continue;
3564             }
3565           if (constraint & NEXT_NOTWORD_CONSTRAINT)
3566             {
3567               bitset_word_t any_set = 0;
3568               if (type == CHARACTER && node->word_char)
3569                 {
3570                   bitset_empty (accepts);
3571                   continue;
3572                 }
3573 #ifdef RE_ENABLE_I18N
3574               if (dfa->mb_cur_max > 1)
3575                 for (j = 0; j < BITSET_WORDS; ++j)
3576                   any_set |= (accepts[j] &= ~(dfa->word_char[j] & dfa->sb_char[j]));
3577               else
3578 #endif
3579                 for (j = 0; j < BITSET_WORDS; ++j)
3580                   any_set |= (accepts[j] &= ~dfa->word_char[j]);
3581               if (!any_set)
3582                 continue;
3583             }
3584         }
3585
3586       /* Then divide `accepts' into DFA states, or create a new
3587          state.  Above, we make sure that accepts is not empty.  */
3588       for (j = 0; j < ndests; ++j)
3589         {
3590           bitset_t intersec; /* Intersection sets, see below.  */
3591           bitset_t remains;
3592           /* Flags, see below.  */
3593           bitset_word_t has_intersec, not_subset, not_consumed;
3594
3595           /* Optimization, skip if this state doesn't accept the character.  */
3596           if (type == CHARACTER && !bitset_contain (dests_ch[j], node->opr.c))
3597             continue;
3598
3599           /* Enumerate the intersection set of this state and `accepts'.  */
3600           has_intersec = 0;
3601           for (k = 0; k < BITSET_WORDS; ++k)
3602             has_intersec |= intersec[k] = accepts[k] & dests_ch[j][k];
3603           /* And skip if the intersection set is empty.  */
3604           if (!has_intersec)
3605             continue;
3606
3607           /* Then check if this state is a subset of `accepts'.  */
3608           not_subset = not_consumed = 0;
3609           for (k = 0; k < BITSET_WORDS; ++k)
3610             {
3611               not_subset |= remains[k] = ~accepts[k] & dests_ch[j][k];
3612               not_consumed |= accepts[k] = accepts[k] & ~dests_ch[j][k];
3613             }
3614
3615           /* If this state isn't a subset of `accepts', create a
3616              new group state, which has the `remains'. */
3617           if (not_subset)
3618             {
3619               bitset_copy (dests_ch[ndests], remains);
3620               bitset_copy (dests_ch[j], intersec);
3621               err = re_node_set_init_copy (dests_node + ndests, &dests_node[j]);
3622               if (BE (err != REG_NOERROR, 0))
3623                 goto error_return;
3624               ++ndests;
3625             }
3626
3627           /* Put the position in the current group. */
3628           result = re_node_set_insert (&dests_node[j], cur_nodes->elems[i]);
3629           if (BE (result < 0, 0))
3630             goto error_return;
3631
3632           /* If all characters are consumed, go to next node. */
3633           if (!not_consumed)
3634             break;
3635         }
3636       /* Some characters remain, create a new group. */
3637       if (j == ndests)
3638         {
3639           bitset_copy (dests_ch[ndests], accepts);
3640           err = re_node_set_init_1 (dests_node + ndests, cur_nodes->elems[i]);
3641           if (BE (err != REG_NOERROR, 0))
3642             goto error_return;
3643           ++ndests;
3644           bitset_empty (accepts);
3645         }
3646     }
3647   return ndests;
3648  error_return:
3649   for (j = 0; j < ndests; ++j)
3650     re_node_set_free (dests_node + j);
3651   return -1;
3652 }
3653
3654 #ifdef RE_ENABLE_I18N
3655 /* Check how many bytes the node `dfa->nodes[node_idx]' accepts.
3656    Return the number of the bytes the node accepts.
3657    STR_IDX is the current index of the input string.
3658
3659    This function handles the nodes which can accept one character, or
3660    one collating element like '.', '[a-z]', opposite to the other nodes
3661    can only accept one byte.  */
3662
3663 static int
3664 internal_function
3665 check_node_accept_bytes (const re_dfa_t *dfa, int node_idx,
3666                          const re_string_t *input, int str_idx)
3667 {
3668   const re_token_t *node = dfa->nodes + node_idx;
3669   int char_len, elem_len;
3670   int i;
3671
3672   if (BE (node->type == OP_UTF8_PERIOD, 0))
3673     {
3674       unsigned char c = re_string_byte_at (input, str_idx), d;
3675       if (BE (c < 0xc2, 1))
3676         return 0;
3677
3678       if (str_idx + 2 > input->len)
3679         return 0;
3680
3681       d = re_string_byte_at (input, str_idx + 1);
3682       if (c < 0xe0)
3683         return (d < 0x80 || d > 0xbf) ? 0 : 2;
3684       else if (c < 0xf0)
3685         {
3686           char_len = 3;
3687           if (c == 0xe0 && d < 0xa0)
3688             return 0;
3689         }
3690       else if (c < 0xf8)
3691         {
3692           char_len = 4;
3693           if (c == 0xf0 && d < 0x90)
3694             return 0;
3695         }
3696       else if (c < 0xfc)
3697         {
3698           char_len = 5;
3699           if (c == 0xf8 && d < 0x88)
3700             return 0;
3701         }
3702       else if (c < 0xfe)
3703         {
3704           char_len = 6;
3705           if (c == 0xfc && d < 0x84)
3706             return 0;
3707         }
3708       else
3709         return 0;
3710
3711       if (str_idx + char_len > input->len)
3712         return 0;
3713
3714       for (i = 1; i < char_len; ++i)
3715         {
3716           d = re_string_byte_at (input, str_idx + i);
3717           if (d < 0x80 || d > 0xbf)
3718             return 0;
3719         }
3720       return char_len;
3721     }
3722
3723   char_len = re_string_char_size_at (input, str_idx);
3724   if (node->type == OP_PERIOD)
3725     {
3726       if (char_len <= 1)
3727         return 0;
3728       /* FIXME: I don't think this if is needed, as both '\n'
3729          and '\0' are char_len == 1.  */
3730       /* '.' accepts any one character except the following two cases.  */
3731       if ((!(dfa->syntax & RE_DOT_NEWLINE) &&
3732            re_string_byte_at (input, str_idx) == '\n') ||
3733           ((dfa->syntax & RE_DOT_NOT_NULL) &&
3734            re_string_byte_at (input, str_idx) == '\0'))
3735         return 0;
3736       return char_len;
3737     }
3738
3739   elem_len = re_string_elem_size_at (input, str_idx);
3740   if ((elem_len <= 1 && char_len <= 1) || char_len == 0)
3741     return 0;
3742
3743   if (node->type == COMPLEX_BRACKET)
3744     {
3745       const re_charset_t *cset = node->opr.mbcset;
3746 # if 0
3747       const unsigned char *pin
3748         = ((const unsigned char *) re_string_get_buffer (input) + str_idx);
3749       int j;
3750       uint32_t nrules;
3751 # endif
3752       int match_len = 0;
3753       wchar_t wc = ((cset->nranges || cset->nchar_classes || cset->nmbchars)
3754                     ? re_string_wchar_at (input, str_idx) : 0);
3755
3756       /* match with multibyte character?  */
3757       for (i = 0; i < cset->nmbchars; ++i)
3758         if (wc == cset->mbchars[i])
3759           {
3760             match_len = char_len;
3761             goto check_node_accept_bytes_match;
3762           }
3763       /* match with character_class?  */
3764       for (i = 0; i < cset->nchar_classes; ++i)
3765         {
3766           wctype_t wt = cset->char_classes[i];
3767           if (__iswctype (wc, wt))
3768             {
3769               match_len = char_len;
3770               goto check_node_accept_bytes_match;
3771             }
3772         }
3773
3774 # if 0
3775       nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
3776       if (nrules != 0)
3777         {
3778           unsigned int in_collseq = 0;
3779           const int32_t *table, *indirect;
3780           const unsigned char *weights, *extra;
3781           const char *collseqwc;
3782           int32_t idx;
3783           /* This #include defines a local function!  */
3784 #  include <locale/weight.h>
3785
3786           /* match with collating_symbol?  */
3787           if (cset->ncoll_syms)
3788             extra = (const unsigned char *)
3789               _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
3790           for (i = 0; i < cset->ncoll_syms; ++i)
3791             {
3792               const unsigned char *coll_sym = extra + cset->coll_syms[i];
3793               /* Compare the length of input collating element and
3794                  the length of current collating element.  */
3795               if (*coll_sym != elem_len)
3796                 continue;
3797               /* Compare each bytes.  */
3798               for (j = 0; j < *coll_sym; j++)
3799                 if (pin[j] != coll_sym[1 + j])
3800                   break;
3801               if (j == *coll_sym)
3802                 {
3803                   /* Match if every bytes is equal.  */
3804                   match_len = j;
3805                   goto check_node_accept_bytes_match;
3806                 }
3807             }
3808
3809           if (cset->nranges)
3810             {
3811               if (elem_len <= char_len)
3812                 {
3813                   collseqwc = _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQWC);
3814                   in_collseq = __collseq_table_lookup (collseqwc, wc);
3815                 }
3816               else
3817                 in_collseq = find_collation_sequence_value (pin, elem_len);
3818             }
3819           /* match with range expression?  */
3820           for (i = 0; i < cset->nranges; ++i)
3821             if (cset->range_starts[i] <= in_collseq
3822                 && in_collseq <= cset->range_ends[i])
3823               {
3824                 match_len = elem_len;
3825                 goto check_node_accept_bytes_match;
3826               }
3827
3828           /* match with equivalence_class?  */
3829           if (cset->nequiv_classes)
3830             {
3831               const unsigned char *cp = pin;
3832               table = (const int32_t *)
3833                 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
3834               weights = (const unsigned char *)
3835                 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB);
3836               extra = (const unsigned char *)
3837                 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
3838               indirect = (const int32_t *)
3839                 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB);
3840               idx = findidx (&cp);
3841               if (idx > 0)
3842                 for (i = 0; i < cset->nequiv_classes; ++i)
3843                   {
3844                     int32_t equiv_class_idx = cset->equiv_classes[i];
3845                     size_t weight_len = weights[idx];
3846                     if (weight_len == weights[equiv_class_idx])
3847                       {
3848                         int cnt = 0;
3849                         while (cnt <= weight_len
3850                                && (weights[equiv_class_idx + 1 + cnt]
3851                                    == weights[idx + 1 + cnt]))
3852                           ++cnt;
3853                         if (cnt > weight_len)
3854                           {
3855                             match_len = elem_len;
3856                             goto check_node_accept_bytes_match;
3857                           }
3858                       }
3859                   }
3860             }
3861         }
3862       else
3863 # endif
3864         {
3865           /* match with range expression?  */
3866           wchar_t cmp_buf[6];
3867
3868           memset (cmp_buf, 0, sizeof(cmp_buf));
3869           cmp_buf[2] = wc;
3870           for (i = 0; i < cset->nranges; ++i)
3871             {
3872               cmp_buf[0] = cset->range_starts[i];
3873               cmp_buf[4] = cset->range_ends[i];
3874               if (wcscoll (cmp_buf, cmp_buf + 2) <= 0
3875                   && wcscoll (cmp_buf + 2, cmp_buf + 4) <= 0)
3876                 {
3877                   match_len = char_len;
3878                   goto check_node_accept_bytes_match;
3879                 }
3880             }
3881         }
3882
3883  check_node_accept_bytes_match:
3884       if (!cset->non_match)
3885         return match_len;
3886       if (match_len > 0)
3887         return 0;
3888       return (elem_len > char_len) ? elem_len : char_len;
3889     }
3890   return 0;
3891 }
3892
3893 # if 0
3894 static unsigned int
3895 internal_function
3896 find_collation_sequence_value (const unsigned char *mbs, size_t mbs_len)
3897 {
3898   uint32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
3899   if (nrules == 0)
3900     {
3901       if (mbs_len == 1)
3902         {
3903           /* No valid character.  Match it as a single byte character.  */
3904           const unsigned char *collseq = (const unsigned char *)
3905             _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQMB);
3906           return collseq[mbs[0]];
3907         }
3908       return UINT_MAX;
3909     }
3910   else
3911     {
3912       int32_t idx;
3913       const unsigned char *extra = (const unsigned char *)
3914         _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
3915       int32_t extrasize = (const unsigned char *)
3916         _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB + 1) - extra;
3917
3918       for (idx = 0; idx < extrasize;)
3919         {
3920           int mbs_cnt, found = 0;
3921           int32_t elem_mbs_len;
3922           /* Skip the name of collating element name.  */
3923           idx = idx + extra[idx] + 1;
3924           elem_mbs_len = extra[idx++];
3925           if (mbs_len == elem_mbs_len)
3926             {
3927               for (mbs_cnt = 0; mbs_cnt < elem_mbs_len; ++mbs_cnt)
3928                 if (extra[idx + mbs_cnt] != mbs[mbs_cnt])
3929                   break;
3930               if (mbs_cnt == elem_mbs_len)
3931                 /* Found the entry.  */
3932                 found = 1;
3933             }
3934           /* Skip the byte sequence of the collating element.  */
3935           idx += elem_mbs_len;
3936           /* Adjust for the alignment.  */
3937           idx = (idx + 3) & ~3;
3938           /* Skip the collation sequence value.  */
3939           idx += sizeof (uint32_t);
3940           /* Skip the wide char sequence of the collating element.  */
3941           idx = idx + sizeof (uint32_t) * (extra[idx] + 1);
3942           /* If we found the entry, return the sequence value.  */
3943           if (found)
3944             return *(uint32_t *) (extra + idx);
3945           /* Skip the collation sequence value.  */
3946           idx += sizeof (uint32_t);
3947         }
3948       return UINT_MAX;
3949     }
3950 }
3951 # endif
3952 #endif /* RE_ENABLE_I18N */
3953
3954 /* Check whether the node accepts the byte which is IDX-th
3955    byte of the INPUT.  */
3956
3957 static int
3958 internal_function
3959 check_node_accept (const re_match_context_t *mctx, const re_token_t *node,
3960                    int idx)
3961 {
3962   unsigned char ch;
3963   ch = re_string_byte_at (&mctx->input, idx);
3964   switch (node->type)
3965     {
3966     case CHARACTER:
3967       if (node->opr.c != ch)
3968         return 0;
3969       break;
3970
3971     case SIMPLE_BRACKET:
3972       if (!bitset_contain (node->opr.sbcset, ch))
3973         return 0;
3974       break;
3975
3976 #ifdef RE_ENABLE_I18N
3977     case OP_UTF8_PERIOD:
3978       if (ch >= 0x80)
3979         return 0;
3980       /* FALLTHROUGH */
3981 #endif
3982     case OP_PERIOD:
3983       if ((ch == '\n' && !(mctx->dfa->syntax & RE_DOT_NEWLINE))
3984           || (ch == '\0' && (mctx->dfa->syntax & RE_DOT_NOT_NULL)))
3985         return 0;
3986       break;
3987
3988     default:
3989       return 0;
3990     }
3991
3992   if (node->constraint)
3993     {
3994       /* The node has constraints.  Check whether the current context
3995          satisfies the constraints.  */
3996       unsigned int context = re_string_context_at (&mctx->input, idx,
3997                                                    mctx->eflags);
3998       if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context))
3999         return 0;
4000     }
4001
4002   return 1;
4003 }
4004
4005 /* Extend the buffers, if the buffers have run out.  */
4006
4007 static reg_errcode_t
4008 internal_function
4009 extend_buffers (re_match_context_t *mctx)
4010 {
4011   reg_errcode_t ret;
4012   re_string_t *pstr = &mctx->input;
4013
4014   /* Double the lengthes of the buffers.  */
4015   ret = re_string_realloc_buffers (pstr, pstr->bufs_len * 2);
4016   if (BE (ret != REG_NOERROR, 0))
4017     return ret;
4018
4019   if (mctx->state_log != NULL)
4020     {
4021       /* And double the length of state_log.  */
4022       /* XXX We have no indication of the size of this buffer.  If this
4023          allocation fail we have no indication that the state_log array
4024          does not have the right size.  */
4025       re_dfastate_t **new_array = re_realloc (mctx->state_log, re_dfastate_t *,
4026                                               pstr->bufs_len + 1);
4027       if (BE (new_array == NULL, 0))
4028         return REG_ESPACE;
4029       mctx->state_log = new_array;
4030     }
4031
4032   /* Then reconstruct the buffers.  */
4033   if (pstr->icase)
4034     {
4035 #ifdef RE_ENABLE_I18N
4036       if (pstr->mb_cur_max > 1)
4037         {
4038           ret = build_wcs_upper_buffer (pstr);
4039           if (BE (ret != REG_NOERROR, 0))
4040             return ret;
4041         }
4042       else
4043 #endif /* RE_ENABLE_I18N  */
4044         build_upper_buffer (pstr);
4045     }
4046   else
4047     {
4048 #ifdef RE_ENABLE_I18N
4049       if (pstr->mb_cur_max > 1)
4050         build_wcs_buffer (pstr);
4051       else
4052 #endif /* RE_ENABLE_I18N  */
4053         {
4054           if (pstr->trans != NULL)
4055             re_string_translate_buffer (pstr);
4056         }
4057     }
4058   return REG_NOERROR;
4059 }
4060
4061
4062 /* Functions for matching context.  */
4063
4064 /* Initialize MCTX.  */
4065
4066 static reg_errcode_t
4067 internal_function
4068 match_ctx_init (re_match_context_t *mctx, int eflags, int n)
4069 {
4070   mctx->eflags = eflags;
4071   mctx->match_last = -1;
4072   if (n > 0)
4073     {
4074       mctx->bkref_ents = re_malloc (struct re_backref_cache_entry, n);
4075       mctx->sub_tops = re_malloc (re_sub_match_top_t *, n);
4076       if (BE (mctx->bkref_ents == NULL || mctx->sub_tops == NULL, 0))
4077         return REG_ESPACE;
4078     }
4079   /* Already zero-ed by the caller.
4080      else
4081        mctx->bkref_ents = NULL;
4082      mctx->nbkref_ents = 0;
4083      mctx->nsub_tops = 0;  */
4084   mctx->abkref_ents = n;
4085   mctx->max_mb_elem_len = 1;
4086   mctx->asub_tops = n;
4087   return REG_NOERROR;
4088 }
4089
4090 /* Clean the entries which depend on the current input in MCTX.
4091    This function must be invoked when the matcher changes the start index
4092    of the input, or changes the input string.  */
4093
4094 static void
4095 internal_function
4096 match_ctx_clean (re_match_context_t *mctx)
4097 {
4098   int st_idx;
4099   for (st_idx = 0; st_idx < mctx->nsub_tops; ++st_idx)
4100     {
4101       int sl_idx;
4102       re_sub_match_top_t *top = mctx->sub_tops[st_idx];
4103       for (sl_idx = 0; sl_idx < top->nlasts; ++sl_idx)
4104         {
4105           re_sub_match_last_t *last = top->lasts[sl_idx];
4106           re_free (last->path.array);
4107           re_free (last);
4108         }
4109       re_free (top->lasts);
4110       if (top->path)
4111         {
4112           re_free (top->path->array);
4113           re_free (top->path);
4114         }
4115       free (top);
4116     }
4117
4118   mctx->nsub_tops = 0;
4119   mctx->nbkref_ents = 0;
4120 }
4121
4122 /* Free all the memory associated with MCTX.  */
4123
4124 static void
4125 internal_function
4126 match_ctx_free (re_match_context_t *mctx)
4127 {
4128   /* First, free all the memory associated with MCTX->SUB_TOPS.  */
4129   match_ctx_clean (mctx);
4130   re_free (mctx->sub_tops);
4131   re_free (mctx->bkref_ents);
4132 }
4133
4134 /* Add a new backreference entry to MCTX.
4135    Note that we assume that caller never call this function with duplicate
4136    entry, and call with STR_IDX which isn't smaller than any existing entry.
4137 */
4138
4139 static reg_errcode_t
4140 internal_function
4141 match_ctx_add_entry (re_match_context_t *mctx, int node, int str_idx, int from,
4142                      int to)
4143 {
4144   if (mctx->nbkref_ents >= mctx->abkref_ents)
4145     {
4146       struct re_backref_cache_entry* new_entry;
4147       new_entry = re_realloc (mctx->bkref_ents, struct re_backref_cache_entry,
4148                               mctx->abkref_ents * 2);
4149       if (BE (new_entry == NULL, 0))
4150         {
4151           re_free (mctx->bkref_ents);
4152           return REG_ESPACE;
4153         }
4154       mctx->bkref_ents = new_entry;
4155       memset (mctx->bkref_ents + mctx->nbkref_ents, '\0',
4156               sizeof (struct re_backref_cache_entry) * mctx->abkref_ents);
4157       mctx->abkref_ents *= 2;
4158     }
4159   if (mctx->nbkref_ents > 0
4160       && mctx->bkref_ents[mctx->nbkref_ents - 1].str_idx == str_idx)
4161     mctx->bkref_ents[mctx->nbkref_ents - 1].more = 1;
4162
4163   mctx->bkref_ents[mctx->nbkref_ents].node = node;
4164   mctx->bkref_ents[mctx->nbkref_ents].str_idx = str_idx;
4165   mctx->bkref_ents[mctx->nbkref_ents].subexp_from = from;
4166   mctx->bkref_ents[mctx->nbkref_ents].subexp_to = to;
4167
4168   /* This is a cache that saves negative results of check_dst_limits_calc_pos.
4169      If bit N is clear, means that this entry won't epsilon-transition to
4170      an OP_OPEN_SUBEXP or OP_CLOSE_SUBEXP for the N+1-th subexpression.  If
4171      it is set, check_dst_limits_calc_pos_1 will recurse and try to find one
4172      such node.
4173
4174      A backreference does not epsilon-transition unless it is empty, so set
4175      to all zeros if FROM != TO.  */
4176   mctx->bkref_ents[mctx->nbkref_ents].eps_reachable_subexps_map
4177     = (from == to ? ~0 : 0);
4178
4179   mctx->bkref_ents[mctx->nbkref_ents++].more = 0;
4180   if (mctx->max_mb_elem_len < to - from)
4181     mctx->max_mb_elem_len = to - from;
4182   return REG_NOERROR;
4183 }
4184
4185 /* Search for the first entry which has the same str_idx, or -1 if none is
4186    found.  Note that MCTX->BKREF_ENTS is already sorted by MCTX->STR_IDX.  */
4187
4188 static int
4189 internal_function
4190 search_cur_bkref_entry (const re_match_context_t *mctx, int str_idx)
4191 {
4192   int left, right, mid, last;
4193   last = right = mctx->nbkref_ents;
4194   for (left = 0; left < right;)
4195     {
4196       mid = (left + right) / 2;
4197       if (mctx->bkref_ents[mid].str_idx < str_idx)
4198         left = mid + 1;
4199       else
4200         right = mid;
4201     }
4202   if (left < last && mctx->bkref_ents[left].str_idx == str_idx)
4203     return left;
4204   else
4205     return -1;
4206 }
4207
4208 /* Register the node NODE, whose type is OP_OPEN_SUBEXP, and which matches
4209    at STR_IDX.  */
4210
4211 static reg_errcode_t
4212 internal_function
4213 match_ctx_add_subtop (re_match_context_t *mctx, int node, int str_idx)
4214 {
4215 #ifdef DEBUG
4216   assert (mctx->sub_tops != NULL);
4217   assert (mctx->asub_tops > 0);
4218 #endif
4219   if (BE (mctx->nsub_tops == mctx->asub_tops, 0))
4220     {
4221       int new_asub_tops = mctx->asub_tops * 2;
4222       re_sub_match_top_t **new_array = re_realloc (mctx->sub_tops,
4223                                                    re_sub_match_top_t *,
4224                                                    new_asub_tops);
4225       if (BE (new_array == NULL, 0))
4226         return REG_ESPACE;
4227       mctx->sub_tops = new_array;
4228       mctx->asub_tops = new_asub_tops;
4229     }
4230   mctx->sub_tops[mctx->nsub_tops] = calloc (1, sizeof (re_sub_match_top_t));
4231   if (BE (mctx->sub_tops[mctx->nsub_tops] == NULL, 0))
4232     return REG_ESPACE;
4233   mctx->sub_tops[mctx->nsub_tops]->node = node;
4234   mctx->sub_tops[mctx->nsub_tops++]->str_idx = str_idx;
4235   return REG_NOERROR;
4236 }
4237
4238 /* Register the node NODE, whose type is OP_CLOSE_SUBEXP, and which matches
4239    at STR_IDX, whose corresponding OP_OPEN_SUBEXP is SUB_TOP.  */
4240
4241 static re_sub_match_last_t *
4242 internal_function
4243 match_ctx_add_sublast (re_sub_match_top_t *subtop, int node, int str_idx)
4244 {
4245   re_sub_match_last_t *new_entry;
4246   if (BE (subtop->nlasts == subtop->alasts, 0))
4247     {
4248       int new_alasts = 2 * subtop->alasts + 1;
4249       re_sub_match_last_t **new_array = re_realloc (subtop->lasts,
4250                                                     re_sub_match_last_t *,
4251                                                     new_alasts);
4252       if (BE (new_array == NULL, 0))
4253         return NULL;
4254       subtop->lasts = new_array;
4255       subtop->alasts = new_alasts;
4256     }
4257   new_entry = calloc (1, sizeof (re_sub_match_last_t));
4258   if (BE (new_entry != NULL, 1))
4259     {
4260       subtop->lasts[subtop->nlasts] = new_entry;
4261       new_entry->node = node;
4262       new_entry->str_idx = str_idx;
4263       ++subtop->nlasts;
4264     }
4265   return new_entry;
4266 }
4267
4268 static void
4269 internal_function
4270 sift_ctx_init (re_sift_context_t *sctx, re_dfastate_t **sifted_sts,
4271                re_dfastate_t **limited_sts, int last_node, int last_str_idx)
4272 {
4273   sctx->sifted_states = sifted_sts;
4274   sctx->limited_states = limited_sts;
4275   sctx->last_node = last_node;
4276   sctx->last_str_idx = last_str_idx;
4277   re_node_set_init_empty (&sctx->limits);
4278 }