]> rtime.felk.cvut.cz Git - l4.git/blob - l4/pkg/libstdc++-v3/contrib/libstdc++-v3-4.9/include/bits/regex.tcc
update
[l4.git] / l4 / pkg / libstdc++-v3 / contrib / libstdc++-v3-4.9 / include / bits / regex.tcc
1 // class template regex -*- C++ -*-
2
3 // Copyright (C) 2013-2014 Free Software Foundation, Inc.
4 //
5 // This file is part of the GNU ISO C++ Library.  This library is free
6 // software; you can redistribute it and/or modify it under the
7 // terms of the GNU General Public License as published by the
8 // Free Software Foundation; either version 3, or (at your option)
9 // any later version.
10
11 // This library is distributed in the hope that it will be useful,
12 // but WITHOUT ANY WARRANTY; without even the implied warranty of
13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14 // GNU General Public License for more details.
15
16 // Under Section 7 of GPL version 3, you are granted additional
17 // permissions described in the GCC Runtime Library Exception, version
18 // 3.1, as published by the Free Software Foundation.
19
20 // You should have received a copy of the GNU General Public License and
21 // a copy of the GCC Runtime Library Exception along with this program;
22 // see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
23 // <http://www.gnu.org/licenses/>.
24
25 /**
26  *  @file bits/regex.tcc
27  *  This is an internal header file, included by other library headers.
28  *  Do not attempt to use it directly. @headername{regex}
29  */
30
31 // See below __regex_algo_impl to get what this is talking about. The default
32 // value 1 indicated a conservative optimization without giving up worst case
33 // performance.
34 #ifndef _GLIBCXX_REGEX_DFS_QUANTIFIERS_LIMIT
35 #define _GLIBCXX_REGEX_DFS_QUANTIFIERS_LIMIT 1
36 #endif
37
38 namespace std _GLIBCXX_VISIBILITY(default)
39 {
40 namespace __detail
41 {
42 _GLIBCXX_BEGIN_NAMESPACE_VERSION
43
44   // Result of merging regex_match and regex_search.
45   //
46   // __policy now can be _S_auto (auto dispatch) and _S_alternate (use
47   // the other one if possible, for test purpose).
48   //
49   // That __match_mode is true means regex_match, else regex_search.
50   template<typename _BiIter, typename _Alloc,
51            typename _CharT, typename _TraitsT,
52            _RegexExecutorPolicy __policy,
53            bool __match_mode>
54     bool
55     __regex_algo_impl(_BiIter                              __s,
56                       _BiIter                              __e,
57                       match_results<_BiIter, _Alloc>&      __m,
58                       const basic_regex<_CharT, _TraitsT>& __re,
59                       regex_constants::match_flag_type     __flags)
60     {
61       if (__re._M_automaton == nullptr)
62         return false;
63
64       typename match_results<_BiIter, _Alloc>::_Base_type& __res = __m;
65       __res.resize(__re._M_automaton->_M_sub_count() + 2);
66       for (auto& __it : __res)
67         __it.matched = false;
68
69       // This function decide which executor to use under given circumstances.
70       // The _S_auto policy now is the following: if a NFA has no
71       // back-references and has more than _GLIBCXX_REGEX_DFS_QUANTIFIERS_LIMIT
72       // quantifiers (*, +, ?), the BFS executor will be used, other wise
73       // DFS executor. This is because DFS executor has a exponential upper
74       // bound, but better best-case performace. Meanwhile, BFS executor can
75       // effectively prevent from exponential-long time matching (which must
76       // contains many quantifiers), but it's slower in average.
77       //
78       // For simple regex, BFS executor could be 2 or more times slower than
79       // DFS executor.
80       //
81       // Of course, BFS executor cannot handle back-references.
82       bool __ret;
83       if (!__re._M_automaton->_M_has_backref
84           && (__policy == _RegexExecutorPolicy::_S_alternate
85               || __re._M_automaton->_M_quant_count
86                 > _GLIBCXX_REGEX_DFS_QUANTIFIERS_LIMIT))
87         {
88           _Executor<_BiIter, _Alloc, _TraitsT, false>
89             __executor(__s, __e, __m, __re, __flags);
90           if (__match_mode)
91             __ret = __executor._M_match();
92           else
93             __ret = __executor._M_search();
94         }
95       else
96         {
97           _Executor<_BiIter, _Alloc, _TraitsT, true>
98             __executor(__s, __e, __m, __re, __flags);
99           if (__match_mode)
100             __ret = __executor._M_match();
101           else
102             __ret = __executor._M_search();
103         }
104       if (__ret)
105         {
106           for (auto __it : __res)
107             if (!__it.matched)
108               __it.first = __it.second = __e;
109           auto& __pre = __res[__res.size()-2];
110           auto& __suf = __res[__res.size()-1];
111           if (__match_mode)
112             {
113               __pre.matched = false;
114               __pre.first = __s;
115               __pre.second = __s;
116               __suf.matched = false;
117               __suf.first = __e;
118               __suf.second = __e;
119             }
120           else
121             {
122               __pre.first = __s;
123               __pre.second = __res[0].first;
124               __pre.matched = (__pre.first != __pre.second);
125               __suf.first = __res[0].second;
126               __suf.second = __e;
127               __suf.matched = (__suf.first != __suf.second);
128             }
129         }
130       return __ret;
131     }
132
133 _GLIBCXX_END_NAMESPACE_VERSION
134 }
135
136 _GLIBCXX_BEGIN_NAMESPACE_VERSION
137
138   template<typename _Ch_type>
139   template<typename _Fwd_iter>
140     typename regex_traits<_Ch_type>::string_type
141     regex_traits<_Ch_type>::
142     lookup_collatename(_Fwd_iter __first, _Fwd_iter __last) const
143     {
144       typedef std::ctype<char_type> __ctype_type;
145       const __ctype_type& __fctyp(use_facet<__ctype_type>(_M_locale));
146
147       static const char* __collatenames[] =
148         {
149           "NUL",
150           "SOH",
151           "STX",
152           "ETX",
153           "EOT",
154           "ENQ",
155           "ACK",
156           "alert",
157           "backspace",
158           "tab",
159           "newline",
160           "vertical-tab",
161           "form-feed",
162           "carriage-return",
163           "SO",
164           "SI",
165           "DLE",
166           "DC1",
167           "DC2",
168           "DC3",
169           "DC4",
170           "NAK",
171           "SYN",
172           "ETB",
173           "CAN",
174           "EM",
175           "SUB",
176           "ESC",
177           "IS4",
178           "IS3",
179           "IS2",
180           "IS1",
181           "space",
182           "exclamation-mark",
183           "quotation-mark",
184           "number-sign",
185           "dollar-sign",
186           "percent-sign",
187           "ampersand",
188           "apostrophe",
189           "left-parenthesis",
190           "right-parenthesis",
191           "asterisk",
192           "plus-sign",
193           "comma",
194           "hyphen",
195           "period",
196           "slash",
197           "zero",
198           "one",
199           "two",
200           "three",
201           "four",
202           "five",
203           "six",
204           "seven",
205           "eight",
206           "nine",
207           "colon",
208           "semicolon",
209           "less-than-sign",
210           "equals-sign",
211           "greater-than-sign",
212           "question-mark",
213           "commercial-at",
214           "A",
215           "B",
216           "C",
217           "D",
218           "E",
219           "F",
220           "G",
221           "H",
222           "I",
223           "J",
224           "K",
225           "L",
226           "M",
227           "N",
228           "O",
229           "P",
230           "Q",
231           "R",
232           "S",
233           "T",
234           "U",
235           "V",
236           "W",
237           "X",
238           "Y",
239           "Z",
240           "left-square-bracket",
241           "backslash",
242           "right-square-bracket",
243           "circumflex",
244           "underscore",
245           "grave-accent",
246           "a",
247           "b",
248           "c",
249           "d",
250           "e",
251           "f",
252           "g",
253           "h",
254           "i",
255           "j",
256           "k",
257           "l",
258           "m",
259           "n",
260           "o",
261           "p",
262           "q",
263           "r",
264           "s",
265           "t",
266           "u",
267           "v",
268           "w",
269           "x",
270           "y",
271           "z",
272           "left-curly-bracket",
273           "vertical-line",
274           "right-curly-bracket",
275           "tilde",
276           "DEL",
277           ""
278         };
279
280       // same as boost
281       //static const char* __digraphs[] =
282       //  {
283       //    "ae",
284       //    "Ae",
285       //    "AE",
286       //    "ch",
287       //    "Ch",
288       //    "CH",
289       //    "ll",
290       //    "Ll",
291       //    "LL",
292       //    "ss",
293       //    "Ss",
294       //    "SS",
295       //    "nj",
296       //    "Nj",
297       //    "NJ",
298       //    "dz",
299       //    "Dz",
300       //    "DZ",
301       //    "lj",
302       //    "Lj",
303       //    "LJ",
304       //    ""
305       //  };
306
307       std::string __s(__last - __first, '?');
308       __fctyp.narrow(__first, __last, '?', &*__s.begin());
309
310       for (unsigned int __i = 0; *__collatenames[__i]; __i++)
311         if (__s == __collatenames[__i])
312           return string_type(1, __fctyp.widen(static_cast<char>(__i)));
313
314       //for (unsigned int __i = 0; *__digraphs[__i]; __i++)
315       //  {
316       //    const char* __now = __digraphs[__i];
317       //    if (__s == __now)
318       //      {
319       //        string_type ret(__s.size(), __fctyp.widen('?'));
320       //        __fctyp.widen(__now, __now + 2/* ouch */, &*ret.begin());
321       //        return ret;
322       //      }
323       //  }
324       return string_type();
325     }
326
327   template<typename _Ch_type>
328   template<typename _Fwd_iter>
329     typename regex_traits<_Ch_type>::char_class_type
330     regex_traits<_Ch_type>::
331     lookup_classname(_Fwd_iter __first, _Fwd_iter __last, bool __icase) const
332     {
333       typedef std::ctype<char_type> __ctype_type;
334       typedef std::ctype<char> __cctype_type;
335       typedef const pair<const char*, char_class_type> _ClassnameEntry;
336       const __ctype_type& __fctyp(use_facet<__ctype_type>(_M_locale));
337       const __cctype_type& __cctyp(use_facet<__cctype_type>(_M_locale));
338
339       static _ClassnameEntry __classnames[] =
340       {
341         {"d", ctype_base::digit},
342         {"w", {ctype_base::alnum, _RegexMask::_S_under}},
343         {"s", ctype_base::space},
344         {"alnum", ctype_base::alnum},
345         {"alpha", ctype_base::alpha},
346         {"blank", {0, _RegexMask::_S_blank}},
347         {"cntrl", ctype_base::cntrl},
348         {"digit", ctype_base::digit},
349         {"graph", ctype_base::graph},
350         {"lower", ctype_base::lower},
351         {"print", ctype_base::print},
352         {"punct", ctype_base::punct},
353         {"space", ctype_base::space},
354         {"upper", ctype_base::upper},
355         {"xdigit", ctype_base::xdigit},
356       };
357
358       std::string __s(__last - __first, '?');
359       __fctyp.narrow(__first, __last, '?', &__s[0]);
360       __cctyp.tolower(&*__s.begin(), &*__s.begin() + __s.size());
361       for (_ClassnameEntry* __it = __classnames;
362            __it < *(&__classnames + 1);
363            ++__it)
364         {
365           if (__s == __it->first)
366             {
367               if (__icase
368                   && ((__it->second
369                        & (ctype_base::lower | ctype_base::upper)) != 0))
370                 return ctype_base::alpha;
371               return __it->second;
372             }
373         }
374       return 0;
375     }
376
377   template<typename _Ch_type>
378     bool
379     regex_traits<_Ch_type>::
380     isctype(_Ch_type __c, char_class_type __f) const
381     {
382       typedef std::ctype<char_type> __ctype_type;
383       const __ctype_type& __fctyp(use_facet<__ctype_type>(_M_locale));
384
385       return __fctyp.is(__f._M_base, __c)
386         // [[:w:]]
387         || ((__f._M_extended & _RegexMask::_S_under)
388             && __c == __fctyp.widen('_'))
389         // [[:blank:]]
390         || ((__f._M_extended & _RegexMask::_S_blank)
391             && (__c == __fctyp.widen(' ')
392                 || __c == __fctyp.widen('\t')));
393     }
394
395   template<typename _Ch_type>
396     int
397     regex_traits<_Ch_type>::
398     value(_Ch_type __ch, int __radix) const
399     {
400       std::basic_istringstream<char_type> __is(string_type(1, __ch));
401       long __v;
402       if (__radix == 8)
403         __is >> std::oct;
404       else if (__radix == 16)
405         __is >> std::hex;
406       __is >> __v;
407       return __is.fail() ? -1 : __v;
408     }
409
410   template<typename _Bi_iter, typename _Alloc>
411   template<typename _Out_iter>
412     _Out_iter match_results<_Bi_iter, _Alloc>::
413     format(_Out_iter __out,
414            const match_results<_Bi_iter, _Alloc>::char_type* __fmt_first,
415            const match_results<_Bi_iter, _Alloc>::char_type* __fmt_last,
416            match_flag_type __flags) const
417     {
418       _GLIBCXX_DEBUG_ASSERT( ready() );
419       regex_traits<char_type> __traits;
420       typedef std::ctype<char_type> __ctype_type;
421       const __ctype_type&
422         __fctyp(use_facet<__ctype_type>(__traits.getloc()));
423
424       auto __output = [&](size_t __idx)
425         {
426           auto& __sub = _Base_type::operator[](__idx);
427           if (__sub.matched)
428             __out = std::copy(__sub.first, __sub.second, __out);
429         };
430
431       if (__flags & regex_constants::format_sed)
432         {
433           for (; __fmt_first != __fmt_last;)
434             if (*__fmt_first == '&')
435               {
436                 __output(0);
437                 ++__fmt_first;
438               }
439             else if (*__fmt_first == '\\')
440               {
441                 if (++__fmt_first != __fmt_last
442                     && __fctyp.is(__ctype_type::digit, *__fmt_first))
443                   __output(__traits.value(*__fmt_first++, 10));
444                 else
445                   *__out++ = '\\';
446               }
447             else
448               *__out++ = *__fmt_first++;
449         }
450       else
451         {
452           while (1)
453             {
454               auto __next = std::find(__fmt_first, __fmt_last, '$');
455               if (__next == __fmt_last)
456                 break;
457
458               __out = std::copy(__fmt_first, __next, __out);
459
460               auto __eat = [&](char __ch) -> bool
461                 {
462                   if (*__next == __ch)
463                     {
464                       ++__next;
465                       return true;
466                     }
467                   return false;
468                 };
469
470               if (++__next == __fmt_last)
471                 *__out++ = '$';
472               else if (__eat('$'))
473                 *__out++ = '$';
474               else if (__eat('&'))
475                 __output(0);
476               else if (__eat('`'))
477                 __output(_Base_type::size()-2);
478               else if (__eat('\''))
479                 __output(_Base_type::size()-1);
480               else if (__fctyp.is(__ctype_type::digit, *__next))
481                 {
482                   long __num = __traits.value(*__next, 10);
483                   if (++__next != __fmt_last
484                       && __fctyp.is(__ctype_type::digit, *__next))
485                     {
486                       __num *= 10;
487                       __num += __traits.value(*__next++, 10);
488                     }
489                   if (0 <= __num && __num < this->size())
490                     __output(__num);
491                 }
492               else
493                 *__out++ = '$';
494               __fmt_first = __next;
495             }
496           __out = std::copy(__fmt_first, __fmt_last, __out);
497         }
498       return __out;
499     }
500
501   template<typename _Out_iter, typename _Bi_iter,
502            typename _Rx_traits, typename _Ch_type>
503     _Out_iter
504     regex_replace(_Out_iter __out, _Bi_iter __first, _Bi_iter __last,
505                   const basic_regex<_Ch_type, _Rx_traits>& __e,
506                   const _Ch_type* __fmt,
507                   regex_constants::match_flag_type __flags)
508     {
509       typedef regex_iterator<_Bi_iter, _Ch_type, _Rx_traits> _IterT;
510       _IterT __i(__first, __last, __e, __flags);
511       _IterT __end;
512       if (__i == __end)
513         {
514           if (!(__flags & regex_constants::format_no_copy))
515             __out = std::copy(__first, __last, __out);
516         }
517       else
518         {
519           sub_match<_Bi_iter> __last;
520           auto __len = char_traits<_Ch_type>::length(__fmt);
521           for (; __i != __end; ++__i)
522             {
523               if (!(__flags & regex_constants::format_no_copy))
524                 __out = std::copy(__i->prefix().first, __i->prefix().second,
525                                   __out);
526               __out = __i->format(__out, __fmt, __fmt + __len, __flags);
527               __last = __i->suffix();
528               if (__flags & regex_constants::format_first_only)
529                 break;
530             }
531           if (!(__flags & regex_constants::format_no_copy))
532             __out = std::copy(__last.first, __last.second, __out);
533         }
534       return __out;
535     }
536
537   template<typename _Bi_iter,
538            typename _Ch_type,
539            typename _Rx_traits>
540     bool
541     regex_iterator<_Bi_iter, _Ch_type, _Rx_traits>::
542     operator==(const regex_iterator& __rhs) const
543     {
544       return (_M_match.empty() && __rhs._M_match.empty())
545         || (_M_begin == __rhs._M_begin
546             && _M_end == __rhs._M_end
547             && _M_pregex == __rhs._M_pregex
548             && _M_flags == __rhs._M_flags
549             && _M_match[0] == __rhs._M_match[0]);
550     }
551
552   template<typename _Bi_iter,
553            typename _Ch_type,
554            typename _Rx_traits>
555     regex_iterator<_Bi_iter, _Ch_type, _Rx_traits>&
556     regex_iterator<_Bi_iter, _Ch_type, _Rx_traits>::
557     operator++()
558     {
559       // In all cases in which the call to regex_search returns true,
560       // match.prefix().first shall be equal to the previous value of
561       // match[0].second, and for each index i in the half-open range
562       // [0, match.size()) for which match[i].matched is true,
563       // match[i].position() shall return distance(begin, match[i].first).
564       // [28.12.1.4.5]
565       if (_M_match[0].matched)
566         {
567           auto __start = _M_match[0].second;
568           auto __prefix_first = _M_match[0].second;
569           if (_M_match[0].first == _M_match[0].second)
570             {
571               if (__start == _M_end)
572                 {
573                   _M_match = value_type();
574                   return *this;
575                 }
576               else
577                 {
578                   if (regex_search(__start, _M_end, _M_match, *_M_pregex,
579                                    _M_flags
580                                    | regex_constants::match_not_null
581                                    | regex_constants::match_continuous))
582                     {
583                       _GLIBCXX_DEBUG_ASSERT(_M_match[0].matched);
584                       _M_match.at(_M_match.size()).first = __prefix_first;
585                       _M_match._M_in_iterator = true;
586                       _M_match._M_begin = _M_begin;
587                       return *this;
588                     }
589                   else
590                     ++__start;
591                 }
592             }
593           _M_flags |= regex_constants::match_prev_avail;
594           if (regex_search(__start, _M_end, _M_match, *_M_pregex, _M_flags))
595             {
596               _GLIBCXX_DEBUG_ASSERT(_M_match[0].matched);
597               _M_match.at(_M_match.size()).first = __prefix_first;
598               _M_match._M_in_iterator = true;
599               _M_match._M_begin = _M_begin;
600             }
601           else
602             _M_match = value_type();
603         }
604       return *this;
605     }
606
607   template<typename _Bi_iter,
608            typename _Ch_type,
609            typename _Rx_traits>
610     regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>&
611     regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>::
612     operator=(const regex_token_iterator& __rhs)
613     {
614       _M_position = __rhs._M_position;
615       _M_subs = __rhs._M_subs;
616       _M_n = __rhs._M_n;
617       _M_result = __rhs._M_result;
618       _M_suffix = __rhs._M_suffix;
619       _M_has_m1 = __rhs._M_has_m1;
620       if (__rhs._M_result == &__rhs._M_suffix)
621         _M_result = &_M_suffix;
622       return *this;
623     }
624
625   template<typename _Bi_iter,
626            typename _Ch_type,
627            typename _Rx_traits>
628     bool
629     regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>::
630     operator==(const regex_token_iterator& __rhs) const
631     {
632       if (_M_end_of_seq() && __rhs._M_end_of_seq())
633         return true;
634       if (_M_suffix.matched && __rhs._M_suffix.matched
635           && _M_suffix == __rhs._M_suffix)
636         return true;
637       if (_M_end_of_seq() || _M_suffix.matched
638           || __rhs._M_end_of_seq() || __rhs._M_suffix.matched)
639         return false;
640       return _M_position == __rhs._M_position
641         && _M_n == __rhs._M_n
642         && _M_subs == __rhs._M_subs;
643     }
644
645   template<typename _Bi_iter,
646            typename _Ch_type,
647            typename _Rx_traits>
648     regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>&
649     regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>::
650     operator++()
651     {
652       _Position __prev = _M_position;
653       if (_M_suffix.matched)
654         *this = regex_token_iterator();
655       else if (_M_n + 1 < _M_subs.size())
656         {
657           _M_n++;
658           _M_result = &_M_current_match();
659         }
660       else
661         {
662           _M_n = 0;
663           ++_M_position;
664           if (_M_position != _Position())
665             _M_result = &_M_current_match();
666           else if (_M_has_m1 && __prev->suffix().length() != 0)
667             {
668               _M_suffix.matched = true;
669               _M_suffix.first = __prev->suffix().first;
670               _M_suffix.second = __prev->suffix().second;
671               _M_result = &_M_suffix;
672             }
673           else
674             *this = regex_token_iterator();
675         }
676       return *this;
677     }
678
679   template<typename _Bi_iter,
680            typename _Ch_type,
681            typename _Rx_traits>
682     void
683     regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>::
684     _M_init(_Bi_iter __a, _Bi_iter __b)
685     {
686       _M_has_m1 = false;
687       for (auto __it : _M_subs)
688         if (__it == -1)
689           {
690             _M_has_m1 = true;
691             break;
692           }
693       if (_M_position != _Position())
694         _M_result = &_M_current_match();
695       else if (_M_has_m1)
696         {
697           _M_suffix.matched = true;
698           _M_suffix.first = __a;
699           _M_suffix.second = __b;
700           _M_result = &_M_suffix;
701         }
702       else
703         _M_result = nullptr;
704     }
705
706 _GLIBCXX_END_NAMESPACE_VERSION
707 } // namespace
708