libstdc++
regex.tcc
Go to the documentation of this file.
1 // class template regex -*- C++ -*-
2 
3 // Copyright (C) 2013-2015 Free Software Foundation, Inc.
4 //
5 // This file is part of the GNU ISO C++ Library. This library is free
6 // software; you can redistribute it and/or modify it under the
7 // terms of the GNU General Public License as published by the
8 // Free Software Foundation; either version 3, or (at your option)
9 // any later version.
10 
11 // This library is distributed in the hope that it will be useful,
12 // but WITHOUT ANY WARRANTY; without even the implied warranty of
13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 // GNU General Public License for more details.
15 
16 // Under Section 7 of GPL version 3, you are granted additional
17 // permissions described in the GCC Runtime Library Exception, version
18 // 3.1, as published by the Free Software Foundation.
19 
20 // You should have received a copy of the GNU General Public License and
21 // a copy of the GCC Runtime Library Exception along with this program;
22 // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
23 // <http://www.gnu.org/licenses/>.
24 
25 /**
26  * @file bits/regex.tcc
27  * This is an internal header file, included by other library headers.
28  * Do not attempt to use it directly. @headername{regex}
29  */
30 
31 // A non-standard switch to let the user pick the matching algorithm.
32 // If _GLIBCXX_REGEX_USE_THOMPSON_NFA is defined, the thompson NFA
33 // algorithm will be used. This algorithm is not enabled by default,
34 // and cannot be used if the regex contains back-references, but has better
35 // (polynomial instead of exponential) worst case performance.
36 // See __regex_algo_impl below.
37 
38 namespace std _GLIBCXX_VISIBILITY(default)
39 {
40 namespace __detail
41 {
42 _GLIBCXX_BEGIN_NAMESPACE_VERSION
43 
44  // Result of merging regex_match and regex_search.
45  //
46  // __policy now can be _S_auto (auto dispatch) and _S_alternate (use
47  // the other one if possible, for test purpose).
48  //
49  // That __match_mode is true means regex_match, else regex_search.
50  template<typename _BiIter, typename _Alloc,
51  typename _CharT, typename _TraitsT,
52  _RegexExecutorPolicy __policy,
53  bool __match_mode>
54  bool
55  __regex_algo_impl(_BiIter __s,
56  _BiIter __e,
57  match_results<_BiIter, _Alloc>& __m,
58  const basic_regex<_CharT, _TraitsT>& __re,
60  {
61  if (__re._M_automaton == nullptr)
62  return false;
63 
64  typename match_results<_BiIter, _Alloc>::_Base_type& __res = __m;
65  __m._M_begin = __s;
66  __m._M_resize(__re._M_automaton->_M_sub_count());
67  for (auto& __it : __res)
68  __it.matched = false;
69 
70  // __policy is used by testsuites so that they can use Thompson NFA
71  // without defining a macro. Users should define
72  // _GLIBCXX_REGEX_USE_THOMPSON_NFA if they need to use this approach.
73  bool __ret;
74  if (!__re._M_automaton->_M_has_backref
75  && !(__re._M_flags & regex_constants::ECMAScript)
76 #ifndef _GLIBCXX_REGEX_USE_THOMPSON_NFA
77  && __policy == _RegexExecutorPolicy::_S_alternate
78 #endif
79  )
80  {
81  _Executor<_BiIter, _Alloc, _TraitsT, false>
82  __executor(__s, __e, __m, __re, __flags);
83  if (__match_mode)
84  __ret = __executor._M_match();
85  else
86  __ret = __executor._M_search();
87  }
88  else
89  {
90  _Executor<_BiIter, _Alloc, _TraitsT, true>
91  __executor(__s, __e, __m, __re, __flags);
92  if (__match_mode)
93  __ret = __executor._M_match();
94  else
95  __ret = __executor._M_search();
96  }
97  if (__ret)
98  {
99  for (auto& __it : __res)
100  if (!__it.matched)
101  __it.first = __it.second = __e;
102  auto& __pre = __m._M_prefix();
103  auto& __suf = __m._M_suffix();
104  if (__match_mode)
105  {
106  __pre.matched = false;
107  __pre.first = __s;
108  __pre.second = __s;
109  __suf.matched = false;
110  __suf.first = __e;
111  __suf.second = __e;
112  }
113  else
114  {
115  __pre.first = __s;
116  __pre.second = __res[0].first;
117  __pre.matched = (__pre.first != __pre.second);
118  __suf.first = __res[0].second;
119  __suf.second = __e;
120  __suf.matched = (__suf.first != __suf.second);
121  }
122  }
123  else
124  {
125  __m._M_resize(0);
126  for (auto& __it : __res)
127  {
128  __it.matched = false;
129  __it.first = __it.second = __e;
130  }
131  }
132  return __ret;
133  }
134 
135 _GLIBCXX_END_NAMESPACE_VERSION
136 }
137 
138 _GLIBCXX_BEGIN_NAMESPACE_VERSION
139 
140  template<typename _Ch_type>
141  template<typename _Fwd_iter>
142  typename regex_traits<_Ch_type>::string_type
144  lookup_collatename(_Fwd_iter __first, _Fwd_iter __last) const
145  {
146  typedef std::ctype<char_type> __ctype_type;
147  const __ctype_type& __fctyp(use_facet<__ctype_type>(_M_locale));
148 
149  static const char* __collatenames[] =
150  {
151  "NUL",
152  "SOH",
153  "STX",
154  "ETX",
155  "EOT",
156  "ENQ",
157  "ACK",
158  "alert",
159  "backspace",
160  "tab",
161  "newline",
162  "vertical-tab",
163  "form-feed",
164  "carriage-return",
165  "SO",
166  "SI",
167  "DLE",
168  "DC1",
169  "DC2",
170  "DC3",
171  "DC4",
172  "NAK",
173  "SYN",
174  "ETB",
175  "CAN",
176  "EM",
177  "SUB",
178  "ESC",
179  "IS4",
180  "IS3",
181  "IS2",
182  "IS1",
183  "space",
184  "exclamation-mark",
185  "quotation-mark",
186  "number-sign",
187  "dollar-sign",
188  "percent-sign",
189  "ampersand",
190  "apostrophe",
191  "left-parenthesis",
192  "right-parenthesis",
193  "asterisk",
194  "plus-sign",
195  "comma",
196  "hyphen",
197  "period",
198  "slash",
199  "zero",
200  "one",
201  "two",
202  "three",
203  "four",
204  "five",
205  "six",
206  "seven",
207  "eight",
208  "nine",
209  "colon",
210  "semicolon",
211  "less-than-sign",
212  "equals-sign",
213  "greater-than-sign",
214  "question-mark",
215  "commercial-at",
216  "A",
217  "B",
218  "C",
219  "D",
220  "E",
221  "F",
222  "G",
223  "H",
224  "I",
225  "J",
226  "K",
227  "L",
228  "M",
229  "N",
230  "O",
231  "P",
232  "Q",
233  "R",
234  "S",
235  "T",
236  "U",
237  "V",
238  "W",
239  "X",
240  "Y",
241  "Z",
242  "left-square-bracket",
243  "backslash",
244  "right-square-bracket",
245  "circumflex",
246  "underscore",
247  "grave-accent",
248  "a",
249  "b",
250  "c",
251  "d",
252  "e",
253  "f",
254  "g",
255  "h",
256  "i",
257  "j",
258  "k",
259  "l",
260  "m",
261  "n",
262  "o",
263  "p",
264  "q",
265  "r",
266  "s",
267  "t",
268  "u",
269  "v",
270  "w",
271  "x",
272  "y",
273  "z",
274  "left-curly-bracket",
275  "vertical-line",
276  "right-curly-bracket",
277  "tilde",
278  "DEL",
279  };
280 
281  string __s;
282  for (; __first != __last; ++__first)
283  __s += __fctyp.narrow(*__first, 0);
284 
285  for (const auto& __it : __collatenames)
286  if (__s == __it)
287  return string_type(1, __fctyp.widen(
288  static_cast<char>(&__it - __collatenames)));
289 
290  // TODO Add digraph support:
291  // http://boost.sourceforge.net/libs/regex/doc/collating_names.html
292 
293  return string_type();
294  }
295 
296  template<typename _Ch_type>
297  template<typename _Fwd_iter>
298  typename regex_traits<_Ch_type>::char_class_type
300  lookup_classname(_Fwd_iter __first, _Fwd_iter __last, bool __icase) const
301  {
302  typedef std::ctype<char_type> __ctype_type;
303  const __ctype_type& __fctyp(use_facet<__ctype_type>(_M_locale));
304 
305  // Mappings from class name to class mask.
306  static const pair<const char*, char_class_type> __classnames[] =
307  {
308  {"d", ctype_base::digit},
309  {"w", {ctype_base::alnum, _RegexMask::_S_under}},
310  {"s", ctype_base::space},
311  {"alnum", ctype_base::alnum},
312  {"alpha", ctype_base::alpha},
313  {"blank", ctype_base::blank},
314  {"cntrl", ctype_base::cntrl},
315  {"digit", ctype_base::digit},
316  {"graph", ctype_base::graph},
317  {"lower", ctype_base::lower},
318  {"print", ctype_base::print},
319  {"punct", ctype_base::punct},
320  {"space", ctype_base::space},
321  {"upper", ctype_base::upper},
322  {"xdigit", ctype_base::xdigit},
323  };
324 
325  string __s;
326  for (; __first != __last; ++__first)
327  __s += __fctyp.narrow(__fctyp.tolower(*__first), 0);
328 
329  for (const auto& __it : __classnames)
330  if (__s == __it.first)
331  {
332  if (__icase
333  && ((__it.second
334  & (ctype_base::lower | ctype_base::upper)) != 0))
335  return ctype_base::alpha;
336  return __it.second;
337  }
338  return 0;
339  }
340 
341  template<typename _Ch_type>
342  bool
344  isctype(_Ch_type __c, char_class_type __f) const
345  {
346  typedef std::ctype<char_type> __ctype_type;
347  const __ctype_type& __fctyp(use_facet<__ctype_type>(_M_locale));
348 
349  return __fctyp.is(__f._M_base, __c)
350  // [[:w:]]
351  || ((__f._M_extended & _RegexMask::_S_under)
352  && __c == __fctyp.widen('_'));
353  }
354 
355  template<typename _Ch_type>
356  int
358  value(_Ch_type __ch, int __radix) const
359  {
361  long __v;
362  if (__radix == 8)
363  __is >> std::oct;
364  else if (__radix == 16)
365  __is >> std::hex;
366  __is >> __v;
367  return __is.fail() ? -1 : __v;
368  }
369 
370  template<typename _Bi_iter, typename _Alloc>
371  template<typename _Out_iter>
373  format(_Out_iter __out,
374  const match_results<_Bi_iter, _Alloc>::char_type* __fmt_first,
375  const match_results<_Bi_iter, _Alloc>::char_type* __fmt_last,
376  match_flag_type __flags) const
377  {
378  _GLIBCXX_DEBUG_ASSERT( ready() );
379  regex_traits<char_type> __traits;
380  typedef std::ctype<char_type> __ctype_type;
381  const __ctype_type&
382  __fctyp(use_facet<__ctype_type>(__traits.getloc()));
383 
384  auto __output = [&](size_t __idx)
385  {
386  auto& __sub = (*this)[__idx];
387  if (__sub.matched)
388  __out = std::copy(__sub.first, __sub.second, __out);
389  };
390 
391  if (__flags & regex_constants::format_sed)
392  {
393  for (; __fmt_first != __fmt_last;)
394  if (*__fmt_first == '&')
395  {
396  __output(0);
397  ++__fmt_first;
398  }
399  else if (*__fmt_first == '\\')
400  {
401  if (++__fmt_first != __fmt_last
402  && __fctyp.is(__ctype_type::digit, *__fmt_first))
403  __output(__traits.value(*__fmt_first++, 10));
404  else
405  *__out++ = '\\';
406  }
407  else
408  *__out++ = *__fmt_first++;
409  }
410  else
411  {
412  while (1)
413  {
414  auto __next = std::find(__fmt_first, __fmt_last, '$');
415  if (__next == __fmt_last)
416  break;
417 
418  __out = std::copy(__fmt_first, __next, __out);
419 
420  auto __eat = [&](char __ch) -> bool
421  {
422  if (*__next == __ch)
423  {
424  ++__next;
425  return true;
426  }
427  return false;
428  };
429 
430  if (++__next == __fmt_last)
431  *__out++ = '$';
432  else if (__eat('$'))
433  *__out++ = '$';
434  else if (__eat('&'))
435  __output(0);
436  else if (__eat('`'))
437  {
438  auto& __sub = _M_prefix();
439  if (__sub.matched)
440  __out = std::copy(__sub.first, __sub.second, __out);
441  }
442  else if (__eat('\''))
443  {
444  auto& __sub = _M_suffix();
445  if (__sub.matched)
446  __out = std::copy(__sub.first, __sub.second, __out);
447  }
448  else if (__fctyp.is(__ctype_type::digit, *__next))
449  {
450  long __num = __traits.value(*__next, 10);
451  if (++__next != __fmt_last
452  && __fctyp.is(__ctype_type::digit, *__next))
453  {
454  __num *= 10;
455  __num += __traits.value(*__next++, 10);
456  }
457  if (0 <= __num && __num < this->size())
458  __output(__num);
459  }
460  else
461  *__out++ = '$';
462  __fmt_first = __next;
463  }
464  __out = std::copy(__fmt_first, __fmt_last, __out);
465  }
466  return __out;
467  }
468 
469  template<typename _Out_iter, typename _Bi_iter,
470  typename _Rx_traits, typename _Ch_type>
471  _Out_iter
472  regex_replace(_Out_iter __out, _Bi_iter __first, _Bi_iter __last,
474  const _Ch_type* __fmt,
476  {
478  _IterT __i(__first, __last, __e, __flags);
479  _IterT __end;
480  if (__i == __end)
481  {
482  if (!(__flags & regex_constants::format_no_copy))
483  __out = std::copy(__first, __last, __out);
484  }
485  else
486  {
487  sub_match<_Bi_iter> __last;
488  auto __len = char_traits<_Ch_type>::length(__fmt);
489  for (; __i != __end; ++__i)
490  {
491  if (!(__flags & regex_constants::format_no_copy))
492  __out = std::copy(__i->prefix().first, __i->prefix().second,
493  __out);
494  __out = __i->format(__out, __fmt, __fmt + __len, __flags);
495  __last = __i->suffix();
497  break;
498  }
499  if (!(__flags & regex_constants::format_no_copy))
500  __out = std::copy(__last.first, __last.second, __out);
501  }
502  return __out;
503  }
504 
505  template<typename _Bi_iter,
506  typename _Ch_type,
507  typename _Rx_traits>
508  bool
510  operator==(const regex_iterator& __rhs) const
511  {
512  if (_M_pregex == nullptr && __rhs._M_pregex == nullptr)
513  return true;
514  return _M_pregex == __rhs._M_pregex
515  && _M_begin == __rhs._M_begin
516  && _M_end == __rhs._M_end
517  && _M_flags == __rhs._M_flags
518  && _M_match[0] == __rhs._M_match[0];
519  }
520 
521  template<typename _Bi_iter,
522  typename _Ch_type,
523  typename _Rx_traits>
527  {
528  // In all cases in which the call to regex_search returns true,
529  // match.prefix().first shall be equal to the previous value of
530  // match[0].second, and for each index i in the half-open range
531  // [0, match.size()) for which match[i].matched is true,
532  // match[i].position() shall return distance(begin, match[i].first).
533  // [28.12.1.4.5]
534  if (_M_match[0].matched)
535  {
536  auto __start = _M_match[0].second;
537  auto __prefix_first = _M_match[0].second;
538  if (_M_match[0].first == _M_match[0].second)
539  {
540  if (__start == _M_end)
541  {
542  _M_pregex = nullptr;
543  return *this;
544  }
545  else
546  {
547  if (regex_search(__start, _M_end, _M_match, *_M_pregex,
548  _M_flags
551  {
552  _GLIBCXX_DEBUG_ASSERT(_M_match[0].matched);
553  auto& __prefix = _M_match._M_prefix();
554  __prefix.first = __prefix_first;
555  __prefix.matched = __prefix.first != __prefix.second;
556  // [28.12.1.4.5]
557  _M_match._M_begin = _M_begin;
558  return *this;
559  }
560  else
561  ++__start;
562  }
563  }
565  if (regex_search(__start, _M_end, _M_match, *_M_pregex, _M_flags))
566  {
567  _GLIBCXX_DEBUG_ASSERT(_M_match[0].matched);
568  auto& __prefix = _M_match._M_prefix();
569  __prefix.first = __prefix_first;
570  __prefix.matched = __prefix.first != __prefix.second;
571  // [28.12.1.4.5]
572  _M_match._M_begin = _M_begin;
573  }
574  else
575  _M_pregex = nullptr;
576  }
577  return *this;
578  }
579 
580  template<typename _Bi_iter,
581  typename _Ch_type,
582  typename _Rx_traits>
586  {
587  _M_position = __rhs._M_position;
588  _M_subs = __rhs._M_subs;
589  _M_n = __rhs._M_n;
590  _M_suffix = __rhs._M_suffix;
591  _M_has_m1 = __rhs._M_has_m1;
592  _M_normalize_result();
593  return *this;
594  }
595 
596  template<typename _Bi_iter,
597  typename _Ch_type,
598  typename _Rx_traits>
599  bool
602  {
603  if (_M_end_of_seq() && __rhs._M_end_of_seq())
604  return true;
605  if (_M_suffix.matched && __rhs._M_suffix.matched
606  && _M_suffix == __rhs._M_suffix)
607  return true;
608  if (_M_end_of_seq() || _M_suffix.matched
609  || __rhs._M_end_of_seq() || __rhs._M_suffix.matched)
610  return false;
611  return _M_position == __rhs._M_position
612  && _M_n == __rhs._M_n
613  && _M_subs == __rhs._M_subs;
614  }
615 
616  template<typename _Bi_iter,
617  typename _Ch_type,
618  typename _Rx_traits>
622  {
623  _Position __prev = _M_position;
624  if (_M_suffix.matched)
625  *this = regex_token_iterator();
626  else if (_M_n + 1 < _M_subs.size())
627  {
628  _M_n++;
629  _M_result = &_M_current_match();
630  }
631  else
632  {
633  _M_n = 0;
634  ++_M_position;
635  if (_M_position != _Position())
636  _M_result = &_M_current_match();
637  else if (_M_has_m1 && __prev->suffix().length() != 0)
638  {
639  _M_suffix.matched = true;
640  _M_suffix.first = __prev->suffix().first;
641  _M_suffix.second = __prev->suffix().second;
642  _M_result = &_M_suffix;
643  }
644  else
645  *this = regex_token_iterator();
646  }
647  return *this;
648  }
649 
650  template<typename _Bi_iter,
651  typename _Ch_type,
652  typename _Rx_traits>
653  void
655  _M_init(_Bi_iter __a, _Bi_iter __b)
656  {
657  _M_has_m1 = false;
658  for (auto __it : _M_subs)
659  if (__it == -1)
660  {
661  _M_has_m1 = true;
662  break;
663  }
664  if (_M_position != _Position())
665  _M_result = &_M_current_match();
666  else if (_M_has_m1)
667  {
668  _M_suffix.matched = true;
669  _M_suffix.first = __a;
670  _M_suffix.second = __b;
671  _M_result = &_M_suffix;
672  }
673  else
674  _M_result = nullptr;
675  }
676 
677 _GLIBCXX_END_NAMESPACE_VERSION
678 } // namespace
679 
int value(_Ch_type __ch, int __radix) const
Converts a digit to an int.
Definition: regex.tcc:358
bool operator==(const regex_token_iterator &__rhs) const
Compares a regex_token_iterator to another for equality.
Definition: regex.tcc:601
ios_base & hex(ios_base &__base)
Calls base.setf(ios_base::hex, ios_base::basefield).
Definition: ios_base.h:1024
regex_token_iterator & operator++()
Increments a regex_token_iterator.
Definition: regex.tcc:621
Controlling input for std::string.
Definition: iosfwd:100
bool isctype(_Ch_type __c, char_class_type __f) const
Determines if c is a member of an identified class.
Definition: regex.tcc:344
bool operator==(const regex_iterator &__rhs) const
Tests the equivalence of two regex iterators.
Definition: regex.tcc:510
bool regex_search(_Bi_iter __s, _Bi_iter __e, match_results< _Bi_iter, _Alloc > &__m, const basic_regex< _Ch_type, _Rx_traits > &__re, regex_constants::match_flag_type __flags=regex_constants::match_default)
Definition: regex.h:2146
constexpr syntax_option_type ECMAScript
constexpr match_flag_type match_not_null
constexpr match_flag_type match_prev_avail
char_class_type lookup_classname(_Fwd_iter __first, _Fwd_iter __last, bool __icase=false) const
Maps one or more characters to a named character classification.
Definition: regex.tcc:300
ios_base & oct(ios_base &__base)
Calls base.setf(ios_base::oct, ios_base::basefield).
Definition: ios_base.h:1032
_T2 second
first is a copy of the first object
Definition: stl_pair.h:102
constexpr match_flag_type format_sed
regex_iterator & operator++()
Increments a regex_iterator.
Definition: regex.tcc:526
regex_token_iterator & operator=(const regex_token_iterator &__rhs)
Assigns a regex_token_iterator to another.
Definition: regex.tcc:585
ISO C++ entities toplevel namespace is std.
Primary class template ctype facet.This template class defines classification and conversion function...
Basis for explicit traits specializations.
Definition: char_traits.h:227
Struct holding two objects of arbitrary type.
Definition: stl_pair.h:96
_Out_iter format(_Out_iter __out, const char_type *__fmt_first, const char_type *__fmt_last, match_flag_type __flags=regex_constants::format_default) const
constexpr match_flag_type match_continuous
Describes aspects of a regular expression.
Definition: regex.h:87
match_flag_type
This is a bitmask type indicating regex matching rules.
locale_type getloc() const
Gets a copy of the current locale in use by the regex_traits object.
Definition: regex.h:377
string_type lookup_collatename(_Fwd_iter __first, _Fwd_iter __last) const
Gets a collation element by name.
Definition: regex.tcc:144
bool fail() const
Fast error checking.
Definition: basic_ios.h:201
constexpr match_flag_type format_no_copy
constexpr match_flag_type format_first_only
Managing sequences of characters and character-like objects.
_T1 first
second_type is the second bound type
Definition: stl_pair.h:101
_Out_iter regex_replace(_Out_iter __out, _Bi_iter __first, _Bi_iter __last, const basic_regex< _Ch_type, _Rx_traits > &__e, const basic_string< _Ch_type, _St, _Sa > &__fmt, regex_constants::match_flag_type __flags=regex_constants::match_default)
Search for a regular expression within a range for multiple times, and replace the matched parts thro...
Definition: regex.h:2294