IN2OSM  1.0.1
regex.h
Go to the documentation of this file.
1 // Tencent is pleased to support the open source community by making RapidJSON available.
2 //
3 // Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved.
4 //
5 // Licensed under the MIT License (the "License"); you may not use this file except
6 // in compliance with the License. You may obtain a copy of the License at
7 //
8 // http://opensource.org/licenses/MIT
9 //
10 // Unless required by applicable law or agreed to in writing, software distributed
11 // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
12 // CONDITIONS OF ANY KIND, either express or implied. See the License for the
13 // specific language governing permissions and limitations under the License.
14 
15 #ifndef RAPIDJSON_INTERNAL_REGEX_H_
16 #define RAPIDJSON_INTERNAL_REGEX_H_
17 
18 #include "../allocators.h"
19 #include "../stream.h"
20 #include "stack.h"
21 
22 #ifdef __clang__
23 RAPIDJSON_DIAG_PUSH
24 RAPIDJSON_DIAG_OFF(padded)
25 RAPIDJSON_DIAG_OFF(switch-enum)
26 RAPIDJSON_DIAG_OFF(implicit-fallthrough)
27 #elif defined(_MSC_VER)
28 RAPIDJSON_DIAG_PUSH
29 RAPIDJSON_DIAG_OFF(4512) // assignment operator could not be generated
30 #endif
31 
32 #ifdef __GNUC__
33 RAPIDJSON_DIAG_PUSH
34 RAPIDJSON_DIAG_OFF(effc++)
35 #if __GNUC__ >= 7
36 RAPIDJSON_DIAG_OFF(implicit-fallthrough)
37 #endif
38 #endif
39 
40 #ifndef RAPIDJSON_REGEX_VERBOSE
41 #define RAPIDJSON_REGEX_VERBOSE 0
42 #endif
43 
45 namespace internal {
46 
48 // DecodedStream
49 
50 template <typename SourceStream, typename Encoding> // Stream adapter that exposes a source stream one decoded codepoint at a time
52 public:
53  DecodedStream(SourceStream& ss) : ss_(ss), codepoint_() { Decode(); } // Binds to ss and eagerly decodes the first codepoint
54  unsigned Peek() { return codepoint_; } // Returns the current codepoint without consuming it
55  unsigned Take() { // Returns the current codepoint and advances to the next one
56  unsigned c = codepoint_;
57  if (c) // No further decoding once the terminating '\0' has been reached
58  Decode();
59  return c;
60  }
61 
62 private:
63  void Decode() { // Decodes the next codepoint from ss_ through Encoding::Decode
64  if (!Encoding::Decode(ss_, &codepoint_))
65  codepoint_ = 0; // A failed decode is surfaced to callers as end of input (0)
66  }
67 
68  SourceStream& ss_; // Underlying source stream, held by reference
69  unsigned codepoint_; // Most recently decoded codepoint; 0 means end of input or decode failure
70 };
71 
73 // GenericRegex
74 
75 static const SizeType kRegexInvalidState = ~SizeType(0);
77 
78 template <typename Encoding, typename Allocator>
80 
82 
113 template <typename Encoding, typename Allocator = CrtAllocator>
115 public:
116  typedef Encoding EncodingType;
117  typedef typename Encoding::Ch Ch;
118  template <typename, typename> friend class GenericRegexSearch;
119 
120  GenericRegex(const Ch* source, Allocator* allocator = 0) : // Compiles the pattern in source; check IsValid() afterwards
121  ownAllocator_(allocator ? 0 : RAPIDJSON_NEW(Allocator)()), allocator_(allocator ? allocator : ownAllocator_), // An allocator is created and owned only when the caller passes none
122  states_(allocator_, 256), ranges_(allocator_, 256), root_(kRegexInvalidState), stateCount_(), rangeCount_(), // root_ starts as the invalid sentinel and is only replaced by a successful Parse
123  anchorBegin_(), anchorEnd_()
124  {
127  Parse(ds); // NOTE(review): ds is declared on listing lines 125-126, which are absent from this listing
128  }
129 
131  {
132  RAPIDJSON_DELETE(ownAllocator_);
133  }
134 
135  bool IsValid() const { // True once a root state has been set, i.e. root_ is no longer the kRegexInvalidState sentinel
136  return root_ != kRegexInvalidState;
137  }
138 
139 private:
140  enum Operator {
146  kLeftParenthesis
147  };
148 
149  static const unsigned kAnyCharacterClass = 0xFFFFFFFF;
150  static const unsigned kRangeCharacterClass = 0xFFFFFFFE;
151  static const unsigned kRangeNegationFlag = 0x80000000;
152 
153  struct Range {
154  unsigned start; //
155  unsigned end;
157  };
158 
159  struct State {
163  unsigned codepoint;
164  };
165 
166  struct Frag {
167  Frag(SizeType s, SizeType o, SizeType m) : start(s), out(o), minIndex(m) {}
171  };
172 
174  RAPIDJSON_ASSERT(index < stateCount_);
175  return states_.template Bottom<State>()[index];
176  }
177 
178  const State& GetState(SizeType index) const { // Read-only access to the state stored at index in states_
179  RAPIDJSON_ASSERT(index < stateCount_); // index must refer to an already created state
180  return states_.template Bottom<State>()[index];
181  }
182 
184  RAPIDJSON_ASSERT(index < rangeCount_);
185  return ranges_.template Bottom<Range>()[index];
186  }
187 
188  const Range& GetRange(SizeType index) const { // Read-only access to the range stored at index in ranges_
189  RAPIDJSON_ASSERT(index < rangeCount_); // index must refer to an already created range
190  return ranges_.template Bottom<Range>()[index];
191  }
192 
193  template <typename InputStream>
195  Stack<Allocator> operandStack(allocator_, 256); // Frag
196  Stack<Allocator> operatorStack(allocator_, 256); // Operator
197  Stack<Allocator> atomCountStack(allocator_, 256); // unsigned (Atom per parenthesis)
198 
199  *atomCountStack.template Push<unsigned>() = 0;
200 
201  unsigned codepoint;
202  while (ds.Peek() != 0) {
203  switch (codepoint = ds.Take()) {
204  case '^':
205  anchorBegin_ = true;
206  break;
207 
208  case '$':
209  anchorEnd_ = true;
210  break;
211 
212  case '|':
213  while (!operatorStack.Empty() && *operatorStack.template Top<Operator>() < kAlternation)
214  if (!Eval(operandStack, *operatorStack.template Pop<Operator>(1)))
215  return;
216  *operatorStack.template Push<Operator>() = kAlternation;
217  *atomCountStack.template Top<unsigned>() = 0;
218  break;
219 
220  case '(':
221  *operatorStack.template Push<Operator>() = kLeftParenthesis;
222  *atomCountStack.template Push<unsigned>() = 0;
223  break;
224 
225  case ')':
226  while (!operatorStack.Empty() && *operatorStack.template Top<Operator>() != kLeftParenthesis)
227  if (!Eval(operandStack, *operatorStack.template Pop<Operator>(1)))
228  return;
229  if (operatorStack.Empty())
230  return;
231  operatorStack.template Pop<Operator>(1);
232  atomCountStack.template Pop<unsigned>(1);
233  ImplicitConcatenation(atomCountStack, operatorStack);
234  break;
235 
236  case '?':
237  if (!Eval(operandStack, kZeroOrOne))
238  return;
239  break;
240 
241  case '*':
242  if (!Eval(operandStack, kZeroOrMore))
243  return;
244  break;
245 
246  case '+':
247  if (!Eval(operandStack, kOneOrMore))
248  return;
249  break;
250 
251  case '{':
252  {
253  unsigned n, m;
254  if (!ParseUnsigned(ds, &n))
255  return;
256 
257  if (ds.Peek() == ',') {
258  ds.Take();
259  if (ds.Peek() == '}')
260  m = kInfinityQuantifier;
261  else if (!ParseUnsigned(ds, &m) || m < n)
262  return;
263  }
264  else
265  m = n;
266 
267  if (!EvalQuantifier(operandStack, n, m) || ds.Peek() != '}')
268  return;
269  ds.Take();
270  }
271  break;
272 
273  case '.':
274  PushOperand(operandStack, kAnyCharacterClass);
275  ImplicitConcatenation(atomCountStack, operatorStack);
276  break;
277 
278  case '[':
279  {
280  SizeType range;
281  if (!ParseRange(ds, &range))
282  return;
283  SizeType s = NewState(kRegexInvalidState, kRegexInvalidState, kRangeCharacterClass);
284  GetState(s).rangeStart = range;
285  *operandStack.template Push<Frag>() = Frag(s, s, s);
286  }
287  ImplicitConcatenation(atomCountStack, operatorStack);
288  break;
289 
290  case '\\': // Escape character
291  if (!CharacterEscape(ds, &codepoint))
292  return; // Unsupported escape character
293  // fall through to default
294 
295  default: // Pattern character
296  PushOperand(operandStack, codepoint);
297  ImplicitConcatenation(atomCountStack, operatorStack);
298  }
299  }
300 
301  while (!operatorStack.Empty())
302  if (!Eval(operandStack, *operatorStack.template Pop<Operator>(1)))
303  return;
304 
305  // Link the operand to matching state.
306  if (operandStack.GetSize() == sizeof(Frag)) {
307  Frag* e = operandStack.template Pop<Frag>(1);
308  Patch(e->out, NewState(kRegexInvalidState, kRegexInvalidState, 0));
309  root_ = e->start;
310 
311 #if RAPIDJSON_REGEX_VERBOSE
312  printf("root: %d\n", root_);
313  for (SizeType i = 0; i < stateCount_ ; i++) {
314  State& s = GetState(i);
315  printf("[%2d] out: %2d out1: %2d c: '%c'\n", i, s.out, s.out1, (char)s.codepoint);
316  }
317  printf("\n");
318 #endif
319  }
320  }
321 
322  SizeType NewState(SizeType out, SizeType out1, unsigned codepoint) { // Appends a state to states_ and returns its index
323  State* s = states_.template Push<State>();
324  s->out = out;
325  s->out1 = out1;
326  s->codepoint = codepoint;
328  return stateCount_++; // The index of the new state is the count before the increment
329  }
330 
331  void PushOperand(Stack<Allocator>& operandStack, unsigned codepoint) { // Creates a single state for codepoint and pushes it as a one-state fragment
332  SizeType s = NewState(kRegexInvalidState, kRegexInvalidState, codepoint); // Both outputs are left unlinked for now
333  *operandStack.template Push<Frag>() = Frag(s, s, s); // start, out and minIndex all refer to the new state
334  }
335 
336  void ImplicitConcatenation(Stack<Allocator>& atomCountStack, Stack<Allocator>& operatorStack) { // Records one more atom in the current group, pushing a concatenation operator if an atom precedes it
337  if (*atomCountStack.template Top<unsigned>()) // A non-zero count means this atom follows an earlier atom
338  *operatorStack.template Push<Operator>() = kConcatenation;
339  (*atomCountStack.template Top<unsigned>())++;
340  }
341 
343  SizeType old = l1;
344  while (GetState(l1).out != kRegexInvalidState)
345  l1 = GetState(l1).out;
346  GetState(l1).out = l2;
347  return old;
348  }
349 
350  void Patch(SizeType l, SizeType s) { // Walks the list of states chained through .out starting at l and points every one of them at s
351  for (SizeType next; l != kRegexInvalidState; l = next) {
352  next = GetState(l).out; // Remember the next list element before the link is overwritten
353  GetState(l).out = s;
354  }
355  }
356 
357  bool Eval(Stack<Allocator>& operandStack, Operator op) { // Applies op to the fragment(s) on top of operandStack; returns false if operands are missing or op cannot be evaluated
358  switch (op) {
359  case kConcatenation:
360  RAPIDJSON_ASSERT(operandStack.GetSize() >= sizeof(Frag) * 2);
361  {
362  Frag e2 = *operandStack.template Pop<Frag>(1);
363  Frag e1 = *operandStack.template Pop<Frag>(1);
364  Patch(e1.out, e2.start); // Every pending exit of e1 now leads into e2
365  *operandStack.template Push<Frag>() = Frag(e1.start, e2.out, Min(e1.minIndex, e2.minIndex));
366  }
367  return true;
368 
369  case kAlternation:
370  if (operandStack.GetSize() >= sizeof(Frag) * 2) {
371  Frag e2 = *operandStack.template Pop<Frag>(1);
372  Frag e1 = *operandStack.template Pop<Frag>(1);
373  SizeType s = NewState(e1.start, e2.start, 0); // Split state branching to either alternative
374  *operandStack.template Push<Frag>() = Frag(s, Append(e1.out, e2.out), Min(e1.minIndex, e2.minIndex)); // Pending exits of both alternatives are joined into one list
375  return true;
376  }
377  return false;
378 
379  case kZeroOrOne:
380  if (operandStack.GetSize() >= sizeof(Frag)) {
381  Frag e = *operandStack.template Pop<Frag>(1);
382  SizeType s = NewState(kRegexInvalidState, e.start, 0); // Split: out1 enters e, out is left pending (the skip path)
383  *operandStack.template Push<Frag>() = Frag(s, Append(e.out, s), e.minIndex); // Pending exits are those of e plus the skip path of s
384  return true;
385  }
386  return false;
387 
388  case kZeroOrMore:
389  if (operandStack.GetSize() >= sizeof(Frag)) {
390  Frag e = *operandStack.template Pop<Frag>(1);
391  SizeType s = NewState(kRegexInvalidState, e.start, 0);
392  Patch(e.out, s); // Exits of e loop back to the split state
393  *operandStack.template Push<Frag>() = Frag(s, s, e.minIndex); // Entry is the split state, so e may be skipped entirely
394  return true;
395  }
396  return false;
397 
398  case kOneOrMore:
399  if (operandStack.GetSize() >= sizeof(Frag)) {
400  Frag e = *operandStack.template Pop<Frag>(1);
401  SizeType s = NewState(kRegexInvalidState, e.start, 0);
402  Patch(e.out, s); // Exits of e reach the split state, which can re-enter e
403  *operandStack.template Push<Frag>() = Frag(e.start, s, e.minIndex); // Entry is e itself, so e runs at least once
404  return true;
405  }
406  return false;
407 
408  default:
409  // syntax error (e.g. unclosed kLeftParenthesis)
410  return false;
411  }
412  }
413 
414  bool EvalQuantifier(Stack<Allocator>& operandStack, unsigned n, unsigned m) { // Expands the {n,m} quantifier on the top operand into clones combined with ?, * or +; returns false only for {0}
415  RAPIDJSON_ASSERT(n <= m);
416  RAPIDJSON_ASSERT(operandStack.GetSize() >= sizeof(Frag)); // NOTE(review): a quantifier with no preceding operand trips this assert instead of returning false
417 
418  if (n == 0) {
419  if (m == 0) // a{0} is not supported
420  return false;
421  else if (m == kInfinityQuantifier)
422  Eval(operandStack, kZeroOrMore); // a{0,} -> a*
423  else {
424  Eval(operandStack, kZeroOrOne); // a{0,5} -> a?
425  for (unsigned i = 0; i < m - 1; i++)
426  CloneTopOperand(operandStack); // a{0,5} -> a? a? a? a? a?
427  for (unsigned i = 0; i < m - 1; i++)
428  Eval(operandStack, kConcatenation); // a{0,5} -> a?a?a?a?a?
429  }
430  return true; // NOTE(review): the results of the Eval calls above are not checked
431  }
432 
433  for (unsigned i = 0; i < n - 1; i++) // a{3} -> a a a
434  CloneTopOperand(operandStack);
435 
436  if (m == kInfinityQuantifier)
437  Eval(operandStack, kOneOrMore); // a{3,} -> a a a+
438  else if (m > n) {
439  CloneTopOperand(operandStack); // a{3,5} -> a a a a
440  Eval(operandStack, kZeroOrOne); // a{3,5} -> a a a a?
441  for (unsigned i = n; i < m - 1; i++)
442  CloneTopOperand(operandStack); // a{3,5} -> a a a a? a?
443  for (unsigned i = n; i < m; i++)
444  Eval(operandStack, kConcatenation); // a{3,5} -> a a aa?a?
445  }
446 
447  for (unsigned i = 0; i < n - 1; i++)
448  Eval(operandStack, kConcatenation); // a{3} -> aaa, a{3,} -> aaa+, a{3,5} -> aaaa?a?
449 
450  return true;
451  }
452 
453  static SizeType Min(SizeType a, SizeType b) { return a < b ? a : b; } // Smaller of two indices; returns b when they are equal
454 
455  void CloneTopOperand(Stack<Allocator>& operandStack) { // Duplicates the states of the top fragment and pushes the copy as a new fragment
456  const Frag src = *operandStack.template Top<Frag>(); // Copy constructor to prevent invalidation
457  SizeType count = stateCount_ - src.minIndex; // Assumes top operand contains states in [src->minIndex, stateCount_)
458  State* s = states_.template Push<State>(count);
459  memcpy(s, &GetState(src.minIndex), count * sizeof(State));
460  for (SizeType j = 0; j < count; j++) { // Shift every valid link by count so the copies point at each other, not at the originals
461  if (s[j].out != kRegexInvalidState)
462  s[j].out += count;
463  if (s[j].out1 != kRegexInvalidState)
464  s[j].out1 += count;
465  }
466  *operandStack.template Push<Frag>() = Frag(src.start + count, src.out + count, src.minIndex + count);
467  stateCount_ += count; // Updated last: GetState() above asserts against the old count
468  }
469 
470  template <typename InputStream>
471  bool ParseUnsigned(DecodedStream<InputStream, Encoding>& ds, unsigned* u) { // Parses a run of decimal digits from ds into *u; returns false if no digit is present or the value would exceed 2^32 - 1
472  unsigned r = 0;
473  if (ds.Peek() < '0' || ds.Peek() > '9')
474  return false;
475  while (ds.Peek() >= '0' && ds.Peek() <= '9') {
476  if (r > 429496729 || (r == 429496729 && ds.Peek() > '5')) // 2^32 - 1 = 4294967295; above 429496729 any further digit overflows
477  return false; // overflow
478  r = r * 10 + (ds.Take() - '0');
479  }
480  *u = r;
481  return true;
482  }
483 
484  template <typename InputStream>
486  bool isBegin = true;
487  bool negate = false;
488  int step = 0;
490  SizeType current = kRegexInvalidRange;
491  unsigned codepoint;
492  while ((codepoint = ds.Take()) != 0) {
493  if (isBegin) {
494  isBegin = false;
495  if (codepoint == '^') {
496  negate = true;
497  continue;
498  }
499  }
500 
501  switch (codepoint) {
502  case ']':
503  if (start == kRegexInvalidRange)
504  return false; // Error: nothing inside []
505  if (step == 2) { // Add trailing '-'
506  SizeType r = NewRange('-');
507  RAPIDJSON_ASSERT(current != kRegexInvalidRange);
508  GetRange(current).next = r;
509  }
510  if (negate)
511  GetRange(start).start |= kRangeNegationFlag;
512  *range = start;
513  return true;
514 
515  case '\\':
516  if (ds.Peek() == 'b') {
517  ds.Take();
518  codepoint = 0x0008; // Escape backspace character
519  }
520  else if (!CharacterEscape(ds, &codepoint))
521  return false;
522  // fall through to default
523 
524  default:
525  switch (step) {
526  case 1:
527  if (codepoint == '-') {
528  step++;
529  break;
530  }
531  // fall through to step 0 for other characters
532 
533  case 0:
534  {
535  SizeType r = NewRange(codepoint);
536  if (current != kRegexInvalidRange)
537  GetRange(current).next = r;
538  if (start == kRegexInvalidRange)
539  start = r;
540  current = r;
541  }
542  step = 1;
543  break;
544 
545  default:
546  RAPIDJSON_ASSERT(step == 2);
547  GetRange(current).end = codepoint;
548  step = 0;
549  }
550  }
551  }
552  return false;
553  }
554 
555  SizeType NewRange(unsigned codepoint) { // Appends a range to ranges_ and returns its index
556  Range* r = ranges_.template Push<Range>();
557  r->start = r->end = codepoint; // Starts as a single-codepoint range; ParseRange may later widen end
559  return rangeCount_++; // The index of the new range is the count before the increment
560  }
561 
562  template <typename InputStream>
563  bool CharacterEscape(DecodedStream<InputStream, Encoding>& ds, unsigned* escapedCodepoint) { // Consumes the character after a backslash and writes the codepoint it stands for; returns false if the escape is unsupported
564  unsigned codepoint;
565  switch (codepoint = ds.Take()) {
566  case '^': // The characters in this group are escaped to themselves
567  case '$':
568  case '|':
569  case '(':
570  case ')':
571  case '?':
572  case '*':
573  case '+':
574  case '.':
575  case '[':
576  case ']':
577  case '{':
578  case '}':
579  case '\\':
580  *escapedCodepoint = codepoint; return true;
581  case 'f': *escapedCodepoint = 0x000C; return true; // form feed
582  case 'n': *escapedCodepoint = 0x000A; return true; // line feed
583  case 'r': *escapedCodepoint = 0x000D; return true; // carriage return
584  case 't': *escapedCodepoint = 0x0009; return true; // horizontal tab
585  case 'v': *escapedCodepoint = 0x000B; return true; // vertical tab
586  default:
587  return false; // Unsupported escape character
588  }
589  }
590 
591  Allocator* ownAllocator_;
592  Allocator* allocator_;
598 
599  static const unsigned kInfinityQuantifier = ~0u;
600 
601  // For SearchWithAnchoring()
604 };
605 
606 template <typename RegexType, typename Allocator = CrtAllocator>
607 class GenericRegexSearch { // Runs a compiled regex (RegexType) against input, keeping its own working buffers
608 public:
609  typedef typename RegexType::EncodingType Encoding;
610  typedef typename Encoding::Ch Ch;
611 
612  GenericRegexSearch(const RegexType& regex, Allocator* allocator = 0) : // regex must be valid (asserted below); it is held by reference
613  regex_(regex), allocator_(allocator), ownAllocator_(0),
614  state0_(allocator, 0), state1_(allocator, 0), stateSet_() // The two stacks receive the caller's allocator argument, which may be null
615  {
616  RAPIDJSON_ASSERT(regex_.IsValid());
617  if (!allocator_)
618  ownAllocator_ = allocator_ = RAPIDJSON_NEW(Allocator)(); // Created and owned only when the caller passes no allocator
619  stateSet_ = static_cast<unsigned*>(allocator_->Malloc(GetStateSetSize()));
620  state0_.template Reserve<SizeType>(regex_.stateCount_); // Reserve room for every state so AddState can use PushUnsafe
621  state1_.template Reserve<SizeType>(regex_.stateCount_);
622  }
623 
625  Allocator::Free(stateSet_); // Release the state bitset, then the allocator if this object created it
626  RAPIDJSON_DELETE(ownAllocator_);
627  }
628 
629  template <typename InputStream>
630  bool Match(InputStream& is) { // Runs the regex with both begin and end anchoring forced on
631  return SearchWithAnchoring(is, true, true);
632  }
633 
634  bool Match(const Ch* s) { // String overload; NOTE(review): the stream `is` is declared on listing line 635, absent from this listing
636  return Match(is);
637  }
638 
639  template <typename InputStream>
640  bool Search(InputStream& is) { // Runs the regex using the anchors recorded in the regex itself (anchorBegin_/anchorEnd_)
641  return SearchWithAnchoring(is, regex_.anchorBegin_, regex_.anchorEnd_);
642  }
643 
644  bool Search(const Ch* s) { // String overload; NOTE(review): the stream `is` is declared on listing line 645, absent from this listing
646  return Search(is);
647  }
648 
649 private:
650  typedef typename RegexType::State State;
651  typedef typename RegexType::Range Range;
652 
653  template <typename InputStream>
654  bool SearchWithAnchoring(InputStream& is, bool anchorBegin, bool anchorEnd) { // Steps a set of active states over the input; NOTE(review): `ds` is declared on listing line 655, absent from this listing
656 
657  state0_.Clear();
658  Stack<Allocator> *current = &state0_, *next = &state1_; // The two stacks alternate as current and next state sets
659  const size_t stateSetSize = GetStateSetSize();
660  std::memset(stateSet_, 0, stateSetSize);
661 
662  bool matched = AddState(*current, regex_.root_);
663  unsigned codepoint;
664  while (!current->Empty() && (codepoint = ds.Take()) != 0) {
665  std::memset(stateSet_, 0, stateSetSize); // The bitset de-duplicates states within one step, so reset it per codepoint
666  next->Clear();
667  matched = false;
668  for (const SizeType* s = current->template Bottom<SizeType>(); s != current->template End<SizeType>(); ++s) {
669  const State& sr = regex_.GetState(*s);
670  if (sr.codepoint == codepoint ||
671  sr.codepoint == RegexType::kAnyCharacterClass ||
672  (sr.codepoint == RegexType::kRangeCharacterClass && MatchRange(sr.rangeStart, codepoint)))
673  {
674  matched = AddState(*next, sr.out) || matched;
675  if (!anchorEnd && matched) // Without an end anchor, the first match is enough
676  return true;
677  }
678  if (!anchorBegin)
679  AddState(*next, regex_.root_); // Without a begin anchor, a match may also start at the next codepoint
680  }
681  internal::Swap(current, next);
682  }
683 
684  return matched;
685  }
686 
687  size_t GetStateSetSize() const { // Size in bytes of a bitset with one bit per regex state, rounded up to whole groups of 32 bits
688  return (regex_.stateCount_ + 31) / 32 * 4;
689  }
690 
691  // Returns whether the added state is a match state
693  RAPIDJSON_ASSERT(index != kRegexInvalidState);
694 
695  const State& s = regex_.GetState(index);
696  if (s.out1 != kRegexInvalidState) { // Split
697  bool matched = AddState(l, s.out);
698  return AddState(l, s.out1) || matched;
699  }
700  else if (!(stateSet_[index >> 5] & (1u << (index & 31)))) { // Only push states not yet marked in the bitset
701  stateSet_[index >> 5] |= (1u << (index & 31));
702  *l.template PushUnsafe<SizeType>() = index;
703  }
704  return s.out == kRegexInvalidState; // by using PushUnsafe() above, we can ensure s is not invalidated due to reallocation.
705  }
706 
707  bool MatchRange(SizeType rangeIndex, unsigned codepoint) const { // Tests codepoint against the linked list of ranges starting at rangeIndex, honouring negation
708  bool yes = (regex_.GetRange(rangeIndex).start & RegexType::kRangeNegationFlag) == 0; // The negation flag is carried in the first range's start field
709  while (rangeIndex != kRegexInvalidRange) {
710  const Range& r = regex_.GetRange(rangeIndex);
711  if (codepoint >= (r.start & ~RegexType::kRangeNegationFlag) && codepoint <= r.end) // Mask the flag off before comparing
712  return yes;
713  rangeIndex = r.next;
714  }
715  return !yes;
716  }
717 
718  const RegexType& regex_; // Compiled regex being executed, held by reference
719  Allocator* allocator_; // Allocator used for stateSet_
720  Allocator* ownAllocator_; // Non-null only when this object created the allocator itself
724 };
725 
728 
729 } // namespace internal
731 
732 #ifdef __GNUC__
733 RAPIDJSON_DIAG_POP
734 #endif
735 
736 #if defined(__clang__) || defined(_MSC_VER)
737 RAPIDJSON_DIAG_POP
738 #endif
739 
740 #endif // RAPIDJSON_INTERNAL_REGEX_H_
bool MatchRange(SizeType rangeIndex, unsigned codepoint) const
Definition: regex.h:707
Stack< Allocator > states_
Definition: regex.h:593
Allocator * allocator_
Definition: regex.h:592
const CharType(& source)[N]
Definition: pointer.h:1204
RAPIDJSON_NAMESPACE_BEGIN typedef unsigned SizeType
Size type (for string lengths, array sizes, etc.)
Definition: rapidjson.h:384
bool IsValid() const
Definition: regex.h:135
void Parse(DecodedStream< InputStream, Encoding > &ds)
Definition: regex.h:194
GenericRegex(const Ch *source, Allocator *allocator=0)
Definition: regex.h:120
GenericRegexSearch(const RegexType &regex, Allocator *allocator=0)
Definition: regex.h:612
unsigned Take()
Definition: regex.h:55
SizeType rangeCount_
Definition: regex.h:597
Allocator * ownAllocator_
Definition: regex.h:720
SizeType NewRange(unsigned codepoint)
Definition: regex.h:555
unsigned Peek()
Definition: regex.h:54
bool ParseRange(DecodedStream< InputStream, Encoding > &ds, SizeType *range)
Definition: regex.h:485
GenericRegex< UTF8<> > Regex
Definition: regex.h:726
#define RAPIDJSON_NAMESPACE_BEGIN
provide custom rapidjson namespace (opening expression)
Definition: rapidjson.h:121
Stack< Allocator > state0_
Definition: regex.h:721
GenericRegexSearch< Regex > RegexSearch
Definition: regex.h:727
bool CharacterEscape(DecodedStream< InputStream, Encoding > &ds, unsigned *escapedCodepoint)
Definition: regex.h:563
SizeType out
Equal to kRegexInvalidState for a matching state.
Definition: regex.h:160
SizeType out1
Not equal to kRegexInvalidState for a split state.
Definition: regex.h:161
const Range & GetRange(SizeType index) const
Definition: regex.h:188
#define RAPIDJSON_NEW(TypeName)
Customization point for global new.
Definition: rapidjson.h:625
bool EvalQuantifier(Stack< Allocator > &operandStack, unsigned n, unsigned m)
Definition: regex.h:414
bool Match(InputStream &is)
Definition: regex.h:630
static SizeType Min(SizeType a, SizeType b)
Definition: regex.h:453
unsigned int uint32_t
Definition: stdint.h:126
Range & GetRange(SizeType index)
Definition: regex.h:183
Frag(SizeType s, SizeType o, SizeType m)
Definition: regex.h:167
bool Search(const Ch *s)
Definition: regex.h:644
Read-only string stream.
Definition: fwd.h:47
SizeType Append(SizeType l1, SizeType l2)
Definition: regex.h:342
static const SizeType kRegexInvalidRange
Definition: regex.h:76
bool Empty() const
Definition: stack.h:176
SizeType NewState(SizeType out, SizeType out1, unsigned codepoint)
Definition: regex.h:322
size_t GetSize() const
Definition: stack.h:177
Encoding::Ch Ch
Definition: regex.h:117
DecodedStream(SourceStream &ss)
Definition: regex.h:53
#define RAPIDJSON_NAMESPACE_END
provide custom rapidjson namespace (closing expression)
Definition: rapidjson.h:124
RegexType::Range Range
Definition: regex.h:651
void Swap(T &a, T &b) RAPIDJSON_NOEXCEPT
Custom swap() to avoid dependency on C++ <algorithm> header.
Definition: swap.h:33
#define RAPIDJSON_DELETE(x)
Customization point for global delete.
Definition: rapidjson.h:629
SizeType out
link-list of all output states
Definition: regex.h:169
Stack< Allocator > state1_
Definition: regex.h:722
void Patch(SizeType l, SizeType s)
Definition: regex.h:350
A type-unsafe stack for storing different types of data.
Definition: stack.h:36
bool Eval(Stack< Allocator > &operandStack, Operator op)
Definition: regex.h:357
const GenericPointer< typename T::ValueType > T2 T::AllocatorType & a
Definition: pointer.h:1181
Encoding EncodingType
Definition: regex.h:116
static const SizeType kRegexInvalidState
Represents an invalid index in GenericRegex::State::out, out1.
Definition: regex.h:75
RegexType::EncodingType Encoding
Definition: regex.h:609
bool ParseUnsigned(DecodedStream< InputStream, Encoding > &ds, unsigned *u)
Definition: regex.h:471
Stack< Allocator > ranges_
Definition: regex.h:594
State & GetState(SizeType index)
Definition: regex.h:173
Regular expression engine with a subset of ECMAScript grammar.
Definition: regex.h:114
size_t GetStateSetSize() const
Definition: regex.h:687
bool SearchWithAnchoring(InputStream &is, bool anchorBegin, bool anchorEnd)
Definition: regex.h:654
SizeType stateCount_
Definition: regex.h:596
Allocator * ownAllocator_
Definition: regex.h:591
void ImplicitConcatenation(Stack< Allocator > &atomCountStack, Stack< Allocator > &operatorStack)
Definition: regex.h:336
unsigned codepoint_
Definition: regex.h:69
void PushOperand(Stack< Allocator > &operandStack, unsigned codepoint)
Definition: regex.h:331
const RegexType & regex_
Definition: regex.h:718
Allocator * allocator_
Definition: regex.h:719
bool Search(InputStream &is)
Definition: regex.h:640
#define RAPIDJSON_ASSERT(x)
Assertion.
Definition: rapidjson.h:406
SourceStream & ss_
Definition: regex.h:68
bool Match(const Ch *s)
Definition: regex.h:634
const State & GetState(SizeType index) const
Definition: regex.h:178
bool AddState(Stack< Allocator > &l, SizeType index)
Definition: regex.h:692
RegexType::State State
Definition: regex.h:650
void CloneTopOperand(Stack< Allocator > &operandStack)
Definition: regex.h:455