IN2OSM  1.0.1
GenericRegex< Encoding, Allocator >

Regular expression engine with a subset of ECMAScript grammar. More...

#include <regex.h>

Collaboration diagram for GenericRegex< Encoding, Allocator >:
Collaboration graph

Classes

struct  Frag
 
struct  Range
 
struct  State
 

Public Types

typedef Encoding EncodingType
 
typedef Encoding::Ch Ch
 

Public Member Functions

 GenericRegex (const Ch *source, Allocator *allocator=0)
 
 ~GenericRegex ()
 
bool IsValid () const
 

Private Types

enum  Operator {
  kZeroOrOne, kZeroOrMore, kOneOrMore, kConcatenation,
  kAlternation, kLeftParenthesis
}
 

Private Member Functions

State & GetState (SizeType index)
 
const State & GetState (SizeType index) const
 
Range & GetRange (SizeType index)
 
const Range & GetRange (SizeType index) const
 
template<typename InputStream >
void Parse (DecodedStream< InputStream, Encoding > &ds)
 
SizeType NewState (SizeType out, SizeType out1, unsigned codepoint)
 
void PushOperand (Stack< Allocator > &operandStack, unsigned codepoint)
 
void ImplicitConcatenation (Stack< Allocator > &atomCountStack, Stack< Allocator > &operatorStack)
 
SizeType Append (SizeType l1, SizeType l2)
 
void Patch (SizeType l, SizeType s)
 
bool Eval (Stack< Allocator > &operandStack, Operator op)
 
bool EvalQuantifier (Stack< Allocator > &operandStack, unsigned n, unsigned m)
 
void CloneTopOperand (Stack< Allocator > &operandStack)
 
template<typename InputStream >
bool ParseUnsigned (DecodedStream< InputStream, Encoding > &ds, unsigned *u)
 
template<typename InputStream >
bool ParseRange (DecodedStream< InputStream, Encoding > &ds, SizeType *range)
 
SizeType NewRange (unsigned codepoint)
 
template<typename InputStream >
bool CharacterEscape (DecodedStream< InputStream, Encoding > &ds, unsigned *escapedCodepoint)
 

Static Private Member Functions

static SizeType Min (SizeType a, SizeType b)
 

Private Attributes

Allocator * ownAllocator_
 
Allocator * allocator_
 
Stack< Allocator > states_
 
Stack< Allocator > ranges_
 
SizeType root_
 
SizeType stateCount_
 
SizeType rangeCount_
 
bool anchorBegin_
 
bool anchorEnd_
 

Static Private Attributes

static const unsigned kAnyCharacterClass = 0xFFFFFFFF
 For '.'. More...
 
static const unsigned kRangeCharacterClass = 0xFFFFFFFE
 
static const unsigned kRangeNegationFlag = 0x80000000
 
static const unsigned kInfinityQuantifier = ~0u
 

Friends

template<typename , typename >
class GenericRegexSearch
 

Detailed Description

template<typename Encoding, typename Allocator = CrtAllocator>
class internal::GenericRegex< Encoding, Allocator >

Regular expression engine with a subset of ECMAScript grammar.

Supported regular expression syntax:

  • ab Concatenation
  • a|b Alternation
  • a? Zero or one
  • a* Zero or more
  • a+ One or more
  • a{3} Exactly 3 times
  • a{3,} At least 3 times
  • a{3,5} 3 to 5 times
  • (ab) Grouping
  • ^a At the beginning
  • a$ At the end
  • . Any character
  • [abc] Character classes
  • [a-c] Character class range
  • [a-z0-9_] Character class combination
  • [^abc] Negated character classes
  • [^a-c] Negated character class range
  • [\b] Backspace (U+0008)
  • \| \\ ... Escape characters
  • \f Form feed (U+000C)
  • \n Line feed (U+000A)
  • \r Carriage return (U+000D)
  • \t Tab (U+0009)
  • \v Vertical tab (U+000B)
Note
This is a Thompson NFA engine, implemented with reference to Cox, Russ. "Regular Expression Matching Can Be Simple And Fast (but is slow in Java, Perl, PHP, Python, Ruby,...).", https://swtch.com/~rsc/regexp/regexp1.html

Definition at line 114 of file regex.h.

Member Typedef Documentation

◆ Ch

typedef Encoding::Ch Ch

Definition at line 117 of file regex.h.

◆ EncodingType

typedef Encoding EncodingType

Definition at line 116 of file regex.h.

Member Enumeration Documentation

◆ Operator

enum Operator
private
Enumerator
kZeroOrOne 
kZeroOrMore 
kOneOrMore 
kConcatenation 
kAlternation 
kLeftParenthesis 

Definition at line 140 of file regex.h.

Constructor & Destructor Documentation

◆ GenericRegex()

GenericRegex ( const Ch *  source,
Allocator *  allocator = 0 
)
inline

Definition at line 120 of file regex.h.

120  :
121  ownAllocator_(allocator ? 0 : RAPIDJSON_NEW(Allocator)()), allocator_(allocator ? allocator : ownAllocator_),
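122  states_(allocator_, 256), ranges_(allocator_, 256), root_(kRegexInvalidState), stateCount_(), rangeCount_(),
123  anchorBegin_(), anchorEnd_()
124  {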
125  GenericStringStream<Encoding> ss(source);
126  DecodedStream<GenericStringStream<Encoding>, Encoding> ds(ss);
127  Parse(ds);
128  }
Stack< Allocator > states_
Definition: regex.h:593
Allocator * allocator_
Definition: regex.h:592
const CharType(& source)[N]
Definition: pointer.h:1204
void Parse(DecodedStream< InputStream, Encoding > &ds)
Definition: regex.h:194
SizeType rangeCount_
Definition: regex.h:597
#define RAPIDJSON_NEW(TypeName)
Customization point for global new.
Definition: rapidjson.h:625
Read-only string stream.
Definition: fwd.h:47
static const SizeType kRegexInvalidState
Represents an invalid index in GenericRegex::State::out, out1.
Definition: regex.h:75
Stack< Allocator > ranges_
Definition: regex.h:594
SizeType stateCount_
Definition: regex.h:596
Allocator * ownAllocator_
Definition: regex.h:591

◆ ~GenericRegex()

~GenericRegex ( )
inline

Definition at line 130 of file regex.h.

131  {
132  RAPIDJSON_DELETE(ownAllocator_);
133  }
#define RAPIDJSON_DELETE(x)
Customization point for global delete.
Definition: rapidjson.h:629
Allocator * ownAllocator_
Definition: regex.h:591

Member Function Documentation

◆ Append()

SizeType Append ( SizeType  l1,
SizeType  l2 
)
inlineprivate

Definition at line 342 of file regex.h.

342  {
343  SizeType old = l1;
344  while (GetState(l1).out != kRegexInvalidState)
345  l1 = GetState(l1).out;
346  GetState(l1).out = l2;
347  return old;
348  }
RAPIDJSON_NAMESPACE_BEGIN typedef unsigned SizeType
Size type (for string lengths, array sizes, etc.)
Definition: rapidjson.h:384
SizeType out
Equals to kInvalid for matching state.
Definition: regex.h:160
static const SizeType kRegexInvalidState
Represents an invalid index in GenericRegex::State::out, out1.
Definition: regex.h:75
State & GetState(SizeType index)
Definition: regex.h:173

◆ CharacterEscape()

bool CharacterEscape ( DecodedStream< InputStream, Encoding > &  ds,
unsigned *  escapedCodepoint 
)
inlineprivate

Definition at line 563 of file regex.h.

563  {
564  unsigned codepoint;
565  switch (codepoint = ds.Take()) {
566  case '^':
567  case '$':
568  case '|':
569  case '(':
570  case ')':
571  case '?':
572  case '*':
573  case '+':
574  case '.':
575  case '[':
576  case ']':
577  case '{':
578  case '}':
579  case '\\':
580  *escapedCodepoint = codepoint; return true;
581  case 'f': *escapedCodepoint = 0x000C; return true;
582  case 'n': *escapedCodepoint = 0x000A; return true;
583  case 'r': *escapedCodepoint = 0x000D; return true;
584  case 't': *escapedCodepoint = 0x0009; return true;
585  case 'v': *escapedCodepoint = 0x000B; return true;
586  default:
587  return false; // Unsupported escape character
588  }
589  }
Here is the call graph for this function:

◆ CloneTopOperand()

void CloneTopOperand ( Stack< Allocator > &  operandStack)
inlineprivate

Definition at line 455 of file regex.h.

455  {
456  const Frag src = *operandStack.template Top<Frag>(); // Copy constructor to prevent invalidation
457  SizeType count = stateCount_ - src.minIndex; // Assumes top operand contains states in [src->minIndex, stateCount_)
458  State* s = states_.template Push<State>(count);
459  memcpy(s, &GetState(src.minIndex), count * sizeof(State));
460  for (SizeType j = 0; j < count; j++) {
461  if (s[j].out != kRegexInvalidState)
462  s[j].out += count;
463  if (s[j].out1 != kRegexInvalidState)
464  s[j].out1 += count;
465  }
466  *operandStack.template Push<Frag>() = Frag(src.start + count, src.out + count, src.minIndex + count);
467  stateCount_ += count;
468  }
Stack< Allocator > states_
Definition: regex.h:593
RAPIDJSON_NAMESPACE_BEGIN typedef unsigned SizeType
Size type (for string lengths, array sizes, etc.)
Definition: rapidjson.h:384
static const SizeType kRegexInvalidState
Represents an invalid index in GenericRegex::State::out, out1.
Definition: regex.h:75
State & GetState(SizeType index)
Definition: regex.h:173
SizeType stateCount_
Definition: regex.h:596

◆ Eval()

bool Eval ( Stack< Allocator > &  operandStack,
Operator  op 
)
inlineprivate

Definition at line 357 of file regex.h.

357  {
358  switch (op) {
359  case kConcatenation:
360  RAPIDJSON_ASSERT(operandStack.GetSize() >= sizeof(Frag) * 2);
361  {
362  Frag e2 = *operandStack.template Pop<Frag>(1);
363  Frag e1 = *operandStack.template Pop<Frag>(1);
364  Patch(e1.out, e2.start);
365  *operandStack.template Push<Frag>() = Frag(e1.start, e2.out, Min(e1.minIndex, e2.minIndex));
366  }
367  return true;
368 
369  case kAlternation:
370  if (operandStack.GetSize() >= sizeof(Frag) * 2) {
371  Frag e2 = *operandStack.template Pop<Frag>(1);
372  Frag e1 = *operandStack.template Pop<Frag>(1);
373  SizeType s = NewState(e1.start, e2.start, 0);
374  *operandStack.template Push<Frag>() = Frag(s, Append(e1.out, e2.out), Min(e1.minIndex, e2.minIndex));
375  return true;
376  }
377  return false;
378 
379  case kZeroOrOne:
380  if (operandStack.GetSize() >= sizeof(Frag)) {
381  Frag e = *operandStack.template Pop<Frag>(1);
382  SizeType s = NewState(kRegexInvalidState, e.start, 0);
383  *operandStack.template Push<Frag>() = Frag(s, Append(e.out, s), e.minIndex);
384  return true;
385  }
386  return false;
387 
388  case kZeroOrMore:
389  if (operandStack.GetSize() >= sizeof(Frag)) {
390  Frag e = *operandStack.template Pop<Frag>(1);
391  SizeType s = NewState(kRegexInvalidState, e.start, 0);
392  Patch(e.out, s);
393  *operandStack.template Push<Frag>() = Frag(s, s, e.minIndex);
394  return true;
395  }
396  return false;
397 
398  case kOneOrMore:
399  if (operandStack.GetSize() >= sizeof(Frag)) {
400  Frag e = *operandStack.template Pop<Frag>(1);
401  SizeType s = NewState(kRegexInvalidState, e.start, 0);
402  Patch(e.out, s);
403  *operandStack.template Push<Frag>() = Frag(e.start, s, e.minIndex);
404  return true;
405  }
406  return false;
407 
408  default:
409  // syntax error (e.g. unclosed kLeftParenthesis)
410  return false;
411  }
412  }
RAPIDJSON_NAMESPACE_BEGIN typedef unsigned SizeType
Size type (for string lengths, array sizes, etc.)
Definition: rapidjson.h:384
static SizeType Min(SizeType a, SizeType b)
Definition: regex.h:453
SizeType Append(SizeType l1, SizeType l2)
Definition: regex.h:342
SizeType NewState(SizeType out, SizeType out1, unsigned codepoint)
Definition: regex.h:322
void Patch(SizeType l, SizeType s)
Definition: regex.h:350
static const SizeType kRegexInvalidState
Represents an invalid index in GenericRegex::State::out, out1.
Definition: regex.h:75
#define RAPIDJSON_ASSERT(x)
Assertion.
Definition: rapidjson.h:406
Here is the call graph for this function:

◆ EvalQuantifier()

bool EvalQuantifier ( Stack< Allocator > &  operandStack,
unsigned  n,
unsigned  m 
)
inlineprivate

Definition at line 414 of file regex.h.

414  {
415  RAPIDJSON_ASSERT(n <= m);
416  RAPIDJSON_ASSERT(operandStack.GetSize() >= sizeof(Frag));
417 
418  if (n == 0) {
419  if (m == 0) // a{0} not support
420  return false;
421  else if (m == kInfinityQuantifier)
422  Eval(operandStack, kZeroOrMore); // a{0,} -> a*
423  else {
424  Eval(operandStack, kZeroOrOne); // a{0,5} -> a?
425  for (unsigned i = 0; i < m - 1; i++)
426  CloneTopOperand(operandStack); // a{0,5} -> a? a? a? a? a?
427  for (unsigned i = 0; i < m - 1; i++)
428  Eval(operandStack, kConcatenation); // a{0,5} -> a?a?a?a?a?
429  }
430  return true;
431  }
432 
433  for (unsigned i = 0; i < n - 1; i++) // a{3} -> a a a
434  CloneTopOperand(operandStack);
435 
436  if (m == kInfinityQuantifier)
437  Eval(operandStack, kOneOrMore); // a{3,} -> a a a+
438  else if (m > n) {
439  CloneTopOperand(operandStack); // a{3,5} -> a a a a
440  Eval(operandStack, kZeroOrOne); // a{3,5} -> a a a a?
441  for (unsigned i = n; i < m - 1; i++)
442  CloneTopOperand(operandStack); // a{3,5} -> a a a a? a?
443  for (unsigned i = n; i < m; i++)
444  Eval(operandStack, kConcatenation); // a{3,5} -> a a aa?a?
445  }
446 
447  for (unsigned i = 0; i < n - 1; i++)
448  Eval(operandStack, kConcatenation); // a{3} -> aaa, a{3,} -> aaa+, a{3.5} -> aaaa?a?
449 
450  return true;
451  }
static const unsigned kInfinityQuantifier
Definition: regex.h:599
bool Eval(Stack< Allocator > &operandStack, Operator op)
Definition: regex.h:357
#define RAPIDJSON_ASSERT(x)
Assertion.
Definition: rapidjson.h:406
void CloneTopOperand(Stack< Allocator > &operandStack)
Definition: regex.h:455
Here is the call graph for this function:

◆ GetRange() [1/2]

Range& GetRange ( SizeType  index)
inlineprivate

Definition at line 183 of file regex.h.

183  {
184  RAPIDJSON_ASSERT(index < rangeCount_);
185  return ranges_.template Bottom<Range>()[index];
186  }
SizeType rangeCount_
Definition: regex.h:597
Stack< Allocator > ranges_
Definition: regex.h:594
#define RAPIDJSON_ASSERT(x)
Assertion.
Definition: rapidjson.h:406

◆ GetRange() [2/2]

const Range& GetRange ( SizeType  index) const
inlineprivate

Definition at line 188 of file regex.h.

188  {
189  RAPIDJSON_ASSERT(index < rangeCount_);
190  return ranges_.template Bottom<Range>()[index];
191  }
SizeType rangeCount_
Definition: regex.h:597
Stack< Allocator > ranges_
Definition: regex.h:594
#define RAPIDJSON_ASSERT(x)
Assertion.
Definition: rapidjson.h:406

◆ GetState() [1/2]

State& GetState ( SizeType  index)
inlineprivate

Definition at line 173 of file regex.h.

173  {
174  RAPIDJSON_ASSERT(index < stateCount_);
175  return states_.template Bottom<State>()[index];
176  }
Stack< Allocator > states_
Definition: regex.h:593
SizeType stateCount_
Definition: regex.h:596
#define RAPIDJSON_ASSERT(x)
Assertion.
Definition: rapidjson.h:406

◆ GetState() [2/2]

const State& GetState ( SizeType  index) const
inlineprivate

Definition at line 178 of file regex.h.

178  {
179  RAPIDJSON_ASSERT(index < stateCount_);
180  return states_.template Bottom<State>()[index];
181  }
Stack< Allocator > states_
Definition: regex.h:593
SizeType stateCount_
Definition: regex.h:596
#define RAPIDJSON_ASSERT(x)
Assertion.
Definition: rapidjson.h:406

◆ ImplicitConcatenation()

void ImplicitConcatenation ( Stack< Allocator > &  atomCountStack,
Stack< Allocator > &  operatorStack 
)
inlineprivate

Definition at line 336 of file regex.h.

336  {
337  if (*atomCountStack.template Top<unsigned>())
338  *operatorStack.template Push<Operator>() = kConcatenation;
339  (*atomCountStack.template Top<unsigned>())++;
340  }

◆ IsValid()

bool IsValid ( ) const
inline

Definition at line 135 of file regex.h.

135  {
136  return root_ != kRegexInvalidState;
137  }
static const SizeType kRegexInvalidState
Represents an invalid index in GenericRegex::State::out, out1.
Definition: regex.h:75
Here is the caller graph for this function:

◆ Min()

static SizeType Min ( SizeType  a,
SizeType  b 
)
inlinestaticprivate

Definition at line 453 of file regex.h.

453 { return a < b ? a : b; }
const GenericPointer< typename T::ValueType > T2 T::AllocatorType & a
Definition: pointer.h:1181

◆ NewRange()

SizeType NewRange ( unsigned  codepoint)
inlineprivate

Definition at line 555 of file regex.h.

555  {
556  Range* r = ranges_.template Push<Range>();
557  r->start = r->end = codepoint;
558  r->next = kRegexInvalidRange;
559  return rangeCount_++;
560  }
SizeType rangeCount_
Definition: regex.h:597
static const SizeType kRegexInvalidRange
Definition: regex.h:76
Stack< Allocator > ranges_
Definition: regex.h:594

◆ NewState()

SizeType NewState ( SizeType  out,
SizeType  out1,
unsigned  codepoint 
)
inlineprivate

Definition at line 322 of file regex.h.

322  {
323  State* s = states_.template Push<State>();
324  s->out = out;
325  s->out1 = out1;
326  s->codepoint = codepoint;
327  s->rangeStart = kRegexInvalidRange;
328  return stateCount_++;
329  }
Stack< Allocator > states_
Definition: regex.h:593
static const SizeType kRegexInvalidRange
Definition: regex.h:76
SizeType stateCount_
Definition: regex.h:596

◆ Parse()

void Parse ( DecodedStream< InputStream, Encoding > &  ds)
inlineprivate

Definition at line 194 of file regex.h.

194  {
195  Stack<Allocator> operandStack(allocator_, 256); // Frag
196  Stack<Allocator> operatorStack(allocator_, 256); // Operator
197  Stack<Allocator> atomCountStack(allocator_, 256); // unsigned (Atom per parenthesis)
198 
199  *atomCountStack.template Push<unsigned>() = 0;
200 
201  unsigned codepoint;
202  while (ds.Peek() != 0) {
203  switch (codepoint = ds.Take()) {
204  case '^':
205  anchorBegin_ = true;
206  break;
207 
208  case '$':
209  anchorEnd_ = true;
210  break;
211 
212  case '|':
213  while (!operatorStack.Empty() && *operatorStack.template Top<Operator>() < kAlternation)
214  if (!Eval(operandStack, *operatorStack.template Pop<Operator>(1)))
215  return;
216  *operatorStack.template Push<Operator>() = kAlternation;
217  *atomCountStack.template Top<unsigned>() = 0;
218  break;
219 
220  case '(':
221  *operatorStack.template Push<Operator>() = kLeftParenthesis;
222  *atomCountStack.template Push<unsigned>() = 0;
223  break;
224 
225  case ')':
226  while (!operatorStack.Empty() && *operatorStack.template Top<Operator>() != kLeftParenthesis)
227  if (!Eval(operandStack, *operatorStack.template Pop<Operator>(1)))
228  return;
229  if (operatorStack.Empty())
230  return;
231  operatorStack.template Pop<Operator>(1);
232  atomCountStack.template Pop<unsigned>(1);
233  ImplicitConcatenation(atomCountStack, operatorStack);
234  break;
235 
236  case '?':
237  if (!Eval(operandStack, kZeroOrOne))
238  return;
239  break;
240 
241  case '*':
242  if (!Eval(operandStack, kZeroOrMore))
243  return;
244  break;
245 
246  case '+':
247  if (!Eval(operandStack, kOneOrMore))
248  return;
249  break;
250 
251  case '{':
252  {
253  unsigned n, m;
254  if (!ParseUnsigned(ds, &n))
255  return;
256 
257  if (ds.Peek() == ',') {
258  ds.Take();
259  if (ds.Peek() == '}')
260  m = kInfinityQuantifier;
261  else if (!ParseUnsigned(ds, &m) || m < n)
262  return;
263  }
264  else
265  m = n;
266 
267  if (!EvalQuantifier(operandStack, n, m) || ds.Peek() != '}')
268  return;
269  ds.Take();
270  }
271  break;
272 
273  case '.':
274  PushOperand(operandStack, kAnyCharacterClass);
275  ImplicitConcatenation(atomCountStack, operatorStack);
276  break;
277 
278  case '[':
279  {
280  SizeType range;
281  if (!ParseRange(ds, &range))
282  return;
283  SizeType s = NewState(kRegexInvalidState, kRegexInvalidState, kRangeCharacterClass);
284  GetState(s).rangeStart = range;
285  *operandStack.template Push<Frag>() = Frag(s, s, s);
286  }
287  ImplicitConcatenation(atomCountStack, operatorStack);
288  break;
289 
290  case '\\': // Escape character
291  if (!CharacterEscape(ds, &codepoint))
292  return; // Unsupported escape character
293  // fall through to default
294 
295  default: // Pattern character
296  PushOperand(operandStack, codepoint);
297  ImplicitConcatenation(atomCountStack, operatorStack);
298  }
299  }
300 
301  while (!operatorStack.Empty())
302  if (!Eval(operandStack, *operatorStack.template Pop<Operator>(1)))
303  return;
304 
305  // Link the operand to matching state.
306  if (operandStack.GetSize() == sizeof(Frag)) {
307  Frag* e = operandStack.template Pop<Frag>(1);
308  Patch(e->out, NewState(kRegexInvalidState, kRegexInvalidState, 0));
309  root_ = e->start;
310 
311 #if RAPIDJSON_REGEX_VERBOSE
312  printf("root: %d\n", root_);
313  for (SizeType i = 0; i < stateCount_ ; i++) {
314  State& s = GetState(i);
315  printf("[%2d] out: %2d out1: %2d c: '%c'\n", i, s.out, s.out1, (char)s.codepoint);
316  }
317  printf("\n");
318 #endif
319  }
320  }
Allocator * allocator_
Definition: regex.h:592
RAPIDJSON_NAMESPACE_BEGIN typedef unsigned SizeType
Size type (for string lengths, array sizes, etc.)
Definition: rapidjson.h:384
bool ParseRange(DecodedStream< InputStream, Encoding > &ds, SizeType *range)
Definition: regex.h:485
static const unsigned kInfinityQuantifier
Definition: regex.h:599
bool CharacterEscape(DecodedStream< InputStream, Encoding > &ds, unsigned *escapedCodepoint)
Definition: regex.h:563
bool EvalQuantifier(Stack< Allocator > &operandStack, unsigned n, unsigned m)
Definition: regex.h:414
SizeType NewState(SizeType out, SizeType out1, unsigned codepoint)
Definition: regex.h:322
void Patch(SizeType l, SizeType s)
Definition: regex.h:350
bool Eval(Stack< Allocator > &operandStack, Operator op)
Definition: regex.h:357
static const SizeType kRegexInvalidState
Represents an invalid index in GenericRegex::State::out, out1.
Definition: regex.h:75
bool ParseUnsigned(DecodedStream< InputStream, Encoding > &ds, unsigned *u)
Definition: regex.h:471
static const unsigned kAnyCharacterClass
For '.'.
Definition: regex.h:149
State & GetState(SizeType index)
Definition: regex.h:173
SizeType stateCount_
Definition: regex.h:596
static const unsigned kRangeCharacterClass
Definition: regex.h:150
void ImplicitConcatenation(Stack< Allocator > &atomCountStack, Stack< Allocator > &operatorStack)
Definition: regex.h:336
void PushOperand(Stack< Allocator > &operandStack, unsigned codepoint)
Definition: regex.h:331
Here is the call graph for this function:

◆ ParseRange()

bool ParseRange ( DecodedStream< InputStream, Encoding > &  ds,
SizeType *  range 
)
inlineprivate

Definition at line 485 of file regex.h.

485  {
486  bool isBegin = true;
487  bool negate = false;
488  int step = 0;
489  SizeType start = kRegexInvalidRange;
490  SizeType current = kRegexInvalidRange;
491  unsigned codepoint;
492  while ((codepoint = ds.Take()) != 0) {
493  if (isBegin) {
494  isBegin = false;
495  if (codepoint == '^') {
496  negate = true;
497  continue;
498  }
499  }
500 
501  switch (codepoint) {
502  case ']':
503  if (start == kRegexInvalidRange)
504  return false; // Error: nothing inside []
505  if (step == 2) { // Add trailing '-'
506  SizeType r = NewRange('-');
507  RAPIDJSON_ASSERT(current != kRegexInvalidRange);
508  GetRange(current).next = r;
509  }
510  if (negate)
511  GetRange(start).start |= kRangeNegationFlag;
512  *range = start;
513  return true;
514 
515  case '\\':
516  if (ds.Peek() == 'b') {
517  ds.Take();
518  codepoint = 0x0008; // Escape backspace character
519  }
520  else if (!CharacterEscape(ds, &codepoint))
521  return false;
522  // fall through to default
523 
524  default:
525  switch (step) {
526  case 1:
527  if (codepoint == '-') {
528  step++;
529  break;
530  }
531  // fall through to step 0 for other characters
532 
533  case 0:
534  {
535  SizeType r = NewRange(codepoint);
536  if (current != kRegexInvalidRange)
537  GetRange(current).next = r;
538  if (start == kRegexInvalidRange)
539  start = r;
540  current = r;
541  }
542  step = 1;
543  break;
544 
545  default:
546  RAPIDJSON_ASSERT(step == 2);
547  GetRange(current).end = codepoint;
548  step = 0;
549  }
550  }
551  }
552  return false;
553  }
RAPIDJSON_NAMESPACE_BEGIN typedef unsigned SizeType
Size type (for string lengths, array sizes, etc.)
Definition: rapidjson.h:384
static const unsigned kRangeNegationFlag
Definition: regex.h:151
SizeType NewRange(unsigned codepoint)
Definition: regex.h:555
bool CharacterEscape(DecodedStream< InputStream, Encoding > &ds, unsigned *escapedCodepoint)
Definition: regex.h:563
Range & GetRange(SizeType index)
Definition: regex.h:183
static const SizeType kRegexInvalidRange
Definition: regex.h:76
#define RAPIDJSON_ASSERT(x)
Assertion.
Definition: rapidjson.h:406
Here is the call graph for this function:

◆ ParseUnsigned()

bool ParseUnsigned ( DecodedStream< InputStream, Encoding > &  ds,
unsigned *  u 
)
inlineprivate

Definition at line 471 of file regex.h.

471  {
472  unsigned r = 0;
473  if (ds.Peek() < '0' || ds.Peek() > '9')
474  return false;
475  while (ds.Peek() >= '0' && ds.Peek() <= '9') {
476  if (r >= 429496729 && ds.Peek() > '5') // 2^32 - 1 = 4294967295
477  return false; // overflow
478  r = r * 10 + (ds.Take() - '0');
479  }
480  *u = r;
481  return true;
482  }
Here is the call graph for this function:

◆ Patch()

void Patch ( SizeType  l,
SizeType  s 
)
inlineprivate

Definition at line 350 of file regex.h.

350  {
351  for (SizeType next; l != kRegexInvalidState; l = next) {
352  next = GetState(l).out;
353  GetState(l).out = s;
354  }
355  }
RAPIDJSON_NAMESPACE_BEGIN typedef unsigned SizeType
Size type (for string lengths, array sizes, etc.)
Definition: rapidjson.h:384
SizeType out
Equals to kInvalid for matching state.
Definition: regex.h:160
static const SizeType kRegexInvalidState
Represents an invalid index in GenericRegex::State::out, out1.
Definition: regex.h:75
State & GetState(SizeType index)
Definition: regex.h:173

◆ PushOperand()

void PushOperand ( Stack< Allocator > &  operandStack,
unsigned  codepoint 
)
inlineprivate

Definition at line 331 of file regex.h.

331  {
332  SizeType s = NewState(kRegexInvalidState, kRegexInvalidState, codepoint);
333  *operandStack.template Push<Frag>() = Frag(s, s, s);
334  }
RAPIDJSON_NAMESPACE_BEGIN typedef unsigned SizeType
Size type (for string lengths, array sizes, etc.)
Definition: rapidjson.h:384
SizeType NewState(SizeType out, SizeType out1, unsigned codepoint)
Definition: regex.h:322
static const SizeType kRegexInvalidState
Represents an invalid index in GenericRegex::State::out, out1.
Definition: regex.h:75

Friends And Related Function Documentation

◆ GenericRegexSearch

friend class GenericRegexSearch
friend

Definition at line 118 of file regex.h.

Member Data Documentation

◆ allocator_

Allocator* allocator_
private

Definition at line 592 of file regex.h.

◆ anchorBegin_

bool anchorBegin_
private

Definition at line 602 of file regex.h.

◆ anchorEnd_

bool anchorEnd_
private

Definition at line 603 of file regex.h.

◆ kAnyCharacterClass

const unsigned kAnyCharacterClass = 0xFFFFFFFF
staticprivate

For '.'.

Definition at line 149 of file regex.h.

◆ kInfinityQuantifier

const unsigned kInfinityQuantifier = ~0u
staticprivate

Definition at line 599 of file regex.h.

◆ kRangeCharacterClass

const unsigned kRangeCharacterClass = 0xFFFFFFFE
staticprivate

Definition at line 150 of file regex.h.

◆ kRangeNegationFlag

const unsigned kRangeNegationFlag = 0x80000000
staticprivate

Definition at line 151 of file regex.h.

◆ ownAllocator_

Allocator* ownAllocator_
private

Definition at line 591 of file regex.h.

◆ rangeCount_

SizeType rangeCount_
private

Definition at line 597 of file regex.h.

◆ ranges_

Stack<Allocator> ranges_
private

Definition at line 594 of file regex.h.

◆ root_

SizeType root_
private

Definition at line 595 of file regex.h.

◆ stateCount_

SizeType stateCount_
private

Definition at line 596 of file regex.h.

◆ states_

Stack<Allocator> states_
private

Definition at line 593 of file regex.h.


The documentation for this class was generated from the following file: