IN2OSM  1.0.1
AutoUTFInputStream< CharType, InputByteStream >

Input stream wrapper with dynamically bound encoding and automatic encoding detection. More...

#include <encodedstream.h>

Collaboration diagram for AutoUTFInputStream< CharType, InputByteStream >:
Collaboration graph

Public Types

typedef CharType Ch
 

Public Member Functions

 AutoUTFInputStream (InputByteStream &is, UTFType type=kUTF8)
 Constructor. More...
 
UTFType GetType () const
 
bool HasBOM () const
 
Ch Peek () const
 
Ch Take ()
 
size_t Tell () const
 
void Put (Ch)
 
void Flush ()
 
Ch * PutBegin ()
 
size_t PutEnd (Ch *)
 

Private Types

typedef Ch(* TakeFunc) (InputByteStream &is)
 

Private Member Functions

 RAPIDJSON_STATIC_ASSERT (sizeof(typename InputByteStream::Ch)==1)
 
 AutoUTFInputStream (const AutoUTFInputStream &)
 
AutoUTFInputStream & operator= (const AutoUTFInputStream &)
 
void DetectType ()
 

Private Attributes

InputByteStream * is_
 
UTFType type_
 
Ch current_
 
TakeFunc takeFunc_
 
bool hasBOM_
 

Detailed Description

template<typename CharType, typename InputByteStream>
class AutoUTFInputStream< CharType, InputByteStream >

Input stream wrapper with dynamically bound encoding and automatic encoding detection.

Template Parameters
CharType: Type of character for reading.
InputByteStream: Type of input byte stream to be wrapped.

Definition at line 135 of file encodedstream.h.

Member Typedef Documentation

◆ Ch

typedef CharType Ch

Definition at line 138 of file encodedstream.h.

◆ TakeFunc

typedef Ch(* TakeFunc) (InputByteStream &is)
private

Definition at line 219 of file encodedstream.h.

Constructor & Destructor Documentation

◆ AutoUTFInputStream() [1/2]

AutoUTFInputStream ( InputByteStream &  is,
UTFType  type = kUTF8 
)
inline

Constructor.

Parameters
is: input stream to be wrapped.
type: UTF encoding type if it is not detected from the stream.

Definition at line 145 of file encodedstream.h.

145  : is_(&is), type_(type), hasBOM_(false) {
146  RAPIDJSON_ASSERT(type >= kUTF8 && type <= kUTF32BE);
147  DetectType();
148  static const TakeFunc f[] = { RAPIDJSON_ENCODINGS_FUNC(Take) };
149  takeFunc_ = f[type_];
150  current_ = takeFunc_(*is_);
151  }
#define RAPIDJSON_ENCODINGS_FUNC(x)
InputByteStream * is_
UTF-32 big endian.
Definition: encodings.h:608
UTF-8.
Definition: encodings.h:604
Ch(* TakeFunc)(InputByteStream &is)
#define RAPIDJSON_ASSERT(x)
Assertion.
Definition: rapidjson.h:406
Here is the call graph for this function:

◆ AutoUTFInputStream() [2/2]

AutoUTFInputStream ( const AutoUTFInputStream< CharType, InputByteStream > &  )
private

Member Function Documentation

◆ DetectType()

void DetectType ( )
inline, private

Definition at line 171 of file encodedstream.h.

171  {
172  // BOM (Byte Order Mark):
173  // 00 00 FE FF UTF-32BE
174  // FF FE 00 00 UTF-32LE
175  // FE FF UTF-16BE
176  // FF FE UTF-16LE
177  // EF BB BF UTF-8
178 
179  const unsigned char* c = reinterpret_cast<const unsigned char *>(is_->Peek4());
180  if (!c)
181  return;
182 
183  unsigned bom = static_cast<unsigned>(c[0] | (c[1] << 8) | (c[2] << 16) | (c[3] << 24));
184  hasBOM_ = false;
185  if (bom == 0xFFFE0000) { type_ = kUTF32BE; hasBOM_ = true; is_->Take(); is_->Take(); is_->Take(); is_->Take(); }
186  else if (bom == 0x0000FEFF) { type_ = kUTF32LE; hasBOM_ = true; is_->Take(); is_->Take(); is_->Take(); is_->Take(); }
187  else if ((bom & 0xFFFF) == 0xFFFE) { type_ = kUTF16BE; hasBOM_ = true; is_->Take(); is_->Take(); }
188  else if ((bom & 0xFFFF) == 0xFEFF) { type_ = kUTF16LE; hasBOM_ = true; is_->Take(); is_->Take(); }
189  else if ((bom & 0xFFFFFF) == 0xBFBBEF) { type_ = kUTF8; hasBOM_ = true; is_->Take(); is_->Take(); is_->Take(); }
190 
191  // RFC 4627: Section 3
192  // "Since the first two characters of a JSON text will always be ASCII
193  // characters [RFC0020], it is possible to determine whether an octet
194  // stream is UTF-8, UTF-16 (BE or LE), or UTF-32 (BE or LE) by looking
195  // at the pattern of nulls in the first four octets."
196  // 00 00 00 xx UTF-32BE
197  // 00 xx 00 xx UTF-16BE
198  // xx 00 00 00 UTF-32LE
199  // xx 00 xx 00 UTF-16LE
200  // xx xx xx xx UTF-8
201 
202  if (!hasBOM_) {
203  int pattern = (c[0] ? 1 : 0) | (c[1] ? 2 : 0) | (c[2] ? 4 : 0) | (c[3] ? 8 : 0);
204  switch (pattern) {
205  case 0x08: type_ = kUTF32BE; break;
206  case 0x0A: type_ = kUTF16BE; break;
207  case 0x01: type_ = kUTF32LE; break;
208  case 0x05: type_ = kUTF16LE; break;
209  case 0x0F: type_ = kUTF8; break;
210  default: break; // Use type defined by user.
211  }
212  }
213 
214  // Runtime check whether the size of character type is sufficient. It only perform checks with assertion.
215  if (type_ == kUTF16LE || type_ == kUTF16BE) RAPIDJSON_ASSERT(sizeof(Ch) >= 2);
216  if (type_ == kUTF32LE || type_ == kUTF32BE) RAPIDJSON_ASSERT(sizeof(Ch) >= 4);
217  }
InputByteStream * is_
UTF-32 big endian.
Definition: encodings.h:608
UTF-16 little endian.
Definition: encodings.h:605
UTF-8.
Definition: encodings.h:604
UTF-16 big endian.
Definition: encodings.h:606
UTF-32 little endian.
Definition: encodings.h:607
#define RAPIDJSON_ASSERT(x)
Assertion.
Definition: rapidjson.h:406

◆ Flush()

void Flush ( )
inline

Definition at line 162 of file encodedstream.h.

162 { RAPIDJSON_ASSERT(false); }
#define RAPIDJSON_ASSERT(x)
Assertion.
Definition: rapidjson.h:406

◆ GetType()

UTFType GetType ( ) const
inline

Definition at line 153 of file encodedstream.h.

153 { return type_; }

◆ HasBOM()

bool HasBOM ( ) const
inline

Definition at line 154 of file encodedstream.h.

154 { return hasBOM_; }

◆ operator=()

AutoUTFInputStream& operator= ( const AutoUTFInputStream< CharType, InputByteStream > &  )
private

◆ Peek()

Ch Peek ( ) const
inline

Definition at line 156 of file encodedstream.h.

156 { return current_; }

◆ Put()

void Put ( Ch  )
inline

Definition at line 161 of file encodedstream.h.

161 { RAPIDJSON_ASSERT(false); }
#define RAPIDJSON_ASSERT(x)
Assertion.
Definition: rapidjson.h:406

◆ PutBegin()

Ch* PutBegin ( )
inline

Definition at line 163 of file encodedstream.h.

163 { RAPIDJSON_ASSERT(false); return 0; }
#define RAPIDJSON_ASSERT(x)
Assertion.
Definition: rapidjson.h:406

◆ PutEnd()

size_t PutEnd ( Ch * )
inline

Definition at line 164 of file encodedstream.h.

164 { RAPIDJSON_ASSERT(false); return 0; }
#define RAPIDJSON_ASSERT(x)
Assertion.
Definition: rapidjson.h:406
Here is the call graph for this function:

◆ RAPIDJSON_STATIC_ASSERT()

RAPIDJSON_STATIC_ASSERT ( sizeof(typename InputByteStream::Ch) == 1 )
private

◆ Take()

Ch Take ( )
inline

Definition at line 157 of file encodedstream.h.

157 { Ch c = current_; current_ = takeFunc_(*is_); return c; }
InputByteStream * is_

◆ Tell()

size_t Tell ( ) const
inline

Definition at line 158 of file encodedstream.h.

158 { return is_->Tell(); }
InputByteStream * is_

Member Data Documentation

◆ current_

Ch current_
private

Definition at line 222 of file encodedstream.h.

◆ hasBOM_

bool hasBOM_
private

Definition at line 224 of file encodedstream.h.

◆ is_

InputByteStream* is_
private

Definition at line 220 of file encodedstream.h.

◆ takeFunc_

TakeFunc takeFunc_
private

Definition at line 223 of file encodedstream.h.

◆ type_

UTFType type_
private

Definition at line 221 of file encodedstream.h.


The documentation for this class was generated from the following file: