21 #ifndef RAPIDJSON_ENCODINGS_H_
22 #define RAPIDJSON_ENCODINGS_H_
28 RAPIDJSON_DIAG_OFF(4244)
29 RAPIDJSON_DIAG_OFF(4702)
30 #elif defined(__GNUC__)
32 RAPIDJSON_DIAG_OFF(effc++)
// Character-stream members of an encoding: Encode writes one code point to an
// output stream, Decode reads one into *codepoint and reports success, and
// Validate reads from `is`, writes to `os` and reports whether the input was valid.
52 template<typename OutputStream>
53 static void Encode(OutputStream& os, unsigned codepoint);
59 template <typename InputStream>
60 static bool Decode(InputStream& is, unsigned* codepoint);
67 template <typename InputStream, typename OutputStream>
68 static bool Validate(InputStream& is, OutputStream& os);
70 // The following functions deal with byte streams.
// TakeBOM and Take read from an input byte stream and return a character.
73 template <typename InputByteStream>
74 static CharType TakeBOM(InputByteStream& is);
77 template <typename InputByteStream>
78 static Ch Take(InputByteStream& is);
// PutBOM and Put write to an output byte stream.
81 template <typename OutputByteStream>
82 static void PutBOM(OutputByteStream& os);
85 template <typename OutputByteStream>
86 static void Put(OutputByteStream& os, Ch c);
// UTF-8 encoding template; the character type defaults to char.
100 template<
typename CharType =
char>
// Capability flag, set to 1 for this encoding.
104 enum { supportUnicode = 1 };
// Writes `codepoint` to `os` as a UTF-8 sequence using os.Put().
// The number of units written depends on the magnitude of the code point.
106 template<
typename OutputStream>
107 static void Encode(OutputStream& os,
unsigned codepoint) {
// Up to 0x7F: a single unit holding the value itself.
108 if (codepoint <= 0x7F)
109 os.Put(static_cast<Ch>(codepoint & 0xFF));
// Up to 0x7FF: lead unit tagged 0xC0, then one unit tagged 0x80 with 6 payload bits.
110 else if (codepoint <= 0x7FF) {
111 os.Put(static_cast<Ch>(0xC0 | ((codepoint >> 6) & 0xFF)));
112 os.Put(static_cast<Ch>(0x80 | ((codepoint & 0x3F))));
// Up to 0xFFFF: lead unit tagged 0xE0, then two 0x80-tagged units.
114 else if (codepoint <= 0xFFFF) {
115 os.Put(static_cast<Ch>(0xE0 | ((codepoint >> 12) & 0xFF)));
116 os.Put(static_cast<Ch>(0x80 | ((codepoint >> 6) & 0x3F)));
117 os.Put(static_cast<Ch>(0x80 | (codepoint & 0x3F)));
// Four-unit form: lead unit tagged 0xF0, then three 0x80-tagged units.
// NOTE(review): the branch header for this case is not shown in this excerpt.
121 os.Put(static_cast<Ch>(0xF0 | ((codepoint >> 18) & 0xFF)));
122 os.Put(static_cast<Ch>(0x80 | ((codepoint >> 12) & 0x3F)));
123 os.Put(static_cast<Ch>(0x80 | ((codepoint >> 6) & 0x3F)));
124 os.Put(static_cast<Ch>(0x80 | (codepoint & 0x3F)));
// Reads one UTF-8 sequence from `is` and stores the decoded value in *codepoint.
// Each visible case returns `result`, which TRANS() clears as soon as a unit's
// GetRange() value shares no bit with the expected mask; the default case
// returns false.
128 template <
typename InputStream>
129 static bool Decode(InputStream& is,
unsigned* codepoint) {
// COPY: take the next unit and append its low six bits to *codepoint.
130 #define COPY() c = is.Take(); *codepoint = (*codepoint << 6) | ((unsigned char)c & 0x3Fu)
// TRANS: keep `result` true only if GetRange(c) has a bit in common with `mask`.
131 #define TRANS(mask) result &= ((GetRange((unsigned char)c) & mask) != 0)
// TAIL: COPY followed by a check against mask 0x70.
132 #define TAIL() COPY(); TRANS(0x70)
// Store the unit's own value as the code point.
// NOTE(review): the condition guarding this line is not shown in this excerpt.
135 *codepoint = (
unsigned char)c;
// Classify the unit, then keep only the bits selected by 0xFF >> type.
139 unsigned char type = GetRange((
unsigned char)c);
140 *codepoint = (0xFF >> type) & (
unsigned char)c;
// One case per class value; each consumes the following units with COPY/TAIL,
// using a narrower mask for the first of them in cases 4, 5, 10 and 11.
143 case 2: TAIL();
return result;
144 case 3: TAIL(); TAIL();
return result;
145 case 4: COPY(); TRANS(0x50); TAIL();
return result;
146 case 5: COPY(); TRANS(0x10); TAIL(); TAIL();
return result;
147 case 6: TAIL(); TAIL(); TAIL();
return result;
148 case 10: COPY(); TRANS(0x20); TAIL();
return result;
149 case 11: COPY(); TRANS(0x60); TAIL(); TAIL();
return result;
// Any other class value is rejected.
150 default:
return false;
// Copies one UTF-8 sequence from `is` to `os` while checking it.
// Uses the same class-based case table as Decode, but COPY() forwards each
// taken unit to os.Put() instead of accumulating a code point.
157 template <
typename InputStream,
typename OutputStream>
158 static bool Validate(InputStream& is, OutputStream& os) {
// COPY: take the next unit, remember it in c and write it to os.
159 #define COPY() os.Put(c = is.Take())
// TRANS: keep `result` true only if GetRange(c) has a bit in common with `mask`.
160 #define TRANS(mask) result &= ((GetRange((unsigned char)c) & mask) != 0)
// TAIL: COPY followed by a check against mask 0x70.
161 #define TAIL() COPY(); TRANS(0x70)
// Dispatch on the class value of the current unit.
168 switch (GetRange((
unsigned char)c)) {
169 case 2: TAIL();
return result;
170 case 3: TAIL(); TAIL();
return result;
171 case 4: COPY(); TRANS(0x50); TAIL();
return result;
172 case 5: COPY(); TRANS(0x10); TAIL(); TAIL();
return result;
173 case 6: TAIL(); TAIL(); TAIL();
return result;
174 case 10: COPY(); TRANS(0x20); TAIL();
return result;
175 case 11: COPY(); TRANS(0x60); TAIL(); TAIL();
return result;
// Any other class value is rejected.
176 default:
return false;
// Holds the 256-entry table of class values that Decode/Validate test with
// TRANS() masks and switch on.
// NOTE(review): presumably returns type[c]; the return statement is not shown
// in this excerpt.
183 static unsigned char GetRange(
unsigned char c) {
186 static const unsigned char type[] = {
// Entries 0x00-0x7F: 0.
187 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
188 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
189 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
190 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
// Entries 0x80-0x8F: 0x10.
191 0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,
// Entries 0x90-0x9F: 0x40.
192 0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,
// Entries 0xA0-0xBF: 0x20.
193 0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,
194 0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,
// Entries 0xC0-0xDF: the first two are 8, the rest 2.
195 8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
// Entries 0xE0-0xFF: values 10, 3, 4, 11, 6, 5 select the switch cases; 8 has no case.
196 10,3,3,3,3,3,3,3,3,3,3,3,3,4,3,3, 11,6,6,6,5,8,8,8,8,8,8,8,8,8,8,8,
// Checks the current unit c against 0xEF, 0xBB and 0xBF in turn, returning c
// as soon as one comparison fails.
// NOTE(review): the lines that (re)load c between the checks and the final
// return are not shown in this excerpt.
201 template <
typename InputByteStream>
202 static CharType TakeBOM(InputByteStream& is) {
205 if ((
unsigned char)c != 0xEFu)
return c;
207 if ((
unsigned char)c != 0xBBu)
return c;
209 if ((
unsigned char)c != 0xBFu)
return c;
// Byte-stream read for UTF-8; takes the input byte stream and returns a Ch.
// NOTE(review): the body is not shown in this excerpt.
214 template <
typename InputByteStream>
215 static Ch Take(InputByteStream& is) {
// Writes the three values 0xEF, 0xBB, 0xBF to `os`, in that order.
220 template <
typename OutputByteStream>
221 static void PutBOM(OutputByteStream& os) {
223 os.Put(0xEFu); os.Put(0xBBu); os.Put(0xBFu);
// Writes the character `c` to `os`, converted to the stream's own Ch type.
226 template <
typename OutputByteStream>
227 static void Put(OutputByteStream& os, Ch c) {
229 os.Put(static_cast<typename OutputByteStream::Ch>(c));
// UTF-16 encoding template; the character type defaults to wchar_t.
245 template<
typename CharType =
wchar_t>
// The character type must be at least two bytes wide.
248 RAPIDJSON_STATIC_ASSERT(
sizeof(Ch) >= 2);
// Capability flag, set to 1 for this encoding.
250 enum { supportUnicode = 1 };
// Writes `codepoint` to `os` as UTF-16: one unit for values up to 0xFFFF,
// otherwise two units tagged 0xD800 and 0xDC00.
// Every value handed to os.Put() is converted explicitly to the stream's Ch
// type, so the narrowing from `unsigned` is visible and uniform.
252 template<
typename OutputStream>
253 static void Encode(OutputStream& os,
unsigned codepoint) {
// The output stream's character type must be at least two bytes wide.
254 RAPIDJSON_STATIC_ASSERT(
sizeof(
typename OutputStream::Ch) >= 2);
// Values up to 0xFFFF fit in a single unit.
255 if (codepoint <= 0xFFFF) {
257 os.Put(static_cast<typename OutputStream::Ch>(codepoint));
// Larger values: subtract 0x10000 and split the result into two 10-bit halves.
261 unsigned v = codepoint - 0x10000;
// Upper half, tagged with 0xD800.
262 os.Put(static_cast<typename OutputStream::Ch>((v >> 10) | 0xD800));
// Lower half, tagged with 0xDC00; cast like the other Put calls rather than
// relying on an implicit unsigned-to-Ch conversion.
263 os.Put(static_cast<typename OutputStream::Ch>((v & 0x3FF) | 0xDC00));
// Reads one UTF-16 encoded code point from `is` into *codepoint.
267 template <
typename InputStream>
268 static bool Decode(InputStream& is,
unsigned* codepoint) {
// The input stream's character type must be at least two bytes wide.
269 RAPIDJSON_STATIC_ASSERT(
sizeof(
typename InputStream::Ch) >= 2);
// Unit outside 0xD800..0xDFFF.
// NOTE(review): the body of this branch is not shown in this excerpt.
271 if (c < 0xD800 || c > 0xDFFF) {
// Unit in 0xD800..0xDBFF: its low 10 bits become the upper half of the result.
275 else if (c <= 0xDBFF) {
276 *codepoint = (c & 0x3FF) << 10;
// NOTE(review): c is presumably reloaded with the following unit before this
// line; that statement is not shown in this excerpt.
278 *codepoint |= (c & 0x3FF);
279 *codepoint += 0x10000;
// Succeeds only when the unit now held in c lies in 0xDC00..0xDFFF.
280 return c >= 0xDC00 && c <= 0xDFFF;
// Copies one UTF-16 encoded code point from `is` to `os` while checking it.
285 template <
typename InputStream,
typename OutputStream>
286 static bool Validate(InputStream& is, OutputStream& os) {
// Both streams must use a character type at least two bytes wide.
287 RAPIDJSON_STATIC_ASSERT(
sizeof(
typename InputStream::Ch) >= 2);
288 RAPIDJSON_STATIC_ASSERT(
sizeof(
typename OutputStream::Ch) >= 2);
// Forward the first unit and keep it in c for classification.
290 os.Put(c = is.Take());
// Unit outside 0xD800..0xDFFF.
// NOTE(review): the statement for this branch is not shown in this excerpt.
291 if (c < 0xD800 || c > 0xDFFF)
// Unit in 0xD800..0xDBFF: forward the next unit, which must lie in 0xDC00..0xDFFF.
293 else if (c <= 0xDBFF) {
294 os.Put(c = is.Take());
295 return c >= 0xDC00 && c <= 0xDFFF;
// Template header for the UTF-16 byte-stream variant that reads and writes the
// low byte first; the character type defaults to wchar_t.
302 template<
typename CharType =
wchar_t>
// Reads one character with Take(); if it equals 0xFEFF, reads and returns the
// next character instead, otherwise returns the one already read.
304 template <
typename InputByteStream>
305 static CharType TakeBOM(InputByteStream& is) {
// Only single-byte input streams are accepted.
306 RAPIDJSON_STATIC_ASSERT(
sizeof(
typename InputByteStream::Ch) == 1);
307 CharType c = Take(is);
308 return (
unsigned short)c == 0xFEFFu ? Take(is) : c;
// Assembles one character from two bytes of `is`: the first byte supplies
// bits 0-7, the second bits 8-15.
311 template <
typename InputByteStream>
312 static CharType Take(InputByteStream& is) {
// Only single-byte input streams are accepted.
313 RAPIDJSON_STATIC_ASSERT(
sizeof(
typename InputByteStream::Ch) == 1);
314 CharType c = (
unsigned char)is.Take();
315 c |= (
unsigned char)is.Take() << 8;
// Writes the two bytes 0xFF, 0xFE to `os`, in that order.
319 template <
typename OutputByteStream>
320 static void PutBOM(OutputByteStream& os) {
// Only single-byte output streams are accepted.
321 RAPIDJSON_STATIC_ASSERT(
sizeof(
typename OutputByteStream::Ch) == 1);
322 os.Put(0xFFu); os.Put(0xFEu);
// Writes the character `c` to the byte stream `os`.
// NOTE(review): only the write of bits 8-15 is shown in this excerpt.
325 template <
typename OutputByteStream>
326 static void Put(OutputByteStream& os, CharType c) {
// Only single-byte output streams are accepted.
327 RAPIDJSON_STATIC_ASSERT(
sizeof(
typename OutputByteStream::Ch) == 1);
329 os.Put((c >> 8) & 0xFFu);
// Template header for the UTF-16 byte-stream variant that reads and writes the
// high byte first; the character type defaults to wchar_t.
334 template<
typename CharType =
wchar_t>
// Reads one character with Take(); if it equals 0xFEFF, reads and returns the
// next character instead, otherwise returns the one already read.
336 template <
typename InputByteStream>
337 static CharType TakeBOM(InputByteStream& is) {
// Only single-byte input streams are accepted.
338 RAPIDJSON_STATIC_ASSERT(
sizeof(
typename InputByteStream::Ch) == 1);
339 CharType c = Take(is);
340 return (
unsigned short)c == 0xFEFFu ? Take(is) : c;
// Assembles one character from two bytes of `is`: the first byte supplies
// bits 8-15, the second bits 0-7.
343 template <
typename InputByteStream>
344 static CharType Take(InputByteStream& is) {
// Only single-byte input streams are accepted.
345 RAPIDJSON_STATIC_ASSERT(
sizeof(
typename InputByteStream::Ch) == 1);
346 CharType c = (
unsigned char)is.Take() << 8;
347 c |= (
unsigned char)is.Take();
// Writes the two bytes 0xFE, 0xFF to `os`, in that order.
351 template <
typename OutputByteStream>
352 static void PutBOM(OutputByteStream& os) {
// Only single-byte output streams are accepted.
353 RAPIDJSON_STATIC_ASSERT(
sizeof(
typename OutputByteStream::Ch) == 1);
354 os.Put(0xFEu); os.Put(0xFFu);
// Writes the character `c` to the byte stream `os`, starting with bits 8-15.
// NOTE(review): the write of the remaining byte is not shown in this excerpt.
357 template <
typename OutputByteStream>
358 static void Put(OutputByteStream& os, CharType c) {
// Only single-byte output streams are accepted.
359 RAPIDJSON_STATIC_ASSERT(
sizeof(
typename OutputByteStream::Ch) == 1);
360 os.Put((c >> 8) & 0xFFu);
// UTF-32 encoding template; the character type defaults to unsigned.
376 template<
typename CharType =
unsigned>
// The character type must be at least four bytes wide.
379 RAPIDJSON_STATIC_ASSERT(
sizeof(Ch) >= 4);
// Capability flag, set to 1 for this encoding.
381 enum { supportUnicode = 1 };
// UTF-32 Encode: takes the output stream and the code point to write.
// NOTE(review): the statements after the static assertion are not shown in
// this excerpt.
383 template<
typename OutputStream>
384 static void Encode(OutputStream& os,
unsigned codepoint) {
// The output stream's character type must be at least four bytes wide.
385 RAPIDJSON_STATIC_ASSERT(
sizeof(
typename OutputStream::Ch) >= 4);
// UTF-32 Decode: reports success when the unit held in c does not exceed 0x10FFFF.
// NOTE(review): the lines that load c and store *codepoint are not shown in
// this excerpt.
390 template <
typename InputStream>
391 static bool Decode(InputStream& is,
unsigned* codepoint) {
// The input stream's character type must be at least four bytes wide.
392 RAPIDJSON_STATIC_ASSERT(
sizeof(
typename InputStream::Ch) >= 4);
395 return c <= 0x10FFFF;
// Copies one unit from `is` to `os` and reports whether it does not exceed 0x10FFFF.
398 template <
typename InputStream,
typename OutputStream>
399 static bool Validate(InputStream& is, OutputStream& os) {
// The input stream's character type must be at least four bytes wide.
400 RAPIDJSON_STATIC_ASSERT(
sizeof(
typename InputStream::Ch) >= 4);
// Forward the unit and keep it in c for the range check.
402 os.Put(c = is.Take());
403 return c <= 0x10FFFF;
// Template header for the UTF-32 byte-stream variant that reads and writes the
// lowest byte first; the character type defaults to unsigned.
408 template<
typename CharType =
unsigned>
// Reads one character with Take(); if it equals 0x0000FEFF, reads and returns
// the next character instead, otherwise returns the one already read.
410 template <
typename InputByteStream>
411 static CharType TakeBOM(InputByteStream& is) {
// Only single-byte input streams are accepted.
412 RAPIDJSON_STATIC_ASSERT(
sizeof(
typename InputByteStream::Ch) == 1);
413 CharType c = Take(is);
414 return (
unsigned)c == 0x0000FEFFu ? Take(is) : c;
// Assembles one character from four bytes of `is`: the first byte supplies
// bits 0-7, the following ones bits 8-15, 16-23 and 24-31.
// Each byte is widened to `unsigned` before it is shifted. Shifting the
// promoted `int` instead would yield a negative value for a top byte >= 0x80,
// which sign-extends into the upper bits when CharType is wider than 32 bits.
417 template <
typename InputByteStream>
418 static CharType Take(InputByteStream& is) {
// Only single-byte input streams are accepted.
419 RAPIDJSON_STATIC_ASSERT(
sizeof(
typename InputByteStream::Ch) == 1);
420 CharType c = static_cast<unsigned char>(is.Take());
421 c |= static_cast<unsigned>(static_cast<unsigned char>(is.Take())) << 8;
422 c |= static_cast<unsigned>(static_cast<unsigned char>(is.Take())) << 16;
423 c |= static_cast<unsigned>(static_cast<unsigned char>(is.Take())) << 24;
// Writes the four bytes 0xFF, 0xFE, 0x00, 0x00 to `os`, in that order.
427 template <
typename OutputByteStream>
428 static void PutBOM(OutputByteStream& os) {
// Only single-byte output streams are accepted.
429 RAPIDJSON_STATIC_ASSERT(
sizeof(
typename OutputByteStream::Ch) == 1);
430 os.Put(0xFFu); os.Put(0xFEu); os.Put(0x00u); os.Put(0x00u);
// Writes the character `c` to the byte stream `os`, lower bytes before higher ones.
// NOTE(review): the write of bits 0-7 is not shown in this excerpt.
433 template <
typename OutputByteStream>
434 static void Put(OutputByteStream& os, CharType c) {
// Only single-byte output streams are accepted.
435 RAPIDJSON_STATIC_ASSERT(
sizeof(
typename OutputByteStream::Ch) == 1);
// Bits 8-15, then 16-23, then 24-31.
437 os.Put((c >> 8) & 0xFFu);
438 os.Put((c >> 16) & 0xFFu);
439 os.Put((c >> 24) & 0xFFu);
// Template header for the UTF-32 byte-stream variant that reads and writes the
// highest byte first; the character type defaults to unsigned.
444 template<
typename CharType =
unsigned>
// Reads one character with Take(); if it equals 0x0000FEFF, reads and returns
// the next character instead, otherwise returns the one already read.
446 template <
typename InputByteStream>
447 static CharType TakeBOM(InputByteStream& is) {
// Only single-byte input streams are accepted.
448 RAPIDJSON_STATIC_ASSERT(
sizeof(
typename InputByteStream::Ch) == 1);
449 CharType c = Take(is);
450 return (
unsigned)c == 0x0000FEFFu ? Take(is) : c;
// Assembles one character from four bytes of `is`: the first byte supplies
// bits 24-31, the following ones bits 16-23, 8-15 and 0-7.
// Each byte is widened to `unsigned` before it is shifted. Shifting the
// promoted `int` instead would yield a negative value for a first byte >= 0x80,
// which sign-extends into the upper bits when CharType is wider than 32 bits.
453 template <
typename InputByteStream>
454 static CharType Take(InputByteStream& is) {
// Only single-byte input streams are accepted.
455 RAPIDJSON_STATIC_ASSERT(
sizeof(
typename InputByteStream::Ch) == 1);
456 CharType c = static_cast<unsigned>(static_cast<unsigned char>(is.Take())) << 24;
457 c |= static_cast<unsigned>(static_cast<unsigned char>(is.Take())) << 16;
458 c |= static_cast<unsigned>(static_cast<unsigned char>(is.Take())) << 8;
459 c |= static_cast<unsigned char>(is.Take());
// Writes the four bytes 0x00, 0x00, 0xFE, 0xFF to `os`, in that order.
463 template <
typename OutputByteStream>
464 static void PutBOM(OutputByteStream& os) {
// Only single-byte output streams are accepted.
465 RAPIDJSON_STATIC_ASSERT(
sizeof(
typename OutputByteStream::Ch) == 1);
466 os.Put(0x00u); os.Put(0x00u); os.Put(0xFEu); os.Put(0xFFu);
// Writes the character `c` to the byte stream `os`, higher bytes before lower ones.
// NOTE(review): the write of bits 0-7 is not shown in this excerpt.
469 template <
typename OutputByteStream>
470 static void Put(OutputByteStream& os, CharType c) {
// Only single-byte output streams are accepted.
471 RAPIDJSON_STATIC_ASSERT(
sizeof(
typename OutputByteStream::Ch) == 1);
// Bits 24-31, then 16-23, then 8-15.
472 os.Put((c >> 24) & 0xFFu);
473 os.Put((c >> 16) & 0xFFu);
474 os.Put((c >> 8) & 0xFFu);
// Template header whose character type defaults to char.
487 template<
typename CharType =
char>
// Capability flag, set to 0 here (the other encodings in this file use 1).
491 enum { supportUnicode = 0 };
// Writes the low eight bits of `codepoint` to `os` as a single Ch.
// NOTE(review): line 495 of the original is not shown in this excerpt.
493 template<
typename OutputStream>
494 static void Encode(OutputStream& os,
unsigned codepoint) {
496 os.Put(static_cast<Ch>(codepoint & 0xFF));
// Takes one unit from `is`, converted explicitly to unsigned char.
// NOTE(review): the lines that store *codepoint and return are not shown in
// this excerpt.
499 template <
typename InputStream>
500 static bool Decode(InputStream& is,
unsigned* codepoint) {
501 unsigned char c =
static_cast<unsigned char>(is.Take());
// Takes one unit from `is` for validation against `os`.
// The unit is converted to unsigned char with an explicit cast, matching the
// sibling Decode, so the narrowing from the stream's Ch type is intentional
// and visible.
// NOTE(review): the lines that write to `os` and return are not shown in this
// excerpt.
506 template <
typename InputStream,
typename OutputStream>
507 static bool Validate(InputStream& is, OutputStream& os) {
508 unsigned char c = static_cast<unsigned char>(is.Take());
// Byte-stream BOM read for this encoding; returns a CharType.
// NOTE(review): the body is not shown in this excerpt.
513 template <
typename InputByteStream>
514 static CharType TakeBOM(InputByteStream& is) {
// Byte-stream read for this encoding; returns a Ch.
// NOTE(review): the body is not shown in this excerpt.
520 template <
typename InputByteStream>
521 static Ch Take(InputByteStream& is) {
// Byte-stream BOM write for this encoding.
// NOTE(review): the body is not shown in this excerpt.
526 template <
typename OutputByteStream>
527 static void PutBOM(OutputByteStream& os) {
// Writes the character `c` to `os`, converted to the stream's own Ch type.
532 template <
typename OutputByteStream>
533 static void Put(OutputByteStream& os, Ch c) {
535 os.Put(static_cast<typename OutputByteStream::Ch>(c));
// Template header for the encoding that dispatches at run time on the
// stream's GetType(); the character type has no default here.
554 template<
typename CharType>
// Capability flag, set to 1 for this encoding.
558 enum { supportUnicode = 1 };
// Expands to the member `x` of each concrete encoding, in the order
// UTF8, UTF16LE, UTF16BE, UTF32LE, UTF32BE; used to fill the dispatch tables.
560 #define RAPIDJSON_ENCODINGS_FUNC(x) UTF8<Ch>::x, UTF16LE<Ch>::x, UTF16BE<Ch>::x, UTF32LE<Ch>::x, UTF32BE<Ch>::x
// Forwards to the Encode function selected by os.GetType().
// The table order is that of RAPIDJSON_ENCODINGS_FUNC: UTF8, UTF16LE, UTF16BE,
// UTF32LE, UTF32BE; os.GetType() is used directly as the index.
562 template<
typename OutputStream>
563 RAPIDJSON_FORCEINLINE
static void Encode(OutputStream& os,
unsigned codepoint) {
564 typedef void (*EncodeFunc)(OutputStream&, unsigned);
565 static const EncodeFunc f[] = { RAPIDJSON_ENCODINGS_FUNC(Encode) };
566 (*f[os.GetType()])(os, codepoint);
// Forwards to the Decode function selected by is.GetType() and returns its result.
// The table order is that of RAPIDJSON_ENCODINGS_FUNC: UTF8, UTF16LE, UTF16BE,
// UTF32LE, UTF32BE; is.GetType() is used directly as the index.
569 template <
typename InputStream>
570 RAPIDJSON_FORCEINLINE
static bool Decode(InputStream& is,
unsigned* codepoint) {
571 typedef bool (*DecodeFunc)(InputStream&,
unsigned*);
572 static const DecodeFunc f[] = { RAPIDJSON_ENCODINGS_FUNC(Decode) };
573 return (*f[is.GetType()])(is, codepoint);
// Forwards to the Validate function selected by is.GetType() and returns its result.
// The input stream's type chooses the entry; the output stream is only passed through.
576 template <
typename InputStream,
typename OutputStream>
577 RAPIDJSON_FORCEINLINE
static bool Validate(InputStream& is, OutputStream& os) {
578 typedef bool (*ValidateFunc)(InputStream&, OutputStream&);
579 static const ValidateFunc f[] = { RAPIDJSON_ENCODINGS_FUNC(Validate) };
580 return (*f[is.GetType()])(is, os);
// The dispatch-table helper macro is no longer needed past this point.
583 #undef RAPIDJSON_ENCODINGS_FUNC
// Template header parameterised on a source and a target encoding.
590 template<
typename SourceEncoding,
typename TargetEncoding>
// Decodes one code point from `is` with SourceEncoding and writes it to `os`
// with TargetEncoding.
// NOTE(review): the declaration of `codepoint`, the failure branch and the
// final return are not shown in this excerpt.
593 template<
typename InputStream,
typename OutputStream>
594 RAPIDJSON_FORCEINLINE
static bool Transcode(InputStream& is, OutputStream& os) {
// Decoding failure is handled by the statement that follows this condition.
596 if (!SourceEncoding::Decode(is, &codepoint))
598 TargetEncoding::Encode(os, codepoint);
// Validation entry point of the two-encoding Transcoder.
// NOTE(review): the body is not shown in this excerpt.
603 template<
typename InputStream,
typename OutputStream>
604 RAPIDJSON_FORCEINLINE
static bool Validate(InputStream& is, OutputStream& os) {
// Template header taking a single encoding.
// NOTE(review): presumably the Transcoder form for identical source and target
// encodings; the struct line is not shown in this excerpt.
610 template<
typename Encoding>
// Transcode entry point of the single-encoding form.
// NOTE(review): the body is not shown in this excerpt.
612 template<
typename InputStream,
typename OutputStream>
613 RAPIDJSON_FORCEINLINE
static bool Transcode(InputStream& is, OutputStream& os) {
// Delegates directly to Encoding::Validate and returns its result.
618 template<
typename InputStream,
typename OutputStream>
619 RAPIDJSON_FORCEINLINE
static bool Validate(InputStream& is, OutputStream& os) {
620 return Encoding::Validate(is, os);
// Guard for the closing compiler-specific section. _MSC_VER is the macro the
// Microsoft compiler defines; the misspelled _MSV_VER is never defined, so the
// guarded lines were skipped on that compiler.
// NOTE(review): the guarded lines are not shown in this excerpt; presumably
// they restore the diagnostics switched off at the top of the file.
626 #if defined(__GNUC__) || defined(_MSC_VER)
630 #endif // RAPIDJSON_ENCODINGS_H_