Gnash  0.8.11dev
utf8.h
Go to the documentation of this file.
1 // utf8.h: utilities for converting to and from UTF-8
2 //
3 // Copyright (C) 2008, 2009, 2010, 2011, 2012 Free Software Foundation, Inc.
4 //
5 // This program is free software; you can redistribute it and/or modify
6 // it under the terms of the GNU General Public License as published by
7 // the Free Software Foundation; either version 3 of the License, or
8 // (at your option) any later version.
9 //
10 // This program is distributed in the hope that it will be useful,
11 // but WITHOUT ANY WARRANTY; without even the implied warranty of
12 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 // GNU General Public License for more details.
14 //
15 // You should have received a copy of the GNU General Public License
16 // along with this program; if not, write to the Free Software
17 // Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18 //
19 // Based on the public domain work of Thatcher Ulrich <tu@tulrich.com> 2004
20 
21 #ifndef UTF8_H
22 #define UTF8_H
23 
24 #include <string>
25 #include <cstdint> // for C99 int types
26 #include <vector>
27 
28 #include "dsodefs.h" // For DSOEXPORT
29 
30 // Android doesn't have any support for wide characters at all.
31 #ifdef __ANDROID__
32 namespace std {
33 typedef basic_string
34  <wchar_t
35  ,std::char_traits<wchar_t>
36  ,std::allocator<wchar_t> >
37 wstring;
38 }
39 #endif
40 
41 namespace gnash {
42 
44 //
66 //
70 namespace utf8 {
71 
73  //
77  //
80  DSOEXPORT std::wstring decodeCanonicalString(const std::string& str, int version);
81 
83  //
93  DSOEXPORT std::string encodeCanonicalString(const std::wstring& wstr, int version);
94 
96  //
101  DSOEXPORT std::uint32_t decodeNextUnicodeCharacter(std::string::const_iterator& it,
102  const std::string::const_iterator& e);
103 
106  DSOEXPORT std::string encodeUnicodeCharacter(std::uint32_t ucs_character);
107 
109  //
112  DSOEXPORT std::string encodeLatin1Character(std::uint32_t ucsCharacter);
113 
124  encBOCU1
125  };
126 
128  //
151  DSOEXPORT const char* stripBOM(const char* in, size_t& size,
152  TextEncoding& encoding);
153 
155  DSOEXPORT const char* textEncodingName(TextEncoding enc);
156 
160  ENCGUESS_OTHER = 2
161  };
162 
164  // Shift-Jis, UTF8, and other. Puts the character count in length,
165  // and the offsets to the characters in offsets, if offsets is not NULL.
166  // If not NULL, offsets should be at least s.length().
167  // offsets are not accurate if the return value is ENCGUESS_OTHER
168  //
171  DSOEXPORT EncodingGuess guessEncoding(const std::string& s, int& length,
172  std::vector<int>& offsets);
173 
174 
175 } // namespace utf8
176 } // namespace gnash
177 
178 #endif // UTF8_H
179 
180 
181 // Local Variables:
182 // mode: C++
183 // c-basic-offset: 8
184 // tab-width: 8
185 // indent-tabs-mode: t
186 // End:
gnash::utf8::ENCGUESS_UNICODE
@ ENCGUESS_UNICODE
Definition: utf8.h:158
gnash::utf8::textEncodingName
const char * textEncodingName(TextEncoding enc)
Return name of a text encoding.
Definition: utf8.cpp:262
gnash::utf8::guessEncoding
EncodingGuess guessEncoding(const std::string &str, int &length, std::vector< int > &offsets)
Common code for guessing at the encoding of random text, between Shift-Jis, UTF8, and other.
Definition: utf8.cpp:281
dsodefs.h
gnash::utf8::ENCGUESS_JIS
@ ENCGUESS_JIS
Definition: utf8.h:159
gnash::utf8::encodeUnicodeCharacter
std::string encodeUnicodeCharacter(std::uint32_t ucs_character)
Encodes the given wide character into a canonical string, theoretically up to 6 chars in length.
Definition: utf8.cpp:165
gnash::utf8::decodeNextUnicodeCharacter
std::uint32_t decodeNextUnicodeCharacter(std::string::const_iterator &it, const std::string::const_iterator &e)
Return the next Unicode character in the UTF-8 encoded string.
Definition: utf8.cpp:93
gnash
Anonymous namespace for callbacks, local functions, event handlers etc.
Definition: dbus_ext.cpp:41
gnash::utf8::encUNSPECIFIED
@ encUNSPECIFIED
Definition: utf8.h:115
gnash::key::s
@ s
Definition: GnashKey.h:165
gnash::utf8::encUTF8
@ encUTF8
Definition: utf8.h:116
gnash::utf8::encUTF32BE
@ encUTF32BE
Definition: utf8.h:119
gnash::utf8::EncodingGuess
EncodingGuess
Definition: utf8.h:157
gnash::utf8::encSCSU
@ encSCSU
Definition: utf8.h:121
gnash::key::code
code
Definition: GnashKey.h:44
length
@ length
Definition: klash_part.cpp:329
gnash::utf8::stripBOM
const char * stripBOM(const char *in, size_t &size, TextEncoding &encoding)
Interpret (and skip) Byte Order Mark in input stream.
Definition: utf8.cpp:208
NEXT_BYTE
#define NEXT_BYTE(shift)
gnash::utf8::encBOCU1
@ encBOCU1
Definition: utf8.h:124
gnash::utf8::encUTF16LE
@ encUTF16LE
Definition: utf8.h:118
gnash::utf8::encUTFEBCDIC
@ encUTFEBCDIC
Definition: utf8.h:123
utf8.h
gnash::utf8::encUTF16BE
@ encUTF16BE
Definition: utf8.h:117
gnash::utf8::TextEncoding
TextEncoding
Definition: utf8.h:114
gnash::utf8::encodeCanonicalString
std::string encodeCanonicalString(const std::wstring &wstr, int version)
Converts a std::wstring into canonical std::string.
Definition: utf8.cpp:67
gnash::key::c
@ c
Definition: GnashKey.h:149
gnash::utf8::decodeCanonicalString
std::wstring decodeCanonicalString(const std::string &str, int version)
Converts a std::string with multibyte characters into a std::wstring.
Definition: utf8.cpp:39
gnash::utf8::ENCGUESS_OTHER
@ ENCGUESS_OTHER
Definition: utf8.h:160
DSOEXPORT
#define DSOEXPORT
Definition: dsodefs.h:55
width
@ width
Definition: klash_part.cpp:329
gnash::utf8::encodeLatin1Character
std::string encodeLatin1Character(std::uint32_t ucsCharacter)
Encodes the given wide character into an at least 8-bit character.
Definition: utf8.cpp:84
gnash::key::e
@ e
Definition: GnashKey.h:151
FIRST_BYTE
#define FIRST_BYTE(mask, shift)
gnash::utf8::encUTF7
@ encUTF7
Definition: utf8.h:122
gnash::utf8::encUTF32LE
@ encUTF32LE
Definition: utf8.h:120