27#include <wx/strconv.h>
41 m_s( (const char*) o.utf8_str() )
48 return wxString(
c_str(), wxConvUTF8 );
52UTF8::operator wxString ()
const
54 return wxString( c_str(), wxConvUTF8 );
60 m_s = (
const char*) o.utf8_str();
70 unsigned ch = *aSequence;
79 const unsigned char* s = aSequence;
81 static const unsigned char utf8_len[] = {
94 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
95 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
96 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
97 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
98 0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
99 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
100 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
101 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
104 int len = utf8_len[ *s - 0x80 ];
111 wxFAIL_MSG( wxS(
"uni_forward: invalid start byte" ) );
117 if( ( s[1] & 0xc0 ) != 0x80 )
120 wxFAIL_MSG( wxS(
"uni_forward: invalid continuation byte" ) );
125 ch = ((s[0] & 0x1f) << 6) +
126 ((s[1] & 0x3f) << 0);
132 if( (s[1] & 0xc0) != 0x80 ||
133 (s[2] & 0xc0) != 0x80 ||
134 (s[0] == 0xE0 && s[1] < 0xA0)
139 wxFAIL_MSG( wxS(
"uni_forward: invalid continuation byte" ) );
144 ch = ((s[0] & 0x0f) << 12) +
145 ((s[1] & 0x3f) << 6 ) +
146 ((s[2] & 0x3f) << 0 );
152 if( (s[1] & 0xc0) != 0x80 ||
153 (s[2] & 0xc0) != 0x80 ||
154 (s[3] & 0xc0) != 0x80 ||
155 (s[0] == 0xF0 && s[1] < 0x90) ||
156 (s[0] == 0xF4 && s[1] > 0x8F) )
159 wxFAIL_MSG( wxS(
"uni_forward: invalid continuation byte" ) );
164 ch = ((s[0] & 0x7) << 18) +
165 ((s[1] & 0x3f) << 12) +
166 ((s[2] & 0x3f) << 6 ) +
167 ((s[3] & 0x3f) << 0 );
182 int len = strlen( aString );
186 const unsigned char*
next = (
unsigned char*) aString;
187 const unsigned char* end =
next + len;
212 std::vector< char > temp( wcslen( txt ) * 4 + 1 );
213 wxConvUTF8.WC2MB( temp.data(), txt, temp.size() );
214 m_s.assign( temp.data() );
218 auto string = wxSafeConvertWX2MB( txt );
219 m_s.assign(
string );
230 m_s.operator+=( char( w_ch ) );
An 8-bit string that is assuredly encoded in UTF-8, and supplies special conversion support to and fro...
UTF8 & operator+=(const UTF8 &str)
std::string substr(size_t pos=0, size_t len=npos) const
static int uni_forward(const unsigned char *aSequence, unsigned *aResult=nullptr)
Advance over a single UTF-8 encoded multibyte character, capturing the Unicode character as it goes...
const char * c_str() const
UTF8 & operator=(const wxString &o)
bool IsUTF8(const char *aString)
Test a C string to see if it is UTF-8 encoded.