// Definition of the static m_utf16 flag: initialised to true exactly when
// wchar_t is two bytes wide. narrowImp() reads this flag to decide whether
// the following wide character should be examined as a low surrogate.
27bool G::Convert::m_utf16 =
sizeof(wchar_t) == 2 ;
// NOTE(review): the enclosing signature is not visible in this listing;
// presumably this is the body of utf16(bool) with parameter b -- confirm.
// Exchanges the m_utf16 flag with b, so b ends up holding the previous flag.
32 std::swap( m_utf16 , b ) ;
// NOTE(review): the enclosing signature is not visible in this listing;
// presumably widen(std::string_view sv) -- confirm.
// Empty input short-circuits to an empty wstring; anything else is handed to
// widenImp() as a pointer plus length.
40 return sv.empty() ? std::wstring() : widenImp( sv.data() , sv.size() ) ;
// NOTE(review): the enclosing signature, the declaration of the local named
// valid and the final return are not visible in this listing; presumably
// this is valid(std::string_view sv) -- confirm.
// An empty string is reported as valid without any decoding.
46 if( sv.empty() )
return true ;
// Runs the decoder with a null output pointer, so no wide characters are
// stored through it here; the address of valid is passed as the valid_out
// argument of widenImp().
48 widenImp( sv.data() , sv.size() ,
nullptr , &valid ) ;
// NOTE(review): the three statements below look like the bodies of three
// narrow() overloads whose signatures are not visible in this listing.

// Container form: an empty s gives an empty std::string, otherwise the
// data pointer and size are forwarded to narrowImp().
55 return s.empty() ? std::string() : narrowImp( s.data() , s.size() ) ;
// Null-terminated form: a null pointer or an empty string gives an empty
// std::string, otherwise the length is measured with std::wcslen().
62 return p && *p ? narrowImp( p , std::wcslen(p) ) : std::string() ;
// Pointer-plus-count form: a null pointer or a zero count gives an empty
// std::string.
69 return p && n ? narrowImp( p , n ) : std::string() ;
// NOTE(review): the two statements below look like the bodies of two
// invalid() overloads whose signatures are not visible in this listing.

// Wide form: true if s contains the wide character 0xFFFD.
// NOTE(review): the search argument is a wide character, so s is presumably a
// std::wstring, yet the result is compared with std::string::npos;
// std::wstring::npos would be the matching constant -- confirm.
76 return s.find( L
'\xFFFD' ) != std::string::npos ;
// Narrow form: true if s contains the byte sequence EF BF BD, which is what
// the three-byte branch of u8out() emits for the value 0xFFFD.
83 return s.find(
"\xEF\xBF\xBD" ) != std::string::npos ;
// Convenience overload that returns a std::wstring. It calls the
// pointer-based widenImp() overload twice over the same input.
89std::wstring G::Convert::widenImp(
const char * p_in , std::size_t n_in )
// First call: made with a null output pointer; its return value is taken as
// the number of wide characters needed.
91 std::size_t n_out = widenImp( p_in , n_in ,
nullptr ) ;
// Nothing to produce, so return an empty string.
92 if( n_out == 0U )
return {} ;
// Second call: writes through a pointer to the first element of out.
// NOTE(review): the declaration of out and the final return are not visible
// in this listing; presumably out is a std::wstring sized to n_out -- confirm.
95 widenImp( p_in , n_in , &*out.begin() ) ;
// Pointer-based decoder: walks the n_in bytes at p_in, decoding one code
// point at a time with u8in() and storing wide characters through p_out.
// NOTE(review): braces, the declaration of d, the counting of n_out, any
// null checks on p_out and all use of valid_out are not visible in this
// listing, so they are not described here.
99std::size_t G::Convert::widenImp(
const char * p_in , std::size_t n_in ,
wchar_t * p_out ,
bool * valid_out )
noexcept
// View the input as unsigned bytes so the bit masks in u8in() work on
// values in the range 0..255.
101 const unsigned char * p =
reinterpret_cast<const unsigned char*
>( p_in ) ;
102 std::size_t n_out = 0U ;
// Both the byte index and the read pointer advance by d on each iteration.
104 for( std::size_t i = 0U ; i < n_in ; i += d , p += d )
// Decode one code point from the bytes that remain.
107 auto pair = u8in( p , n_in-i ) ;
108 unicode_type u = pair.first ;
// Decode-failure branch; its body is not visible in this listing.
110 if( u == unicode_error )
// Values up to 0xFFFF outside the surrogate range D800..DFFF are stored as
// a single wide character.
119 if( u <= 0xD7FF || ( u >= 0xE000 && u <= 0xFFFF ) )
122 *p_out++ =
static_cast<wchar_t>(u) ;
// Values 0x10000..0x10FFFF are stored as two wide characters, the first
// tagged with D800 and the second with DC00 (a surrogate pair).
125 else if( u >= 0x10000 && u <= 0x10FFFF )
127 static_assert( (0x10FFFF - 0x10000) == 0xFFFFF ,
"" ) ;
// NOTE(review): standard UTF-16 encoding requires 0x10000 to be subtracted
// from u before these shifts and masks; that step is not visible in this
// listing (the static_assert above hints at it) -- confirm it is present.
131 *p_out++ =
static_cast<wchar_t>( 0xD800 | (u >> 10) ) ;
132 *p_out++ =
static_cast<wchar_t>( 0xDC00 | (u & 0x3FF) ) ;
// Stores the wide character 0xFFFD.
// NOTE(review): the guarding condition is not visible; presumably this is
// the fallback for values rejected by both range tests above -- confirm.
139 *p_out++ = L
'\xFFFD' ;
// Stores the code point directly as one wide character.
// NOTE(review): the guarding condition is not visible; presumably this is
// the path taken when wide characters are not UTF-16 -- confirm.
146 *p_out++ =
static_cast<wchar_t>(u) ;
// Decodes one code point from the start of an n-byte UTF-8 buffer at p.
// Returns the pair (u, d); u starts out as unicode_error and is overwritten
// by the successful multi-byte branches below.
// NOTE(review): braces, the declaration and assignments of d, the first if
// of the chain and the bodies of several branches are not visible in this
// listing, so they are not described here.
153std::pair<G::Convert::unicode_type,std::size_t>
G::Convert::u8in(
const unsigned char * p , std::size_t n )
noexcept
155 unicode_type u = unicode_error ;
// Top bit clear: a single-byte (0x00..0x7F) character.
160 else if( ( p[0] & 0x80U ) == 0U )
// Bit pattern 10xxxxxx in first position: a continuation byte where a lead
// byte is required.
164 else if( ( p[0] & 0xC0 ) == 0x80 )
// Two-byte form 110xxxxx 10xxxxxx. Needs a second byte with the
// continuation pattern, and rejects lead bytes whose four upper payload
// bits are all zero, since the value would then fit in a single byte.
167 else if( ( p[0] & 0xE0U ) == 0xC0U && n > 1U &&
168 !( ( p[0] & 0x1EU ) == 0U ) &&
169 ( p[1] & 0xC0 ) == 0x80U )
// Assemble 5 + 6 payload bits.
172 u = (
static_cast<unicode_type
>(p[0]) & 0x1FU ) << 6 ;
173 u |= (
static_cast<unicode_type
>(p[1]) & 0x3FU ) << 0 ;
// A two-byte lead that failed the checks above.
175 else if( ( p[0] & 0xE0U ) == 0xC0U )
// Three-byte form 1110xxxx 10xxxxxx 10xxxxxx. Rejected are:
//  - lead payload 0 with bit 0x20 of the second byte clear, which would
//    give a value below 0x800 (representable in fewer bytes);
//  - lead payload 0xD with bit 0x20 of the second byte set, which would
//    give a value in the surrogate range D800..DFFF;
//  - second or third bytes lacking the 10xxxxxx continuation pattern.
179 else if( ( p[0] & 0xF0U ) == 0xE0U && n > 2U &&
180 !( ( p[0] & 0x0FU ) == 0U && ( p[1] & 0x20U ) == 0U ) &&
181 !( ( p[0] & 0x0FU ) == 0x0DU && ( p[1] & 0x20U ) == 0x20U ) &&
182 ( p[1] & 0xC0 ) == 0x80U &&
183 ( p[2] & 0xC0 ) == 0x80U )
// Assemble 4 + 6 + 6 payload bits.
186 u = (
static_cast<unicode_type
>(p[0]) & 0x0FU ) << 12 ;
187 u |= (
static_cast<unicode_type
>(p[1]) & 0x3FU ) << 6 ;
188 u |= (
static_cast<unicode_type
>(p[2]) & 0x3FU ) << 0 ;
// A three-byte lead that failed the checks above.
190 else if( ( p[0] & 0xF0U ) == 0xE0U )
// Four-byte form 11110xxx followed by three continuation bytes. Rejects
// lead payload 0 with bits 0x30 of the second byte clear, which would give
// a value below 0x10000 (representable in fewer bytes).
// The visible condition places no upper limit on the decoded value;
// widenImp() applies its own test against 0x10FFFF.
194 else if( ( p[0] & 0xF8U ) == 0xF0U && n > 3U &&
195 !( ( p[0] & 0x07U ) == 0U && ( p[1] & 0x30U ) == 0U ) &&
196 ( p[1] & 0xC0 ) == 0x80U &&
197 ( p[2] & 0xC0 ) == 0x80U &&
198 ( p[3] & 0xC0 ) == 0x80U )
// Assemble 3 + 6 + 6 + 6 payload bits.
201 u = (
static_cast<unicode_type
>(p[0]) & 0x07U ) << 18 ;
202 u |= (
static_cast<unicode_type
>(p[1]) & 0x3FU ) << 12 ;
203 u |= (
static_cast<unicode_type
>(p[2]) & 0x3FU ) << 6 ;
204 u |= (
static_cast<unicode_type
>(p[3]) & 0x3FU ) << 0 ;
// A four-byte lead that failed the checks above.
206 else if( ( p[0] & 0xF8U ) == 0xF0U )
// Hand back the decoded value together with d.
213 return std::make_pair( u , d ) ;
// NOTE(review): the enclosing signature and the declaration of d are not
// visible in this listing; presumably this is the body of
// u8parse(std::string_view s, ParseFn fn) -- confirm.
// Walks the bytes of s, decoding one code point per iteration and passing
// it to fn.
218 const unsigned char * p =
reinterpret_cast<const unsigned char*
>( s.data() ) ;
219 std::size_t n = s.size() ;
// Both the byte offset and the read pointer advance by d on each iteration.
221 for( std::size_t i = 0U ; i < n ; i += d , p += d )
223 auto pair = u8in( p , n-i ) ;
// A decode failure is reported to the callback as the value 0xFFFD.
224 if( pair.first == unicode_error ) pair.first = L
'\xFFFD' ;
// The callback receives the code point, the second element of the u8in()
// result and the current byte offset; a false return stops the walk.
225 if( !fn( pair.first , pair.second , i ) ) break ;
// Step forward by at least one byte so the loop always makes progress.
226 d = std::max( pair.second , std::size_t(1U) ) ;
// Convenience overload that returns a std::string. It calls the
// pointer-based narrowImp() overload twice over the same input.
// NOTE(review): the declaration of s and the final return are not visible
// in this listing; presumably s is a std::string sized to n_out -- confirm.
230std::string G::Convert::narrowImp(
const wchar_t * p_in , std::size_t n_in )
// First call: made with a null output pointer; its return value is kept in
// n_out.
232 std::size_t n_out = narrowImp( p_in , n_in ,
nullptr ) ;
// Second call: writes through a pointer to the first element of s.
235 G::Convert::narrowImp( p_in , n_in , &*s.begin() ) ;
// Pointer-based encoder: walks the n_in wide characters at p_in, combining
// surrogate pairs when m_utf16 is set, and passes each resulting code point
// to u8out(), accumulating its return value in n_out.
// NOTE(review): braces, the declarations of d and error, and the bodies of
// the error branches are not visible in this listing, so they are not
// described here.
239std::size_t G::Convert::narrowImp(
const wchar_t * p_in , std::size_t n_in ,
char * p_out )
noexcept
241 std::size_t n_out = 0U ;
// Both the index and the read pointer advance by d on each iteration.
243 for( std::size_t i = 0U ; i < n_in ; i += d , p_in += d )
245 unicode_type u = unicode_cast( p_in[0] ) ;
// Look at the following wide character only in UTF-16 mode and only when
// one exists; otherwise zero is used, which is never a low surrogate.
250 auto u1 = unicode_cast( m_utf16 && (i+1U) < n_in ? p_in[1] : L
'\0' ) ;
// Classify the current and following units against the surrogate ranges.
251 const bool u0_high = u >= 0xD800U && u <= 0xDBFFU ;
252 const bool u0_low = u >= 0xDC00U && u <= 0xDFFFU ;
253 const bool u1_low = u1 >= 0xDC00U && u1 <= 0xDFFFU ;
// A high surrogate followed by a low surrogate forms one code point.
254 if( u0_high && u1_low )
// NOTE(review): the standard decode is 0x10000 + (high payload << 10) +
// low payload. Using bitwise-or for the 0x10000 term only agrees with
// that when bit 16 of the combined 20-bit payload is clear. For example
// the pair D840 DC00 (U+20000) evaluates to 0x10000 here. Likely a bug
// affecting planes 2, 4, 6 and so on -- confirm and replace the first
// bitwise-or with an addition.
257 u = 0x10000U | ((u & 0x3FFU) << 10) | (u1 & 0x3FFU) ;
// Any other surrogate (unpaired high, or a low in first position).
259 else if( u0_high || u0_low )
// Error flag set, or the value is beyond the last code point 0x10FFFF.
266 if( error || u > 0x10FFFFU )
// Encode the code point and add the value returned by u8out() to the total.
278 n_out += u8out( u , p_out ) ;
// NOTE(review): the enclosing signature, the first if of this chain, the
// condition guarding the four-byte stores and the return are not visible in
// this listing; presumably this is the body of u8out(unicode_type u,
// char *& p_out) -- confirm.
// Writes the UTF-8 bytes for u through p_out, advancing p_out per byte.

// One byte: values up to 0x7F are stored as-is.
// NOTE(review): this store converts with static_cast<unsigned char> while
// the multi-byte stores below go through char_cast() -- consider using
// char_cast() here too for consistency.
290 else if( u <= 0x7FU )
293 *p_out++ =
static_cast<unsigned char>(u) ;
// Two bytes: 110xxxxx 10xxxxxx carrying 5 + 6 bits.
296 else if( u >= 0x80U && u <= 0x7FFU )
300 *p_out++ = char_cast( 0xC0U | ((u >> 6) & 0x1FU) ) ;
301 *p_out++ = char_cast( 0x80U | ((u >> 0) & 0x3FU) ) ;
// Three bytes: 1110xxxx 10xxxxxx 10xxxxxx carrying 4 + 6 + 6 bits.
305 else if( u >= 0x800U && u <= 0xFFFFU )
309 *p_out++ = char_cast( 0xE0U | ((u >> 12) & 0x0FU) ) ;
310 *p_out++ = char_cast( 0x80U | ((u >> 6) & 0x3FU) ) ;
311 *p_out++ = char_cast( 0x80U | ((u >> 0) & 0x3FU) ) ;
// Four bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx carrying 3 + 6 + 6 + 6 bits.
319 *p_out++ = char_cast( 0xF0U | ((u >> 18) & 0x07U) ) ;
320 *p_out++ = char_cast( 0x80U | ((u >> 12) & 0x3FU) ) ;
321 *p_out++ = char_cast( 0x80U | ((u >> 6) & 0x3FU) ) ;
322 *p_out++ = char_cast( 0x80U | ((u >> 0) & 0x3FU) ) ;
// Converts a wide character to unicode_type in two steps: first to the
// unsigned counterpart of wchar_t, then to unicode_type.
// NOTE(review): the intermediate unsigned cast presumably prevents sign
// extension on platforms where wchar_t is signed -- confirm against the
// definition of unicode_type, which is not visible in this listing.
328G::Convert::unicode_type G::Convert::unicode_cast(
wchar_t c )
noexcept
330 return static_cast<unicode_type
>(
static_cast<std::make_unsigned<wchar_t>::type
>(c) ) ;
// Converts an unsigned int to char in two steps: first narrowed to unsigned
// char, then converted to char. The callers visible in this file (u8out)
// only pass values built from byte-sized masks.
333char G::Convert::char_cast(
unsigned int i )
noexcept
335 return static_cast<char>(
static_cast<unsigned char>( i ) ) ;
static std::wstring widen(std::string_view)
Widens from UTF-8 to UTF-16/UCS-4 wstring.
static std::pair< unicode_type, std::size_t > u8in(const unsigned char *, std::size_t n) noexcept
Reads a Unicode character from a UTF-8 buffer together with the number of bytes consumed.
static bool invalid(const std::wstring &)
Returns true if the string contains L'\xFFFD'.
static void u8parse(std::string_view, ParseFn)
Calls a function for each Unicode value in the given UTF-8 string.
static std::string narrow(const std::wstring &)
Narrows from UTF-16/UCS-4 wstring to UTF-8.
static std::size_t u8out(unicode_type, char *&) noexcept
Puts a Unicode character value into a character buffer with UTF-8 encoding.
static bool valid(std::string_view) noexcept
Returns true if the string is valid UTF-8.
static bool utf16(bool)
Forces UTF-16 even if wchar_t is 4 bytes. Used in testing.