// Encodes the given domain into the internal output string and returns a
// reference to this object, so that result() can be chained onto the call.
42 IdnImp & encode( std::string_view domain ) ;
// Returns a copy of the accumulated output string.
43 std::string result()
const {
return m_output ; }
// Returns true if no character of the string has its 0x80 bit set.
44 static bool is7Bit( std::string_view s ) noexcept ;
// Type aliases: unicode_type holds one code point, value_type is the integer
// type used for the encoder arithmetic, List is a sequence of code points.
47 using unicode_type = G::Convert::unicode_type ;
48 using value_type = g_uint32_t ;
49 using List = std::vector<unicode_type> ;
// Appends the encoded form of one label to m_output.
52 void outputPunycode( std::string_view ) ;
// Appends one code point to the given list; the two size_t parameters are
// unnamed in the definition.
53 static bool parse( List& , unicode_type , std::size_t , std::size_t ) ;
// Computes the next bias value from a delta, a count and a first-time flag.
54 static value_type adapt( value_type d , value_type n ,
bool first ) noexcept ;
// Quotient/remainder pair returned by div().
55 struct div_t { value_type quot ; value_type rem ; } ;
// Integer division yielding both quotient and remainder.
// NOTE(review): 'demoninator' looks like a typo for 'denominator'; the
// definition names its parameters 'top' and 'bottom'.
56 static div_t div( value_type numerator , value_type demoninator ) noexcept ;
// Limits v to the range [lo,hi].
57 static value_type clamp( value_type v , value_type lo , value_type hi ) noexcept ;
// Checks a condition (body not visible in this listing).
58 static void check(
bool ) ;
// Throws Idn::Error if a+(b*c) would not fit in value_type.
59 static void check( value_type , value_type , value_type ) ;
// Single-character form of is7Bit().
60 static bool is7Bit_(
char ) noexcept ;
// Encoder parameters; the values match the Punycode parameter values given
// in RFC 3492 (skew, damp, base, tmin, tmax, initial_bias, initial_n).
63 static constexpr value_type c_skew = 38U ;
64 static constexpr value_type c_damp = 700U ;
65 static constexpr value_type c_base = 36U ;
66 static constexpr value_type c_tmin = 1U ;
67 static constexpr value_type c_tmax = 26U ;
68 static constexpr value_type c_initial_bias = 72U ;
69 static constexpr value_type c_initial_n = 128U ;
// The encoded output accumulated by encode() and returned by result().
70 std::string m_output ;
// Fast path: an empty domain, or one made up only of 7-bit characters, is
// returned as a string without going through the encoder.
91 if( domain.empty() || IdnImp::is7Bit(domain) )
92 return G::sv_to_string( domain ) ;
// Otherwise encode with a temporary IdnImp object and return its output.
94 return IdnImp().encode(domain).result() ;
// Builds the encoded domain in m_output. Several lines of this function
// (including the loop over labels and the branch conditions) are not visible
// in this listing, so the notes below describe only the statements shown.
102G::IdnImp & G::IdnImp::encode( std::string_view domain )
// Pre-allocate: output capacity of twice the input size, and one code-point
// slot per input byte.
104 m_output.reserve( domain.size() * 2U ) ;
105 m_ulist.reserve( domain.size() ) ;
// Appends a '.' separator unless 'first' is set (the append count is 0 or 1).
109 m_output.append( first?0U:1U ,
'.' ) ;
// Copies the current token 't' to the output unchanged.
// NOTE(review): presumably the branch for labels that are already 7-bit --
// the enclosing condition is not visible here.
112 m_output.append( t.data() , t.size() ) ;
// Emits the "xn--" prefix followed by the encoded form of the token.
116 m_output.append(
"xn--" , 4U ) ;
117 outputPunycode( t() ) ;
// Appends the encoded form of one label to m_output: first the label's
// 7-bit characters, then a '-' delimiter if there were any, then a sequence
// of base-36 digits encoding the remaining code points held in m_ulist.
// Several lines of this function are not visible in this listing (including
// the code that fills m_ulist and the declaration of 'q').
123void G::IdnImp::outputPunycode( std::string_view label )
// Copy the 7-bit characters straight through, remembering where they start
// so that 'b' can be set to the number of characters copied.
127 std::size_t b0 = m_output.size() ;
128 std::copy_if( label.begin() , label.end() , std::back_inserter(m_output) , &IdnImp::is7Bit_ ) ;
129 value_type b =
static_cast<value_type
>( m_output.size() - b0 ) ;
// The delimiter is only emitted when at least one 7-bit character was copied.
130 if( b ) m_output.append( 1U ,
'-' ) ;
// NOTE(review): presumably used by a std::bind() call on a following line
// that is not shown here -- confirm.
132 using namespace std::placeholders ;
// Digit alphabet: values 0..25 map to 'a'..'z', values 26..35 to '0'..'9'.
135 static constexpr std::string_view c_map {
"abcdefghijklmnopqrstuvwxyz0123456789" , 36U } ;
// Encoder state: current code point threshold, pending delta and bias.
136 value_type n = c_initial_n ;
137 value_type delta = 0 ;
138 value_type bias = c_initial_bias ;
// 'h' starts at the number of 7-bit characters already emitted and the loop
// runs until it reaches the size of m_ulist; delta and n both advance by
// one at the end of every pass.
139 for( value_type h = b ; h < static_cast<value_type>(m_ulist.size()) ; delta++ , n++ )
// Find the smallest code point that is not below n: the comparator maps
// values below n to 0x110000 so that they compare greater than any value
// that is at or above n.
141 auto m_p = std::min_element( m_ulist.begin() , m_ulist.end() ,
142 [n](unicode_type a_,unicode_type b_){return (a_<n?0x110000U:a_) < (b_<n?0x110000U:b_) ;} ) ;
// The same condition is asserted and then passed to check(bool).
144 G_ASSERT( m_p != m_ulist.end() && *m_p >= n ) ;
145 check( m_p != m_ulist.end() && *m_p >= n ) ;
146 G_DEBUG(
"idn: next code point is " << std::hex << std::setfill(
'0') << std::setw(4U) << *m_p ) ;
// Guard the multiply-and-add on the next line against overflow of value_type.
147 check( delta , *m_p-n , h+1U ) ;
149 delta += (*m_p-n) * (h+1U) ;
// Walk all code points: each one below n bumps delta, each one equal to n
// causes the current delta to be emitted.
151 for( std::size_t i = 0U ; i < m_ulist.size() ; i++ )
// The check(delta != 0U) call tests for wrap-around of the unsigned increment.
153 if( m_ulist[i] < n ) { delta++ ; check( delta != 0U ) ; }
154 if( m_ulist[i] == n )
// output_size is only used by the debug output below.
157 const auto output_size = m_output.size() ; GDEF_IGNORE_VARIABLE(output_size) ;
// Emit the value as a variable-length sequence of digits, with a per-digit
// threshold 't' derived from k and the bias.
158 for( value_type k = c_base ;; k += c_base )
// k-min(k,bias) is zero when k <= bias, so the subtraction cannot wrap.
160 value_type t = clamp( k-std::min(k,bias) , c_tmin , c_tmax ) ;
// t is at most c_tmax, so the static_assert guarantees a non-zero divisor.
162 auto x = div( q-t , c_base-t ) ;
static_assert(c_base>c_tmax,
"") ;
164 m_output.push_back( c_map.at(std::size_t(t)+x.rem) ) ;
// Final digit of the sequence.
166 m_output.push_back( c_map.at(q) ) ;
167 G_DEBUG(
"idn: delta " << delta <<
", encodes as \"" << m_output.substr(output_size) <<
"\"" ) ;
// Recompute the bias; the 'first' flag is set while h still equals b.
168 bias = adapt( delta , h+1U , h == b ) ;
169 G_DEBUG(
"idn: bias becomes " << bias ) ;
// Computes a new bias value from the delta 'd'. Some lines of this function
// (including the declaration of 'k') are not visible in this listing.
177G::IdnImp::value_type G::IdnImp::adapt( value_type d , value_type n ,
bool first )
noexcept
// Scale the delta down: by c_damp on the first call, otherwise by two.
179 d /= ( first ? c_damp : 2U ) ;
// Repeatedly divide d by (c_base-c_tmin) while it exceeds the threshold,
// adding c_base to k for every division performed.
182 for( ; d > ((c_base-c_tmin)*c_tmax)/2 ; k += c_base )
183 d /= (c_base-c_tmin) ;
// Combine the accumulated k with a term derived from the reduced d and c_skew.
184 return k + ((c_base-c_tmin+1U)*d) / (d+c_skew) ;
// Appends the code point 'u' to 'output'. The two std::size_t parameters are
// unnamed; the return statement is not visible in this listing.
187bool G::IdnImp::parse( List & output , unicode_type u , std::size_t , std::size_t )
189 output.push_back( {u} ) ;
// Returns 'lo' if v is below lo, 'hi' if v is above hi, otherwise v itself.
193G::IdnImp::value_type G::IdnImp::clamp( value_type v , value_type lo , value_type hi )
noexcept
196 return v < lo ? lo : ( hi < v ? hi : v ) ;
// Returns true if the character's 0x80 bit is clear. The character is
// converted to unsigned char before the bit test.
199bool G::IdnImp::is7Bit_(
char c )
noexcept
201 return (
static_cast<unsigned char>(c) & 0x80U ) == 0U ;
// Returns true if every character of 's' passes is7Bit_(); an empty string
// therefore yields true.
204bool G::IdnImp::is7Bit( std::string_view s )
noexcept
206 return std::all_of( s.begin() , s.end() , &IdnImp::is7Bit_ ) ;
// Returns the quotient and remainder of top/bottom as a div_t.
// No check for a zero 'bottom' is visible in this listing.
209G::IdnImp::div_t G::IdnImp::div( value_type top , value_type bottom )
noexcept
212 return { top/bottom , top%bottom } ;
// Single-condition overload of check(); its body is not part of this listing.
// NOTE(review): presumably throws Idn::Error when 'b' is false -- confirm.
215void G::IdnImp::check(
bool b )
// Overflow guard for the expression a+(b*c): throws Idn::Error if c is
// non-zero and b exceeds (max-a)/c, where max is the largest value_type value.
221void G::IdnImp::check( value_type a , value_type b , value_type c )
223 constexpr value_type maxint = std::numeric_limits<value_type>::max() ;
// Testing with a division avoids performing the multiplication that might overflow.
224 if( c != 0U && b > (maxint-a)/c )
225 throw Idn::Error(
"domain name too long: numeric overflow multiplying by " + std::to_string(c) ) ;
static void u8parse(std::string_view, ParseFn)
Calls a function for each Unicode value in the given UTF-8 string.
static bool valid(std::string_view) noexcept
Returns true if the string is valid UTF-8.
static bool isPrintable(std::string_view s) noexcept
Returns true if every character is 0x20 or above but not 0x7f.
static bool isPrintableAscii(std::string_view s) noexcept
Returns true if every character is between 0x20 and 0x7e inclusive.
A zero-copy string token iterator where the token separators are runs of whitespace characters,...
std::string encode(std::string_view domain)
Returns the given domain with A-labels.
bool valid(std::string_view domain)
Returns true if the given domain is valid with U-labels and/or A-labels.