E-MailRelay
gsmtpserverparser.cpp
Go to the documentation of this file.
1//
2// Copyright (C) 2001-2024 Graeme Walker <graeme_walker@users.sourceforge.net>
3//
4// This program is free software: you can redistribute it and/or modify
5// it under the terms of the GNU General Public License as published by
6// the Free Software Foundation, either version 3 of the License, or
7// (at your option) any later version.
8//
9// This program is distributed in the hope that it will be useful,
10// but WITHOUT ANY WARRANTY; without even the implied warranty of
11// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12// GNU General Public License for more details.
13//
14// You should have received a copy of the GNU General Public License
15// along with this program. If not, see <http://www.gnu.org/licenses/>.
16// ===
17///
18/// \file gsmtpserverparser.cpp
19///
20
21#include "gdef.h"
22#include "gsmtpserverparser.h"
23#include "gidn.h"
24#include "gxtext.h"
25#include "gstr.h"
26#include "gstringtoken.h"
27#include "gconvert.h"
28#include "glog.h"
29#include "gassert.h"
30#include <string>
31
32std::pair<std::size_t,bool> GSmtp::ServerParser::parseBdatSize( std::string_view bdat_line )
33{
34 G::StringTokenView token( bdat_line , "\t "_sv ) ;
35 std::size_t size = 0U ;
36 bool ok = false ;
37 if( ++token )
38 {
39 bool overflow = false ;
40 bool invalid = false ;
41 std::size_t n = G::Str::toUnsigned<std::size_t>( token.data() , token.data()+token.size() , overflow , invalid ) ;
42 if( !overflow && !invalid )
43 size = n , ok = true ;
44 }
45 return {size,ok} ;
46}
47
48std::pair<bool,bool> GSmtp::ServerParser::parseBdatLast( std::string_view bdat_line )
49{
50 G::StringTokenView token( bdat_line , "\t "_sv ) ;
51 bool last = false ;
52 bool ok = false ;
53 if( ++token )
54 {
55 ok = true ;
56 if( ++token )
57 ok = last = G::Str::imatch( "LAST"_sv , token() ) ;
58 }
59 return {last,ok} ;
60}
61
63{
64 G::StringTokenView t( line , " \t"_sv ) ;
65 if( !G::Str::imatch("MAIL"_sv,t()) || G::Str::ifind(t.next()(),"FROM:"_sv) != 0U )
66 return {"invalid mail-from command"} ;
67
68 AddressCommand result = parseAddressPart( line , config ) ;
69 if( result.error.empty() )
70 {
71 if( !parseMailStringValue(line,"SMTPUTF8="_sv,result).empty() ) // RFC-6531 3.4 para1, but not clear
72 result.error = "invalid mail-from parameter" ;
73
74 result.auth = parseMailStringValue( line , "AUTH="_sv , result , Conversion::ValidXtext ) ;
75 result.body = parseMailStringValue( line , "BODY="_sv , result , Conversion::Upper ) ; // RFC-1652, RFC-3030
76 result.size = parseMailNumericValue( line , "SIZE="_sv , result ) ; // RFC-1427 submitter's size estimate
77 result.smtputf8 = parseMailBoolean( line , "SMTPUTF8"_sv , result ) ;
78 G_DEBUG( "GSmtp::ServerParser::parseMailFrom: error=[" << G::Str::printable(result.error) << "]" ) ;
79 G_DEBUG( "GSmtp::ServerParser::parseMailFrom: address=[" << G::Str::printable(result.address) << "]" ) ;
80 G_DEBUG( "GSmtp::ServerParser::parseMailFrom: size=" << result.size ) ;
81 G_DEBUG( "GSmtp::ServerParser::parseMailFrom: auth=[" << G::Str::printable(result.auth) << "]" ) ;
82 G_DEBUG( "GSmtp::ServerParser::parseMailFrom: smtputf8=" << (result.smtputf8?"1":"0") ) ;
83 }
84 return result ;
85}
86
88{
89 G::StringTokenView t( line , " \t"_sv ) ;
90 if( !G::Str::imatch("RCPT"_sv,t()) || G::Str::ifind(t.next()(),"TO:"_sv) != 0U )
91 return {"invalid rcpt-to command"} ;
92
93 return parseAddressPart( line , config ) ;
94}
95
96GSmtp::ServerParser::AddressCommand GSmtp::ServerParser::parseAddressPart( std::string_view line , const Config & config )
97{
98 // RFC-5321 4.1.2
99 // eg. MAIL FROM:<>
100 // eg. MAIL FROM:<me@localhost> SIZE=12345
101 // eg. RCPT TO:<Postmaster>
102 // eg. RCPT TO:<@first.net,@second.net:you@last.net>
103 // eg. RCPT TO:<"alice\ \"jones\" :->"@example.com> XFOO=xyz
104
105 // early check of the character-set to reject NUL and CR-LF
106 if( line.find('\0') != std::string::npos ||
107 line.find_first_of("\r\n",0,2U) != std::string::npos )
108 {
109 return {"invalid character in mailbox name"} ;
110 }
111
112 // find one past the colon
113 std::size_t startpos = line.find( ':' ) ;
114 if( startpos == std::string::npos )
115 return {"missing colon"} ;
116 startpos++ ;
117
118 // test for possibly-allowed errors
119 AddressCommand result ;
120 if( startpos < line.size() && ( line[startpos] == ' ' || line[startpos] == '\t' ) )
121 result.invalid_spaces = true ;
122 startpos = line.find_first_not_of( " \t" , startpos , 2U ) ;
123 if( startpos == std::string::npos ) startpos = line.size() ;
124 if( startpos < line.size() && line[startpos] != '<' )
125 result.invalid_nobrackets = true ;
126
127 // fail unallowed errors
128 if( result.invalid_spaces && !config.allow_spaces )
129 {
130 result.error = "invalid space after colon" ;
131 return result ;
132 }
133 if( result.invalid_nobrackets && !config.allow_nobrackets )
134 {
135 result.error = "missing angle brackets in mailbox name" ;
136 return result ;
137 }
138
139 // find the address part
140 std::size_t endpos = 0U ;
141 if( result.invalid_nobrackets )
142 {
143 endpos = line.find_first_of( " \t"_sv , startpos ) ;
144 if( endpos == std::string::npos ) endpos = line.size() ;
145 G_ASSERT( startpos < line.size() && endpos <= line.size() && endpos > startpos ) ;
146 }
147 else if( (startpos+2U) > line.size() || line.find('>',startpos+1U) == std::string::npos )
148 {
149 result.error = "invalid angle brackets in mailbox name" ;
150 return result ;
151 }
152 else
153 {
154 // step over any source route so startpos is the colon not the "<"
155 if( line.at(startpos+1U) == '@' )
156 {
157 // RFC-6531 complicates the syntax, but we follow RFC-5321 4.1.2 in
158 // assuming there is no colon within the RFC-6531 A-d-l syntax element
159 startpos = line.find( ':' , startpos+1U ) ;
160 if( startpos == std::string::npos || (startpos+2U) >= line.size() )
161 return {"invalid source route in mailbox name"} ;
162 }
163
164 // find the endpos allowing for quoted angle brackets and escaped quotes
165 if( line.at(startpos+1U) == '"' )
166 {
167 for( std::size_t i = startpos+2U ; endpos == 0U && i < line.size() ; i++ )
168 {
169 if( line[i] == '\\' )
170 i++ ;
171 else if( line[i] == '"' )
172 endpos = line.find( '>' , i ) ;
173 }
174 if( endpos == std::string::npos )
175 return {"invalid quoting"} ;
176 }
177 else
178 {
179 endpos = line.find( '>' , startpos+1U ) ;
180 G_ASSERT( endpos != std::string::npos ) ;
181 }
182 if( (endpos+1U) < line.size() && line.at(endpos+1U) != ' ' )
183 return {"invalid angle brackets"} ;
184
185 G_ASSERT( startpos < line.size() && endpos < line.size() && endpos > startpos ) ;
186 G_ASSERT( line.at(startpos) == '<' || line.at(startpos) == ':' ) ;
187 G_ASSERT( line.at(endpos) == '>' ) ;
188 }
189
190 std::string_view address =
191 result.invalid_nobrackets ?
192 std::string_view( line.data()+startpos , endpos-startpos ) :
193 std::string_view( line.data()+startpos+1U , endpos-startpos-1U ) ;
194
195 auto address_style = GStore::MessageStore::addressStyle( address ) ;
196 if( address_style == AddressStyle::Invalid )
197 return {"invalid character in mailbox name"} ;
198
199 result.utf8_mailbox_part = address_style == AddressStyle::Utf8Both || address_style == AddressStyle::Utf8Mailbox ;
200 result.utf8_domain_part = address_style == AddressStyle::Utf8Both || address_style == AddressStyle::Utf8Domain ;
201 result.raw_address = G::sv_to_string( address ) ;
202 result.address = result.utf8_domain_part ? encodeDomain(address) : result.raw_address ;
203 result.address_style = address_style ;
204 result.tailpos = result.invalid_nobrackets ? endpos : (endpos+1U) ;
205 return result ;
206}
207
208std::string GSmtp::ServerParser::encodeDomain( std::string_view address )
209{
210 std::size_t at_pos = address.rfind( '@' ) ;
211 std::string_view user = G::Str::headView( address , at_pos , address ) ;
212 std::string_view domain = G::Str::tailView( address , at_pos ) ;
213 return
214 domain.empty() ?
215 G::sv_to_string( address ) :
216 G::sv_to_string(user).append(1U,'@').append(G::Idn::encode(domain)) ;
217}
218
219std::size_t GSmtp::ServerParser::parseMailNumericValue( std::string_view line , std::string_view key_eq , AddressCommand & out )
220{
221 std::size_t result = 0U ;
222 if( out.error.empty() && out.tailpos != std::string::npos && out.tailpos < line.size() )
223 {
224 std::string str = parseMailStringValue( line , key_eq , out ) ;
225 if( !str.empty() && G::Str::isULong(str) )
226 result = static_cast<std::size_t>( G::Str::toULong(str,G::Str::Limited()) ) ;
227 }
228 return result ;
229}
230
231std::string GSmtp::ServerParser::parseMailStringValue( std::string_view line , std::string_view key_eq , AddressCommand & out , Conversion conversion )
232{
233 std::string result ;
234 if( out.error.empty() && out.tailpos != std::string::npos && out.tailpos < line.size() )
235 {
236 std::string_view tail = G::sv_substr_noexcept( std::string_view(line) , out.tailpos ) ;
237 G::StringTokenView word( tail , " \t"_sv ) ;
238 for( ; word ; ++word )
239 {
240 if( G::Str::ifind( word() , key_eq ) == 0U && word().size() > key_eq.size() )
241 {
242 result = G::sv_to_string( word().substr(key_eq.size() ) ) ;
243 break ;
244 }
245 }
246 if( conversion == Conversion::ValidXtext )
247 result = G::Xtext::encode( G::Xtext::decode(result) ) ; // ensure valid xtext
248 else if( conversion == Conversion::Upper )
249 result = G::Str::upper( result ) ;
250 }
251 return result ;
252}
253
254bool GSmtp::ServerParser::parseMailBoolean( std::string_view line , std::string_view key , AddressCommand & out )
255{
256 bool result = false ;
257 if( out.error.empty() && out.tailpos != std::string::npos && out.tailpos < line.size() )
258 {
259 std::string_view tail = G::sv_substr_noexcept( line , out.tailpos ) ;
260 G::StringTokenView word( tail , " \t"_sv ) ;
261 for( ; word && !result ; ++word )
262 {
263 if( word() == key )
264 result = true ;
265 }
266 }
267 return result ;
268}
269
270std::string GSmtp::ServerParser::parseVrfy( const std::string & line_in )
271{
272 G_ASSERT( G::Str::ifind(line_in,"VRFY") == 0U ) ;
273 std::string line = line_in ;
274 G::Str::trimRight( line , {" \t",2U} ) ;
275
276 if( line.size() > 9U )
277 {
278 // RFC-6531 3.7.4.2
279 std::string tail = line.substr( line.size() - 9U ) ;
280 G::Str::trimLeft( tail , {" \t",2U} ) ;
281 if( G::Str::imatch( "SMTPUTF8"_sv , tail ) )
282 line = line.substr( 0U , line.size()-9U ) ;
283 }
284
285 std::string to ;
286 std::size_t pos = line.find_first_of( " \t" ) ;
287 if( pos != std::string::npos )
288 to = line.substr(pos) ;
289 return G::Str::trimmed( to , {" \t",2U} ) ;
290}
291
292std::string GSmtp::ServerParser::parseHeloPeerName( const std::string & line )
293{
294 std::size_t pos = line.find_first_not_of( " \t" ) ;
295 if( pos == std::string::npos )
296 return {} ;
297
298 pos = line.find_first_of( " \t" , pos ) ;
299 if( pos == std::string::npos )
300 return {} ;
301
302 std::string smtp_peer_name = line.substr( pos + 1U ) ;
303 G::Str::trim( smtp_peer_name , {" \t",2U} ) ;
304 return smtp_peer_name ;
305}
306
static AddressCommand parseRcptTo(std::string_view, const Config &)
Parses a RCPT-TO command.
static std::string parseVrfy(const std::string &)
Parses a VRFY command.
static AddressCommand parseMailFrom(std::string_view, const Config &)
Parses a MAIL-FROM command.
static std::pair< bool, bool > parseBdatLast(std::string_view)
Parses a BDAT LAST command.
static std::pair< std::size_t, bool > parseBdatSize(std::string_view)
Parses a BDAT command.
static std::string parseHeloPeerName(const std::string &)
Parses the peer name from an HELO/EHLO command.
static AddressStyle addressStyle(std::string_view address)
Parses an address to determine whether it has ASCII or UTF-8 parts.
static std::string & trimRight(std::string &s, std::string_view ws, std::size_t limit=0U)
Trims the rhs of s, taking off up to 'limit' of the 'ws' characters.
Definition: gstr.cpp:313
static bool imatch(char, char) noexcept
Returns true if the two characters are the same, ignoring seven-bit case.
Definition: gstr.cpp:1415
static std::string & trim(std::string &s, std::string_view ws)
Trims both ends of s, taking off any of the 'ws' characters.
Definition: gstr.cpp:338
static bool isULong(std::string_view s) noexcept
Returns true if the string can be converted into an unsigned long without throwing an exception.
Definition: gstr.cpp:454
static std::string printable(const std::string &in, char escape='\\')
Returns a printable representation of the given input string, using character code ranges 0x20 to 0x7e ...
Definition: gstr.cpp:913
static std::string upper(std::string_view)
Returns a copy of 's' in which all seven-bit lower-case characters have been replaced by upper-case c...
Definition: gstr.cpp:836
static std::size_t ifind(std::string_view s, std::string_view key)
Returns the position of the key in 's' using a seven-bit case-insensitive search.
Definition: gstr.cpp:1433
static std::string & trimLeft(std::string &s, std::string_view ws, std::size_t limit=0U)
Trims the lhs of s, taking off up to 'limit' of the 'ws' characters.
Definition: gstr.cpp:288
static std::string_view tailView(std::string_view in, std::size_t pos, std::string_view default_={}) noexcept
Like tail() but returning a view into the input string.
Definition: gstr.cpp:1337
static unsigned long toULong(std::string_view s, Limited)
Converts string 's' to an unsigned long.
Definition: gstr.cpp:669
static std::string_view headView(std::string_view in, std::size_t pos, std::string_view default_={}) noexcept
Like head() but returning a view into the input string.
Definition: gstr.cpp:1308
static std::string trimmed(const std::string &s, std::string_view ws)
Returns a trim()med version of s.
Definition: gstr.cpp:343
A zero-copy string token iterator where the token separators are runs of whitespace characters,...
Definition: gstringtoken.h:54
StringTokenT< T > & next() noexcept
Moves to the next token.
Definition: gstringtoken.h:205
const char_type * data() const noexcept
Returns the current token pointer.
Definition: gstringtoken.h:156
std::size_t size() const noexcept
Returns the current token size.
Definition: gstringtoken.h:163
static std::string encode(std::string_view)
Encodes the given string.
Definition: gxtext.cpp:97
static std::string decode(std::string_view)
Decodes the given string.
Definition: gxtext.cpp:119
Low-level classes.
Definition: garg.h:36
STL namespace.
A configuration structure for GSmtp::ServerParser.
Overload discriminator for G::Str::toUWhatever() requesting a range-limited result.
Definition: gstr.h:56