E-MailRelay
gsmtpserverparser.cpp
Go to the documentation of this file.
1//
2// Copyright (C) 2001-2023 Graeme Walker <graeme_walker@users.sourceforge.net>
3//
4// This program is free software: you can redistribute it and/or modify
5// it under the terms of the GNU General Public License as published by
6// the Free Software Foundation, either version 3 of the License, or
7// (at your option) any later version.
8//
9// This program is distributed in the hope that it will be useful,
10// but WITHOUT ANY WARRANTY; without even the implied warranty of
11// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12// GNU General Public License for more details.
13//
14// You should have received a copy of the GNU General Public License
15// along with this program. If not, see <http://www.gnu.org/licenses/>.
16// ===
17///
18/// \file gsmtpserverparser.cpp
19///
20
21#include "gdef.h"
22#include "gsmtpserverparser.h"
23#include "gxtext.h"
24#include "gstr.h"
25#include "gstringtoken.h"
26#include "glog.h"
27#include "gassert.h"
28#include <string>
29
30GSmtp::ServerParser::MailboxStyle GSmtp::ServerParser::mailboxStyle( const std::string & mailbox )
31{
32 static constexpr const char * cc =
33 "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F"
34 "\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F" "\x7F" ;
35
36 bool invalid = mailbox.find_first_of( cc , 0U , 33U ) != std::string::npos ;
37 bool ascii = !invalid && G::Str::isPrintableAscii( mailbox ) ;
38
39 if( invalid )
40 return MailboxStyle::Invalid ;
41 else if( ascii )
42 return MailboxStyle::Ascii ;
43 else
44 return MailboxStyle::Utf8 ;
45}
46
47std::pair<std::size_t,bool> GSmtp::ServerParser::parseBdatSize( G::string_view bdat_line )
48{
49 G::StringTokenView token( bdat_line , "\t "_sv ) ;
50 std::size_t size = 0U ;
51 bool ok = false ;
52 if( token && ++token )
53 {
54 bool overflow = false ;
55 bool invalid = false ;
56 std::size_t n = G::Str::toUnsigned<std::size_t>( token.data() , token.data()+token.size() , overflow , invalid ) ;
57 if( !overflow && !invalid )
58 size = n , ok = true ;
59 }
60 return {size,ok} ;
61}
62
63std::pair<bool,bool> GSmtp::ServerParser::parseBdatLast( G::string_view bdat_line )
64{
65 G::StringTokenView token( bdat_line , "\t "_sv ) ;
66 bool last = false ;
67 bool ok = false ;
68 if( token && ++token )
69 {
70 ok = true ;
71 if( ++token )
72 ok = last = G::Str::imatch( "LAST"_sv , token() ) ;
73 }
74 return {last,ok} ;
75}
76
78{
79 G::StringTokenView t( line , " \t"_sv ) ;
80 if( !G::Str::imatch("MAIL"_sv,t()) || G::Str::ifind(t.next()(),"FROM:"_sv) != 0U )
81 return {"invalid mail-from command"} ;
82
83 AddressCommand result = parseAddressPart( line ) ;
84 if( result.error.empty() )
85 {
86 if( !parseMailStringValue(line,"SMTPUTF8="_sv,result).empty() ) // RFC-6531 3.4 para1, but not clear
87 result.error = "invalid mail-from parameter" ;
88
89 result.auth = parseMailStringValue( line , "AUTH="_sv , result , Conversion::ValidXtext ) ;
90 result.body = parseMailStringValue( line , "BODY="_sv , result , Conversion::Upper ) ; // RFC-1652, RFC-3030
91 result.size = parseMailNumericValue( line , "SIZE="_sv , result ) ; // RFC-1427 submitter's size estimate
92 result.smtputf8 = parseMailBoolean( line , "SMTPUTF8"_sv , result ) ;
93 G_DEBUG( "GSmtp::ServerParser::parseMailFrom: error=[" << G::Str::printable(result.error) << "]" ) ;
94 G_DEBUG( "GSmtp::ServerParser::parseMailFrom: address=[" << G::Str::printable(result.address) << "]" ) ;
95 G_DEBUG( "GSmtp::ServerParser::parseMailFrom: size=" << result.size ) ;
96 G_DEBUG( "GSmtp::ServerParser::parseMailFrom: auth=[" << G::Str::printable(result.auth) << "]" ) ;
97 G_DEBUG( "GSmtp::ServerParser::parseMailFrom: smtputf8=" << (result.smtputf8?"1":"0") ) ;
98 }
99 return result ;
100}
101
103{
104 G::StringTokenView t( line , " \t"_sv ) ;
105 if( !G::Str::imatch("RCPT"_sv,t()) || G::Str::ifind(t.next()(),"TO:"_sv) != 0U )
106 return {"invalid rcpt-to command"} ;
107
108 return parseAddressPart( line ) ;
109}
110
111GSmtp::ServerParser::AddressCommand GSmtp::ServerParser::parseAddressPart( G::string_view line )
112{
113 // RFC-5321 4.1.2
114 // eg. MAIL FROM:<>
115 // eg. MAIL FROM:<me@localhost> SIZE=12345
116 // eg. RCPT TO:<Postmaster>
117 // eg. RCPT TO:<@first.net,@second.net:you@last.net>
118 // eg. RCPT TO:<"alice\ \"jones\" :->"@example.com> XFOO=xyz
119
120 // early check of the character-set to reject NUL and CR-LF
121 if( line.find('\0') != std::string::npos ||
122 line.find_first_of("\r\n",0,2U) != std::string::npos )
123 {
124 return {"invalid character in mailbox name"} ;
125 }
126
127 // find the opening angle bracket
128 std::size_t startpos = line.find( ':' ) ;
129 if( startpos == std::string::npos )
130 return {"missing colon"} ;
131 startpos++ ;
132 while( startpos < line.size() && line[startpos] == ' ' )
133 startpos++ ; // (as requested)
134 if( (startpos+2U) > line.size() || line[startpos] != '<' || line.find('>',startpos+1U) == std::string::npos )
135 {
136 return {"missing or invalid angle brackets in mailbox name"} ;
137 }
138
139 // step over any source route
140 if( line[startpos+1U] == '@' )
141 {
142 // RFC-6531 complicates the syntax, but we follow RFC-5321 4.1.2 in
143 // assuming there is no colon within the RFC-6531 A-d-l syntax element
144 startpos = line.find( ':' , startpos+1U ) ;
145 if( startpos == std::string::npos || (startpos+2U) >= line.size() )
146 return {"invalid source route in mailbox name"} ;
147 }
148
149 // find the end, allowing for quoted angle brackets and escaped quotes
150 std::size_t endpos = 0U ;
151 if( line.at(startpos+1U) == '"' )
152 {
153 for( std::size_t i = startpos+2U ; endpos == 0U && i < line.size() ; i++ )
154 {
155 if( line[i] == '\\' )
156 i++ ;
157 else if( line[i] == '"' )
158 endpos = line.find( '>' , i ) ;
159 }
160 if( endpos == std::string::npos )
161 return {"invalid quoting"} ;
162 }
163 else
164 {
165 endpos = line.find( '>' , startpos+1U ) ;
166 G_ASSERT( endpos != std::string::npos ) ;
167 }
168 if( (endpos+1U) < line.size() && line.at(endpos+1U) != ' ' )
169 return {"invalid angle brackets"} ;
170
171 G_ASSERT( startpos != std::string::npos && endpos != std::string::npos ) ;
172 G_ASSERT( endpos > startpos ) ;
173 G_ASSERT( endpos < line.size() ) ;
174 G_ASSERT( line.at(startpos) == '<' || line.at(startpos) == ':' ) ;
175 G_ASSERT( line.at(endpos) == '>' ) ;
176
177 std::string address = std::string( line.data()+startpos+1U , endpos-startpos-1U ) ;
178
179 auto style = mailboxStyle( address ) ;
180 if( style == MailboxStyle::Invalid )
181 return {"invalid character in mailbox name"} ;
182
183 AddressCommand result ;
184 result.address = std::string( line.data()+startpos+1U , endpos-startpos-1U ) ;
185 result.utf8address = style == MailboxStyle::Utf8 ;
186 result.tailpos = endpos+1U ;
187 return result ;
188}
189
190std::size_t GSmtp::ServerParser::parseMailNumericValue( G::string_view line , G::string_view key_eq , AddressCommand & out )
191{
192 std::size_t result = 0U ;
193 if( out.error.empty() && out.tailpos != std::string::npos && out.tailpos < line.size() )
194 {
195 std::string str = parseMailStringValue( line , key_eq , out ) ;
196 if( !str.empty() && G::Str::isULong(str) )
197 result = static_cast<std::size_t>( G::Str::toULong(str,G::Str::Limited()) ) ;
198 }
199 return result ;
200}
201
202std::string GSmtp::ServerParser::parseMailStringValue( G::string_view line , G::string_view key_eq , AddressCommand & out , Conversion conversion )
203{
204 std::string result ;
205 if( out.error.empty() && out.tailpos != std::string::npos && out.tailpos < line.size() )
206 {
207 G::string_view tail = G::sv_substr( G::string_view(line) , out.tailpos ) ;
208 G::StringTokenView word( tail , " \t"_sv ) ;
209 for( ; word ; ++word )
210 {
211 if( G::Str::ifind( word() , key_eq ) == 0U && word().size() > key_eq.size() )
212 {
213 result = G::sv_to_string( word().substr(key_eq.size() ) ) ;
214 break ;
215 }
216 }
217 if( conversion == Conversion::ValidXtext )
218 result = G::Xtext::encode( G::Xtext::decode(result) ) ; // ensure valid xtext
219 else if( conversion == Conversion::Upper )
220 result = G::Str::upper( result ) ;
221 }
222 return result ;
223}
224
225bool GSmtp::ServerParser::parseMailBoolean( G::string_view line , G::string_view key , AddressCommand & out )
226{
227 bool result = false ;
228 if( out.error.empty() && out.tailpos != std::string::npos && out.tailpos < line.size() )
229 {
230 G::string_view tail = G::sv_substr( line , out.tailpos ) ;
231 G::StringTokenView word( tail , " \t"_sv ) ;
232 for( ; word && !result ; ++word )
233 {
234 if( word() == key )
235 result = true ;
236 }
237 }
238 return result ;
239}
240
241std::string GSmtp::ServerParser::parseVrfy( const std::string & line_in )
242{
243 G_ASSERT( G::Str::ifind(line_in,"VRFY") == 0U ) ;
244 std::string line = line_in ;
245 G::Str::trimRight( line , {" \t",2U} ) ;
246
247 if( line.size() > 9U )
248 {
249 // RFC-6531 3.7.4.2
250 std::string tail = line.substr( line.size() - 9U ) ;
251 G::Str::trimLeft( tail , {" \t",2U} ) ;
252 if( G::Str::imatch( "SMTPUTF8"_sv , tail ) )
253 line = line.substr( 0U , line.size()-9U ) ;
254 }
255
256 std::string to ;
257 std::size_t pos = line.find_first_of( " \t" ) ;
258 if( pos != std::string::npos )
259 to = line.substr(pos) ;
260 return G::Str::trimmed( to , {" \t",2U} ) ;
261}
262
263std::string GSmtp::ServerParser::parseHeloPeerName( const std::string & line )
264{
265 std::size_t pos = line.find_first_not_of( " \t" ) ;
266 if( pos == std::string::npos )
267 return std::string() ;
268
269 pos = line.find_first_of( " \t" , pos ) ;
270 if( pos == std::string::npos )
271 return std::string() ;
272
273 std::string smtp_peer_name = line.substr( pos + 1U ) ;
274 G::Str::trim( smtp_peer_name , {" \t",2U} ) ;
275 return smtp_peer_name ;
276}
277
static MailboxStyle mailboxStyle(const std::string &mailbox)
Classifies the given mailbox name.
static std::string parseVrfy(const std::string &)
Parses a VRFY command.
static std::pair< std::size_t, bool > parseBdatSize(G::string_view)
Parses a BDAT command.
static std::pair< bool, bool > parseBdatLast(G::string_view)
Parses a BDAT LAST command.
static AddressCommand parseMailFrom(G::string_view)
Parses a MAIL-FROM command.
static AddressCommand parseRcptTo(G::string_view)
Parses a RCPT-TO command.
static std::string parseHeloPeerName(const std::string &)
Parses the peer name from an HELO/EHLO command.
static bool isPrintableAscii(string_view s) noexcept
Returns true if every character is a 7-bit, non-control character (ie.
Definition: gstr.cpp:416
static std::size_t ifind(string_view s, string_view key)
Returns the position of the key in 's' using a seven-bit case-insensitive search.
Definition: gstr.cpp:1436
static bool imatch(char, char) noexcept
Returns true if the two characters are the same, ignoring seven-bit case.
Definition: gstr.cpp:1418
static std::string & trimLeft(std::string &s, string_view ws, std::size_t limit=0U)
Trims the lhs of s, taking off up to 'limit' of the 'ws' characters.
Definition: gstr.cpp:288
static bool isULong(string_view s) noexcept
Returns true if the string can be converted into an unsigned long without throwing an exception.
Definition: gstr.cpp:457
static std::string & trimRight(std::string &s, string_view ws, std::size_t limit=0U)
Trims the rhs of s, taking off up to 'limit' of the 'ws' characters.
Definition: gstr.cpp:313
static std::string printable(const std::string &in, char escape='\\')
Returns a printable representation of the given input string, using character code ranges 0x20 to 0x7e ...
Definition: gstr.cpp:916
static std::string upper(string_view)
Returns a copy of 's' in which all seven-bit lower-case characters have been replaced by upper-case c...
Definition: gstr.cpp:839
static unsigned long toULong(string_view s, Limited)
Converts string 's' to an unsigned long.
Definition: gstr.cpp:672
static std::string trimmed(const std::string &s, string_view ws)
Returns a trim()med version of s.
Definition: gstr.cpp:343
static std::string & trim(std::string &s, string_view ws)
Trims both ends of s, taking off any of the 'ws' characters.
Definition: gstr.cpp:338
A zero-copy string token iterator where the token separators are runs of whitespace characters,...
Definition: gstringtoken.h:54
StringTokenT< T > & next() noexcept
Moves to the next token.
Definition: gstringtoken.h:207
const char_type * data() const noexcept
Returns the current token pointer.
Definition: gstringtoken.h:158
std::size_t size() const noexcept
Returns the current token size.
Definition: gstringtoken.h:165
static std::string encode(string_view)
Encodes the given string.
Definition: gxtext.cpp:97
static std::string decode(string_view)
Decodes the given string.
Definition: gxtext.cpp:119
A class like c++17's std::string_view.
Definition: gstringview.h:51
Overload discriminator for G::Str::toUWhatever() requesting a range-limited result.
Definition: gstr.h:56