#include "lokimq.h"
#include <map>
#include <random>

extern "C" {
#include <sodium.h>
}

#include "batch.h"
#include "hex.h"
namespace lokimq {
// In-process endpoints used internally: proxy command socket, proxy<->worker socket, a
// self-connection endpoint, and the standard ZMQ ZAP authentication endpoint.
constexpr char SN_ADDR_COMMAND[] = "inproc://sn-command";
constexpr char SN_ADDR_WORKERS[] = "inproc://sn-workers";
constexpr char SN_ADDR_SELF[] = "inproc://sn-self";
constexpr char ZMQ_ADDR_ZAP[] = "inproc://zeromq.zap.01";

// Logging macro; forwards the level plus the call site (__FILE__/__LINE__) to the logger.
// Inside some method:
//     LMQ_LOG(warn, "bad ", 42, " stuff");
#define LMQ_LOG(level, ...) log_(LogLevel::level, __FILE__, __LINE__, __VA_ARGS__)

#ifndef NDEBUG
// Same as LMQ_LOG(trace, ...) when not doing a release build; nothing under a release build.
#define LMQ_TRACE(...) log_(LogLevel::trace, __FILE__, __LINE__, __VA_ARGS__)
#else
#define LMQ_TRACE(...)
#endif
namespace {
/// Destructor for create_message(std::string&&) that zmq calls when it's done with the message.
/// `hint` is the heap-allocated std::string whose buffer the message borrowed.
extern "C" void message_buffer_destroy(void*, void* hint) {
    // static_cast is sufficient (and clearer) for void* -> T*
    delete static_cast<std::string*>(hint);
}
/// Creates a message without needing to reallocate the provided string data: the string is moved
/// onto the heap and handed to zmq, which frees it via message_buffer_destroy when done.
zmq::message_t create_message(std::string&& data) {
    auto* heap_str = new std::string{std::move(data)};
    return zmq::message_t{&(*heap_str)[0], heap_str->size(), message_buffer_destroy, heap_str};
}
/// Create a message copying from a string_view
zmq::message_t create_message(string_view data) {
    const char* first = data.data();
    return zmq::message_t{first, first + data.size()};
}
/// Creates a message by bt-serializing the given value (string, number, list, or dict)
template <typename T>
zmq::message_t create_bt_message(T&& data) {
    auto serialized = bt_serialize(std::forward<T>(data));
    return create_message(std::move(serialized));
}
/// Sends a control message to a specific destination by prefixing the worker name (or identity)
/// then appending the command and optional data (if non-empty). (This is needed when sending the control message
/// to a router socket, i.e. inside the proxy thread).
void route_control ( zmq : : socket_t & sock , string_view identity , string_view cmd , const std : : string & data = { } ) {
sock . send ( create_message ( identity ) , zmq : : send_flags : : sndmore ) ;
detail : : send_control ( sock , cmd , data ) ;
}
// Receive all the parts of a single message from the given socket. Returns true if a message was
// received, false if called with flags=zmq::recv_flags::dontwait and no message was available.
2020-02-06 05:50:31 +01:00
bool recv_message_parts ( zmq : : socket_t & sock , std : : vector < zmq : : message_t > & parts , const zmq : : recv_flags flags = zmq : : recv_flags : : none ) {
do {
2020-02-03 03:39:26 +01:00
zmq : : message_t msg ;
if ( ! sock . recv ( msg , flags ) )
return false ;
2020-02-06 05:50:31 +01:00
parts . push_back ( std : : move ( msg ) ) ;
} while ( parts . back ( ) . more ( ) ) ;
2020-02-03 03:39:26 +01:00
return true ;
}
template < typename It >
void send_message_parts ( zmq : : socket_t & sock , It begin , It end ) {
while ( begin ! = end ) {
// FIXME: for outgoing connections on ZMQ_DEALER we want to use ZMQ_DONTWAIT and handle
// EAGAIN error (which either means the peer HWM is hit -- probably indicating a connection
// failure -- or the underlying connect() system call failed). Assuming it's an outgoing
// connection, we should destroy it.
zmq : : message_t & msg = * begin + + ;
sock . send ( msg , begin = = end ? zmq : : send_flags : : none : zmq : : send_flags : : sndmore ) ;
}
}
template < typename Container >
void send_message_parts ( zmq : : socket_t & sock , Container & & c ) {
send_message_parts ( sock , c . begin ( ) , c . end ( ) ) ;
}
2020-02-06 05:50:31 +01:00
/// Sends a message with an initial route. `msg` and `data` can be empty: if `msg` is empty then
/// the msg frame will be an empty message; if `data` is empty then the data frame will be omitted.
void send_routed_message ( zmq : : socket_t & socket , std : : string route , std : : string msg = { } , std : : string data = { } ) {
2020-02-28 05:16:43 +01:00
assert ( ! route . empty ( ) ) ;
2020-02-06 05:50:31 +01:00
std : : array < zmq : : message_t , 3 > msgs { { create_message ( std : : move ( route ) ) } } ;
if ( ! msg . empty ( ) )
msgs [ 1 ] = create_message ( std : : move ( msg ) ) ;
2020-02-03 03:39:26 +01:00
if ( ! data . empty ( ) )
msgs [ 2 ] = create_message ( std : : move ( data ) ) ;
send_message_parts ( socket , msgs . begin ( ) , data . empty ( ) ? std : : prev ( msgs . end ( ) ) : msgs . end ( ) ) ;
}
2020-02-11 07:30:07 +01:00
// Sends some stuff to a socket directly.
2020-02-03 03:39:26 +01:00
void send_direct_message ( zmq : : socket_t & socket , std : : string msg , std : : string data = { } ) {
std : : array < zmq : : message_t , 2 > msgs { { create_message ( std : : move ( msg ) ) } } ;
if ( ! data . empty ( ) )
msgs [ 1 ] = create_message ( std : : move ( data ) ) ;
send_message_parts ( socket , msgs . begin ( ) , data . empty ( ) ? std : : prev ( msgs . end ( ) ) : msgs . end ( ) ) ;
}
// Copies every message part in the container into a vector of owning std::strings.
template <typename MessageContainer>
std::vector<std::string> as_strings(const MessageContainer& msgs) {
    std::vector<std::string> copies;
    copies.reserve(msgs.size());
    for (const auto& part : msgs)
        copies.emplace_back(part.template data<char>(), part.size());
    return copies;
}
2020-02-25 03:20:56 +01:00
// Returns a string view of the given message data. It's the caller's responsibility to keep the
2020-02-28 05:16:43 +01:00
// referenced message alive. If you want a std::string instead just call `m.to_string()`
2020-02-25 03:20:56 +01:00
string_view view ( const zmq : : message_t & m ) {
2020-02-03 03:39:26 +01:00
return { m . data < char > ( ) , m . size ( ) } ;
}
// Builds a ZMTP metadata key-value pair. These will be available on every message from that peer.
// Keys must start with X- and be <= 255 characters.
std : : string zmtp_metadata ( string_view key , string_view value ) {
assert ( key . size ( ) > 2 & & key . size ( ) < = 255 & & key [ 0 ] = = ' X ' & & key [ 1 ] = = ' - ' ) ;
std : : string result ;
result . reserve ( 1 + key . size ( ) + 4 + value . size ( ) ) ;
result + = static_cast < char > ( key . size ( ) ) ; // Size octet of key
result . append ( & key [ 0 ] , key . size ( ) ) ; // key data
for ( int i = 24 ; i > = 0 ; i - = 8 ) // 4-byte size of value in network order
result + = static_cast < char > ( ( value . size ( ) > > i ) & 0xff ) ;
result . append ( & value [ 0 ] , value . size ( ) ) ; // value data
return result ;
}
// Throws std::logic_error if the proxy thread is not running (i.e. start() has not been called):
// the operation described by `verb` requires a started instance.
void check_started(const std::thread& proxy_thread, const std::string& verb) {
    if (proxy_thread.joinable())
        return;
    throw std::logic_error("Cannot " + verb + " before calling `start()`");
}
// Throws std::logic_error if the proxy thread is already running: the operation described by
// `verb` is only allowed before start().
void check_not_started(const std::thread& proxy_thread, const std::string& verb) {
    if (!proxy_thread.joinable())
        return;
    throw std::logic_error("Cannot " + verb + " after calling `start()`");
}
// Extracts and builds the "send" part of a message for proxy_send/proxy_reply
2020-02-11 07:30:07 +01:00
std : : list < zmq : : message_t > build_send_parts ( bt_list_consumer send , string_view route ) {
2020-02-03 03:39:26 +01:00
std : : list < zmq : : message_t > parts ;
if ( ! route . empty ( ) )
parts . push_back ( create_message ( route ) ) ;
2020-02-11 07:30:07 +01:00
while ( ! send . is_finished ( ) )
parts . push_back ( create_message ( send . consume_string ( ) ) ) ;
2020-02-03 03:39:26 +01:00
return parts ;
}
std : : string to_string ( AuthLevel a ) {
switch ( a ) {
case AuthLevel : : denied : return " denied " ;
case AuthLevel : : none : return " none " ;
case AuthLevel : : basic : return " basic " ;
case AuthLevel : : admin : return " admin " ;
default : return " (unknown) " ;
}
}
2020-02-28 05:16:43 +01:00
AuthLevel auth_from_string ( string_view a ) {
if ( a = = " none " ) return AuthLevel : : none ;
if ( a = = " basic " ) return AuthLevel : : basic ;
if ( a = = " admin " ) return AuthLevel : : admin ;
return AuthLevel : : denied ;
}
2020-02-03 03:39:26 +01:00
2020-02-28 05:16:43 +01:00
/// Extracts a pubkey, SN status, and auth level from a zmq message received on a *listening*
/// socket.
std : : tuple < std : : string , bool , AuthLevel > extract_metadata ( zmq : : message_t & msg ) {
auto result = std : : make_tuple ( " " s , false , AuthLevel : : none ) ;
try {
string_view pubkey_hex { msg . gets ( " User-Id " ) } ;
if ( pubkey_hex . size ( ) ! = 64 )
throw std : : logic_error ( " bad user-id " ) ;
assert ( is_hex ( pubkey_hex . begin ( ) , pubkey_hex . end ( ) ) ) ;
auto & pubkey = std : : get < std : : string > ( result ) ;
pubkey . resize ( 32 , 0 ) ;
from_hex ( pubkey_hex . begin ( ) , pubkey_hex . end ( ) , pubkey . begin ( ) ) ;
} catch ( . . . ) { }
2020-02-06 01:21:27 +01:00
try {
string_view is_sn { msg . gets ( " X-SN " ) } ;
if ( is_sn . size ( ) = = 1 & & is_sn [ 0 ] = = ' 1 ' )
2020-02-28 05:16:43 +01:00
std : : get < bool > ( result ) = true ;
} catch ( . . . ) { }
try {
string_view auth_level { msg . gets ( " X-AuthLevel " ) } ;
std : : get < AuthLevel > ( result ) = auth_from_string ( auth_level ) ;
} catch ( . . . ) { }
return result ;
2020-02-03 03:39:26 +01:00
}
const char * peer_address ( zmq : : message_t & msg ) {
try { return msg . gets ( " Peer-Address " ) ; } catch ( . . . ) { }
return " (unknown) " ;
}
2020-02-28 05:16:43 +01:00
void add_pollitem ( std : : vector < zmq : : pollitem_t > & pollitems , zmq : : socket_t & sock ) {
pollitems . emplace_back ( ) ;
auto & p = pollitems . back ( ) ;
p . socket = static_cast < void * > ( sock ) ;
p . fd = 0 ;
p . events = ZMQ_POLLIN ;
}
2020-02-03 03:39:26 +01:00
} // anonymous namespace
namespace detail {
// Sends a control messages between proxy and threads or between proxy and workers consisting of a
// single command codes with an optional data part (the data frame is omitted if empty).
void send_control ( zmq : : socket_t & sock , string_view cmd , std : : string data ) {
auto c = create_message ( std : : move ( cmd ) ) ;
if ( data . empty ( ) ) {
sock . send ( c , zmq : : send_flags : : none ) ;
} else {
auto d = create_message ( std : : move ( data ) ) ;
sock . send ( c , zmq : : send_flags : : sndmore ) ;
sock . send ( d , zmq : : send_flags : : none ) ;
}
}
} // namespace detail
2020-02-28 05:16:43 +01:00
std : : ostream & operator < < ( std : : ostream & o , const ConnectionID & conn ) {
if ( ! conn . pk . empty ( ) )
return o < < ( conn . sn ( ) ? " SN " : " non-SN authenticated remote " ) < < to_hex ( conn . pk ) ;
else
return o < < " unauthenticated remote [ " < < conn . id < < " ] " ;
}
// Rebuilds the proxy's poll list from scratch: the three internal sockets (command, worker,
// ZAP auth) first, followed by one entry per open connection, in `connections` order.
// NOTE(review): the fixed position of the first three entries appears to be relied on by the
// poll-dispatch code elsewhere -- preserve this ordering.
void LokiMQ::rebuild_pollitems() {
    pollitems.clear();
    add_pollitem(pollitems, command);
    add_pollitem(pollitems, workers_socket);
    add_pollitem(pollitems, zap_auth);
    for (auto& s : connections)
        add_pollitem(pollitems, s);
}
// Thread-safe setter for the current log level.  Relaxed ordering: the level is an independent
// flag with no required ordering relative to other memory operations.
void LokiMQ::log_level(LogLevel level) {
    log_lvl.store(level, std::memory_order_relaxed);
}
// Thread-safe getter for the current log level (relaxed; see the setter).
LogLevel LokiMQ::log_level() const {
    return log_lvl.load(std::memory_order_relaxed);
}
void LokiMQ : : add_category ( std : : string name , Access access_level , unsigned int reserved_threads , int max_queue ) {
2020-02-11 07:30:07 +01:00
check_not_started ( proxy_thread , " add a category " ) ;
2020-02-03 03:39:26 +01:00
if ( name . size ( ) > MAX_CATEGORY_LENGTH )
throw std : : runtime_error ( " Invalid category name ` " + name + " ': name too long (> " + std : : to_string ( MAX_CATEGORY_LENGTH ) + " ) " ) ;
if ( name . empty ( ) | | name . find ( ' . ' ) ! = std : : string : : npos )
throw std : : runtime_error ( " Invalid category name ` " + name + " ' " ) ;
auto it = categories . find ( name ) ;
if ( it ! = categories . end ( ) )
throw std : : runtime_error ( " Unable to add category ` " + name + " ': that category already exists " ) ;
categories . emplace ( std : : move ( name ) , category { access_level , reserved_threads , max_queue } ) ;
}
void LokiMQ : : add_command ( const std : : string & category , std : : string name , CommandCallback callback ) {
2020-02-11 07:30:07 +01:00
check_not_started ( proxy_thread , " add a command " ) ;
2020-02-03 03:39:26 +01:00
if ( name . size ( ) > MAX_COMMAND_LENGTH )
throw std : : runtime_error ( " Invalid command name ` " + name + " ': name too long (> " + std : : to_string ( MAX_COMMAND_LENGTH ) + " ) " ) ;
auto catit = categories . find ( category ) ;
if ( catit = = categories . end ( ) )
throw std : : runtime_error ( " Cannot add a command to unknown category ` " + category + " ' " ) ;
std : : string fullname = category + ' . ' + name ;
if ( command_aliases . count ( fullname ) )
throw std : : runtime_error ( " Cannot add command ` " + fullname + " ': a command alias with that name is already defined " ) ;
2020-02-11 07:30:07 +01:00
auto ins = catit - > second . commands . insert ( { std : : move ( name ) , { std : : move ( callback ) , false } } ) ;
2020-02-03 03:39:26 +01:00
if ( ! ins . second )
throw std : : runtime_error ( " Cannot add command ` " + fullname + " ': that command already exists " ) ;
}
2020-02-11 07:30:07 +01:00
void LokiMQ : : add_request_command ( const std : : string & category , std : : string name , CommandCallback callback ) {
add_command ( category , name , std : : move ( callback ) ) ;
categories . at ( category ) . commands . at ( name ) . second = true ;
}
2020-02-03 03:39:26 +01:00
void LokiMQ : : add_command_alias ( std : : string from , std : : string to ) {
2020-02-11 07:30:07 +01:00
check_not_started ( proxy_thread , " add a command alias " ) ;
2020-02-03 03:39:26 +01:00
if ( from . empty ( ) )
throw std : : runtime_error ( " Cannot add an alias for empty command " ) ;
size_t fromdot = from . find ( ' . ' ) ;
if ( fromdot = = 0 ) // We don't have to have a ., but if we do it can't be at the beginning.
throw std : : runtime_error ( " Invalid command alias ` " + from + " ' " ) ;
size_t todot = to . find ( ' . ' ) ;
if ( todot = = 0 | | todot = = std : : string : : npos ) // must have a dot for the target
throw std : : runtime_error ( " Invalid command alias target ` " + to + " ' " ) ;
if ( fromdot ! = std : : string : : npos ) {
auto catit = categories . find ( from . substr ( 0 , fromdot ) ) ;
if ( catit ! = categories . end ( ) & & catit - > second . commands . count ( from . substr ( fromdot + 1 ) ) )
throw std : : runtime_error ( " Invalid command alias: ` " + from + " ' would mask an existing command " ) ;
}
auto ins = command_aliases . emplace ( std : : move ( from ) , std : : move ( to ) ) ;
if ( ! ins . second )
throw std : : runtime_error ( " Cannot add command alias ` " + ins . first - > first + " ': that alias already exists " ) ;
}
/// Monotonic counter handing each LokiMQ instance a unique object_id (used to key the
/// thread-local control-socket cache in get_control_socket()).
std::atomic<int> next_id{1};

/// We have one mutex here that is generally used once per thread: to create a thread-local command
/// socket to talk to the proxy thread's control socket. We need the proxy thread to also have a
/// copy of it so that it can close them when it is exiting, and to guard against trying to create
/// one while the proxy is trying to quit.
std::mutex control_sockets_mutex;
/// Accesses a thread-local command socket connected to the proxy's command socket used to issue
/// commands in a thread-safe manner. A mutex is only required here the first time a thread
/// accesses the control socket.
zmq::socket_t& LokiMQ::get_control_socket() {
    assert(proxy_thread.joinable());

    // Maps the LokiMQ unique ID to a local thread command socket.
    static thread_local std::map<int, std::shared_ptr<zmq::socket_t>> control_sockets;
    static thread_local std::pair<int, std::shared_ptr<zmq::socket_t>> last{-1, nullptr};

    // Optimize by caching the last value; LokiMQ is often a singleton and in that case we're
    // going to *always* hit this optimization. Even if it isn't, we're probably likely to need the
    // same control socket from the same thread multiple times sequentially so this may still help.
    // (Lock-free fast path: both caches are thread_local, so no synchronization is needed here.)
    if (object_id == last.first)
        return *last.second;
    auto it = control_sockets.find(object_id);
    if (it != control_sockets.end()) {
        last = *it;
        return *last.second;
    }

    // Slow path: first access from this thread for this instance.  The mutex guards the shared
    // thread_control_sockets list and the shutdown flag (the proxy closes these sockets on exit,
    // and we must not create a new one while it is shutting down).
    std::lock_guard<std::mutex> lock{control_sockets_mutex};
    if (proxy_shutting_down)
        throw std::runtime_error("Unable to obtain LokiMQ control socket: proxy thread is shutting down");
    auto control = std::make_shared<zmq::socket_t>(context, zmq::socket_type::dealer);
    control->setsockopt<int>(ZMQ_LINGER, 0);
    control->connect(SN_ADDR_COMMAND);
    thread_control_sockets.push_back(control);
    control_sockets.emplace(object_id, control);
    last.first = object_id;
    last.second = std::move(control);
    return *last.second;
}
// Constructs a LokiMQ instance.  pubkey_/privkey_ must either both be given (raw x25519 keys of
// crypto_box_*KEYBYTES length) or both be empty, in which case a fresh keypair is generated --
// which is only permitted when not in service-node mode.  Throws std::invalid_argument on any
// key validation failure.
LokiMQ::LokiMQ(
        std::string pubkey_,
        std::string privkey_,
        bool service_node,
        SNRemoteAddress lookup,
        Logger logger)
    : object_id{next_id++}, pubkey{std::move(pubkey_)}, privkey{std::move(privkey_)}, local_service_node{service_node},
      sn_lookup{std::move(lookup)}, logger{std::move(logger)}
{
    LMQ_TRACE("Constructing listening LokiMQ, id=", object_id, ", this=", this);

    if (pubkey.empty() != privkey.empty()) {
        // Exactly one of the two keys given: invalid.
        throw std::invalid_argument("LokiMQ construction failed: one (and only one) of pubkey/privkey is empty. Both must be specified, or both empty to generate a key.");
    } else if (pubkey.empty()) {
        if (service_node)
            throw std::invalid_argument("Cannot construct a service node mode LokiMQ without a keypair");
        LMQ_LOG(debug, "generating x25519 keypair for remote-only LokiMQ instance");
        pubkey.resize(crypto_box_PUBLICKEYBYTES);
        privkey.resize(crypto_box_SECRETKEYBYTES);
        crypto_box_keypair(reinterpret_cast<unsigned char*>(&pubkey[0]), reinterpret_cast<unsigned char*>(&privkey[0]));
    } else if (pubkey.size() != crypto_box_PUBLICKEYBYTES) {
        throw std::invalid_argument("pubkey has invalid size " + std::to_string(pubkey.size()) + ", expected " + std::to_string(crypto_box_PUBLICKEYBYTES));
    } else if (privkey.size() != crypto_box_SECRETKEYBYTES) {
        throw std::invalid_argument("privkey has invalid size " + std::to_string(privkey.size()) + ", expected " + std::to_string(crypto_box_SECRETKEYBYTES));
    } else {
        // Verify the pubkey. We could get by with taking just the privkey and just generate this
        // for ourselves, but this provides an extra check to make sure we and the caller agree
        // cryptographically (e.g. to make sure they don't pass us an ed25519 keypair by mistake)
        std::string verify_pubkey(crypto_box_PUBLICKEYBYTES, 0);
        crypto_scalarmult_base(reinterpret_cast<unsigned char*>(&verify_pubkey[0]), reinterpret_cast<unsigned char*>(&privkey[0]));
        if (verify_pubkey != pubkey)
            throw std::invalid_argument("Invalid pubkey/privkey values given to LokiMQ construction: pubkey verification failed");
    }
}
// Starts the proxy thread.  Must be called exactly once, after all configuration (categories,
// commands, bind addresses); blocks until the proxy thread replies READY.  Throws on a second
// call, on a service node with nothing to bind, or if the proxy handshake fails.
void LokiMQ::start() {
    if (proxy_thread.joinable())
        throw std::logic_error("Cannot call start() multiple times!");

    // If we're not binding to anything then we don't listen, i.e. we can only establish outbound
    // connections. Don't allow this if we are in service_node mode because, if we aren't
    // listening, we are useless as a service node.
    if (bind.empty() && local_service_node)
        throw std::invalid_argument{"Cannot create a service node listener with no address(es) to bind"};

    LMQ_LOG(info, "Initializing LokiMQ ", bind.empty() ? "remote-only" : "listener", " with pubkey ", to_hex(pubkey));

    // We bind `command` here so that the `get_control_socket()` below is always connecting to a
    // bound socket, but we do nothing else here: the proxy thread is responsible for everything
    // except binding it.
    command.bind(SN_ADDR_COMMAND);
    proxy_thread = std::thread{&LokiMQ::proxy_loop, this};

    LMQ_LOG(debug, "Waiting for proxy thread to get ready...");
    auto& control = get_control_socket();
    detail::send_control(control, "START");
    LMQ_TRACE("Sent START command");

    // Synchronous handshake: wait for the proxy's single-frame READY reply before returning.
    zmq::message_t ready_msg;
    std::vector<zmq::message_t> parts;
    try { recv_message_parts(control, parts); }
    catch (const zmq::error_t& e) { throw std::runtime_error("Failure reading from LokiMQ::Proxy thread: "s + e.what()); }

    if (!(parts.size() == 1 && view(parts.front()) == "READY"))
        throw std::runtime_error("Invalid startup message from proxy thread (didn't get expected READY message)");
    LMQ_LOG(debug, "Proxy thread is ready");
}
// Body of each worker thread: runs the job pre-loaded into workers[index], then loops -- telling
// the proxy "RAN" over a dedicated dealer socket and waiting for either a "RUN" (new job has been
// written into our run_info) or "QUIT" instruction.
void LokiMQ::worker_thread(unsigned int index) {
    std::string worker_id = "w" + std::to_string(index);
    zmq::socket_t sock{context, zmq::socket_type::dealer};
    // The routing id lets the proxy address this specific worker on its router socket.
    sock.setsockopt(ZMQ_ROUTING_ID, worker_id.data(), worker_id.size());
    LMQ_LOG(debug, "New worker thread ", worker_id, " started");
    sock.connect(SN_ADDR_WORKERS);

    Message message{*this, 0};
    std::vector<zmq::message_t> parts;
    run_info& run = workers[index]; // This contains our first job, and will be updated later with subsequent jobs

    while (true) {
        try {
            if (run.is_batch_job) {
                // batch_jobno >= 0 selects an individual batch job; -1 means run the batch's
                // completion function.
                if (run.batch_jobno >= 0) {
                    LMQ_TRACE("worker thread ", worker_id, " running batch ", run.batch, "#", run.batch_jobno);
                    run.batch->run_job(run.batch_jobno);
                } else if (run.batch_jobno == -1) {
                    LMQ_TRACE("worker thread ", worker_id, " running batch ", run.batch, " completion");
                    run.batch->job_completion();
                }
            } else {
                message.conn = run.conn;
                message.route = run.conn_route;
                message.data.clear();

                LMQ_TRACE("Got incoming command from ", message.conn, message.route.empty() ? " (outgoing)" : " (incoming)");

                if (run.callback->second /*is_request*/) {
                    // For requests the first data part is the reply tag; the rest are payload.
                    message.reply_tag = {run.data_parts[0].data<char>(), run.data_parts[0].size()};
                    for (auto it = run.data_parts.begin() + 1; it != run.data_parts.end(); ++it)
                        message.data.emplace_back(it->data<char>(), it->size());
                } else {
                    for (auto& m : run.data_parts)
                        message.data.emplace_back(m.data<char>(), m.size());
                }

                LMQ_TRACE("worker thread ", worker_id, " invoking ", run.command, " callback with ", message.data.size(), " message parts");
                run.callback->first(message);
            }
        }
        // A failing job must never kill the worker: log and move on to the next job.
        catch (const bt_deserialize_invalid& e) {
            LMQ_LOG(warn, worker_id, " deserialization failed: ", e.what(), "; ignoring request");
        }
        catch (const mapbox::util::bad_variant_access& e) {
            LMQ_LOG(warn, worker_id, " deserialization failed: found unexpected serialized type (", e.what(), "); ignoring request");
        }
        catch (const std::out_of_range& e) {
            LMQ_LOG(warn, worker_id, " deserialization failed: invalid data - required field missing (", e.what(), "); ignoring request");
        }
        catch (const std::exception& e) {
            LMQ_LOG(warn, worker_id, " caught exception when processing command: ", e.what());
        }
        catch (...) {
            LMQ_LOG(warn, worker_id, " caught non-standard exception when processing command");
        }

        while (true) {
            // Signal that we are ready for another job and wait for it. (We do this down here
            // because our first job gets set up when the thread is started).
            detail::send_control(sock, "RAN");
            LMQ_TRACE("worker ", worker_id, " waiting for requests");
            parts.clear();
            recv_message_parts(sock, parts);

            if (parts.size() != 1) {
                LMQ_LOG(error, "Internal error: worker ", worker_id, " received invalid ", parts.size(), "-part worker instruction");
                continue;
            }
            auto command = view(parts[0]);
            if (command == "RUN") {
                LMQ_LOG(debug, "worker ", worker_id, " running command ", run.command);
                break; // proxy has set up a command for us, go back and run it.
            } else if (command == "QUIT") {
                LMQ_LOG(debug, "worker ", worker_id, " shutting down");
                detail::send_control(sock, "QUITTING");
                sock.setsockopt<int>(ZMQ_LINGER, 1000);
                sock.close();
                return;
            } else {
                LMQ_LOG(error, "Internal error: worker ", worker_id, " received invalid command: `", command, "'");
            }
        }
    }
}
void LokiMQ : : proxy_quit ( ) {
LMQ_LOG ( debug , " Received quit command, shutting down proxy thread " ) ;
2020-02-28 05:16:43 +01:00
assert ( std : : none_of ( workers . begin ( ) , workers . end ( ) , [ ] ( auto & worker ) { return worker . worker_thread . joinable ( ) ; } ) ) ;
2020-02-03 03:39:26 +01:00
command . setsockopt < int > ( ZMQ_LINGER , 0 ) ;
command . close ( ) ;
{
std : : lock_guard < std : : mutex > lock { control_sockets_mutex } ;
for ( auto & control : thread_control_sockets )
control - > close ( ) ;
proxy_shutting_down = true ; // To prevent threads from opening new control sockets
}
workers_socket . close ( ) ;
int linger = std : : chrono : : milliseconds { CLOSE_LINGER } . count ( ) ;
2020-02-28 05:16:43 +01:00
for ( auto & s : connections )
s . setsockopt ( ZMQ_LINGER , linger ) ;
connections . clear ( ) ;
peers . clear ( ) ;
2020-02-03 03:39:26 +01:00
LMQ_LOG ( debug , " Proxy thread teardown complete " ) ;
}
2020-02-11 07:30:07 +01:00
void LokiMQ : : setup_outgoing_socket ( zmq : : socket_t & socket , string_view remote_pubkey ) {
2020-02-28 05:16:43 +01:00
if ( ! remote_pubkey . empty ( ) ) {
2020-02-11 07:30:07 +01:00
socket . setsockopt ( ZMQ_CURVE_SERVERKEY , remote_pubkey . data ( ) , remote_pubkey . size ( ) ) ;
2020-02-28 05:16:43 +01:00
socket . setsockopt ( ZMQ_CURVE_PUBLICKEY , pubkey . data ( ) , pubkey . size ( ) ) ;
socket . setsockopt ( ZMQ_CURVE_SECRETKEY , privkey . data ( ) , privkey . size ( ) ) ;
}
2020-02-11 07:30:07 +01:00
socket . setsockopt ( ZMQ_HANDSHAKE_IVL , ( int ) HANDSHAKE_TIME . count ( ) ) ;
socket . setsockopt < int64_t > ( ZMQ_MAXMSGSIZE , MAX_MSG_SIZE ) ;
socket . setsockopt ( ZMQ_ROUTING_ID , pubkey . data ( ) , pubkey . size ( ) ) ;
}
// Establishes (or reuses) a connection to the given remote SN pubkey.  Returns the socket plus,
// for an incoming connection, its routing prefix (empty for outgoing); returns {nullptr, ""} if
// no usable connection exists and one could not (or should not) be created.
std::pair<zmq::socket_t*, std::string>
LokiMQ::proxy_connect_sn(string_view remote, string_view connect_hint, bool optional, bool incoming_only, std::chrono::milliseconds keep_alive) {
    ConnectionID remote_cid{remote};
    // Look for any existing connection to this pubkey (there may be several entries).
    auto its = peers.equal_range(remote_cid);
    peer_info* peer = nullptr;
    for (auto it = its.first; it != its.second; ++it) {
        if (incoming_only && it->second.route.empty())
            continue; // outgoing connection but we were asked to only use incoming connections
        peer = &it->second;
        break;
    }

    if (peer) {
        LMQ_TRACE("proxy asked to connect to ", to_hex(remote), "; reusing existing connection");
        if (peer->route.empty() /* == outgoing*/) {
            // Only extend the idle expiry, never shorten it.
            if (peer->idle_expiry < keep_alive) {
                LMQ_LOG(debug, "updating existing outgoing peer connection idle expiry time from ",
                        peer->idle_expiry.count(), "ms to ", keep_alive.count(), "ms");
                peer->idle_expiry = keep_alive;
            }
            peer->activity();
        }
        return {&connections[peer->conn_index], peer->route};
    } else if (optional || incoming_only) {
        LMQ_LOG(debug, "proxy asked for optional or incoming connection, but no appropriate connection exists so aborting connection attempt");
        return {nullptr, ""s};
    }

    // No connection so establish a new one
    LMQ_LOG(debug, "proxy establishing new outbound connection to ", to_hex(remote));
    std::string addr;
    // NOTE: the `false &&` deliberately disables the self-connection path; see the FIXME.
    bool to_self = false && remote == pubkey; // FIXME; need to use a separate listening socket for this, otherwise we can't easily
                                              // tell it wasn't from a remote.
    if (to_self) {
        // special inproc connection if self that doesn't need any external connection
        addr = SN_ADDR_SELF;
    } else {
        // Prefer the caller-provided hint; fall back to the SN address lookup callback.
        addr = std::string{connect_hint};
        if (addr.empty())
            addr = sn_lookup(remote);
        else
            LMQ_LOG(debug, "using connection hint ", connect_hint);

        if (addr.empty()) {
            LMQ_LOG(error, "peer lookup failed for ", to_hex(remote));
            return {nullptr, ""s};
        }
    }

    LMQ_LOG(debug, to_hex(pubkey), " (me) connecting to ", addr, " to reach ", to_hex(remote));
    zmq::socket_t socket{context, zmq::socket_type::dealer};
    setup_outgoing_socket(socket, remote);
    socket.connect(addr);

    // Record the new peer; conn_index is set before the push_back so it indexes the new socket.
    peer_info p{};
    p.service_node = true;
    p.pubkey = std::string{remote};
    p.conn_index = connections.size();
    p.idle_expiry = keep_alive;
    p.activity();
    peers.emplace(std::move(remote_cid), std::move(p));
    connections.push_back(std::move(socket));

    return {&connections.back(), ""s};
}
2020-02-28 05:16:43 +01:00
std : : pair < zmq : : socket_t * , std : : string > LokiMQ : : proxy_connect_sn ( bt_dict_consumer data ) {
string_view hint , remote_pk ;
std : : chrono : : milliseconds keep_alive ;
bool optional = false , incoming_only = false ;
2020-02-03 03:39:26 +01:00
2020-02-28 05:16:43 +01:00
// Alphabetical order
if ( data . skip_until ( " hint " ) )
hint = data . consume_string ( ) ;
if ( data . skip_until ( " incoming " ) )
incoming_only = data . consume_integer < bool > ( ) ;
if ( data . skip_until ( " keep-alive " ) )
keep_alive = std : : chrono : : milliseconds { data . consume_integer < uint64_t > ( ) } ;
if ( data . skip_until ( " optional " ) )
optional = data . consume_integer < bool > ( ) ;
if ( ! data . skip_until ( " pubkey " ) )
throw std : : runtime_error ( " Internal error: Invalid proxy_connect_sn command; pubkey missing " ) ;
remote_pk = data . consume_string ( ) ;
2020-02-03 03:39:26 +01:00
2020-02-28 05:16:43 +01:00
return proxy_connect_sn ( remote_pk , hint , optional , incoming_only , keep_alive ) ;
2020-02-03 03:39:26 +01:00
}
2020-02-11 07:30:07 +01:00
void LokiMQ : : proxy_send ( bt_dict_consumer data ) {
// NB: bt_dict_consumer goes in alphabetical order
2020-02-25 03:20:56 +01:00
string_view hint ;
2020-02-11 07:30:07 +01:00
std : : chrono : : milliseconds keep_alive { DEFAULT_SEND_KEEP_ALIVE } ;
bool optional = false ;
bool incoming = false ;
bool request = false ;
2020-02-28 05:16:43 +01:00
bool have_conn_id = false ;
ConnectionID conn_id ;
string_view conn_route ;
2020-02-11 07:30:07 +01:00
std : : string request_tag ;
std : : unique_ptr < ReplyCallback > request_cbptr ;
2020-02-28 05:16:43 +01:00
if ( data . skip_until ( " conn_id " ) ) {
conn_id . id = data . consume_integer < long long > ( ) ;
if ( conn_id . id = = - 1 )
throw std : : runtime_error ( " Invalid error: invalid conn_id value (-1) " ) ;
have_conn_id = true ;
}
if ( data . skip_until ( " conn_route " ) )
conn_route = data . consume_string_view ( ) ;
2020-02-11 07:30:07 +01:00
if ( data . skip_until ( " hint " ) )
2020-02-25 03:20:56 +01:00
hint = data . consume_string_view ( ) ;
2020-02-11 07:30:07 +01:00
if ( data . skip_until ( " incoming " ) )
incoming = data . consume_integer < bool > ( ) ;
if ( data . skip_until ( " keep-alive " ) )
keep_alive = std : : chrono : : milliseconds { data . consume_integer < uint64_t > ( ) } ;
if ( data . skip_until ( " optional " ) )
optional = data . consume_integer < bool > ( ) ;
2020-02-28 05:16:43 +01:00
if ( data . skip_until ( " pubkey " ) ) {
if ( have_conn_id )
throw std : : runtime_error ( " Internal error: Invalid proxy send command; conn_id and pubkey are exclusive " ) ;
conn_id . pk = data . consume_string ( ) ;
conn_id . id = ConnectionID : : SN_ID ;
} else if ( ! have_conn_id )
throw std : : runtime_error ( " Internal error: Invalid proxy send command; pubkey or conn_id missing " ) ;
2020-02-11 07:30:07 +01:00
if ( data . skip_until ( " request " ) )
request = data . consume_integer < bool > ( ) ;
if ( request ) {
if ( ! data . skip_until ( " request_callback " ) )
throw std : : runtime_error ( " Internal error: received request without request_callback " ) ;
request_cbptr . reset ( reinterpret_cast < ReplyCallback * > ( data . consume_integer < uintptr_t > ( ) ) ) ;
if ( ! data . skip_until ( " request_tag " ) )
throw std : : runtime_error ( " Internal error: received request without request_name " ) ;
request_tag = data . consume_string ( ) ;
}
if ( ! data . skip_until ( " send " ) )
throw std : : runtime_error ( " Internal error: Invalid proxy send command; send parts missing " ) ;
bt_list_consumer send = data . consume_list_consumer ( ) ;
2020-02-03 03:39:26 +01:00
2020-02-28 05:16:43 +01:00
zmq : : socket_t * send_to ;
std : : string routing_prefix ;
if ( conn_id . sn ( ) ) {
auto sock_route = proxy_connect_sn ( conn_id . pk , hint , optional , incoming , keep_alive ) ;
if ( ! sock_route . first ) {
if ( optional )
LMQ_LOG ( debug , " Not sending: send is optional and no connection to " ,
to_hex ( conn_id . pk ) , " is currently established " ) ;
else
LMQ_LOG ( error , " Unable to send to " , to_hex ( conn_id . pk ) , " : no connection address found " ) ;
return ;
}
send_to = sock_route . first ;
routing_prefix = std : : move ( sock_route . second ) ;
} else if ( ! conn_route . empty ( ) ) { // incoming non-SN connection
auto it = incoming_conn_index . find ( conn_id ) ;
if ( it = = incoming_conn_index . end ( ) ) {
LMQ_LOG ( warn , " Unable to send to " , conn_id , " : incoming listening socket not found " ) ;
return ;
}
send_to = & connections [ it - > second ] ;
routing_prefix = std : : string { conn_route } ;
} else {
auto pr = peers . equal_range ( conn_id ) ;
if ( pr . first = = peers . end ( ) ) {
LMQ_LOG ( warn , " Unable to send: connection id " , conn_id , " is not (or is no longer) a valid connection " ) ;
return ;
}
auto & peer = pr . first - > second ;
send_to = & connections [ peer . conn_index ] ;
2020-02-03 03:39:26 +01:00
}
2020-02-11 07:30:07 +01:00
if ( request ) {
2020-02-21 01:13:29 +01:00
LMQ_LOG ( debug , " Added new pending request " , to_hex ( request_tag ) ) ;
2020-02-11 07:30:07 +01:00
pending_requests . insert ( { request_tag , {
std : : chrono : : steady_clock : : now ( ) + REQUEST_TIMEOUT , std : : move ( * request_cbptr ) } } ) ;
}
2020-02-03 03:39:26 +01:00
try {
2020-02-28 05:16:43 +01:00
send_message_parts ( * send_to , build_send_parts ( send , routing_prefix ) ) ;
2020-02-03 03:39:26 +01:00
} catch ( const zmq : : error_t & e ) {
2020-02-28 05:16:43 +01:00
if ( e . num ( ) = = EHOSTUNREACH & & ! routing_prefix . empty ( ) /*= incoming conn*/ ) {
LMQ_LOG ( debug , " Incoming connection is no longer valid; removing peer details " ) ;
// Our incoming connection no longer exists; remove it from `peers`.
auto pr = peers . equal_range ( conn_id ) ;
if ( pr . first ! = peers . end ( ) ) {
if ( ! conn_id . sn ( ) ) {
peers . erase ( pr . first ) ;
} else {
bool removed ;
for ( auto it = pr . first ; it ! = pr . second ; ) {
auto & peer = it - > second ;
if ( peer . route = = routing_prefix ) {
peers . erase ( it ) ;
removed = true ;
break ;
}
}
// The incoming connection to the SN is no longer good, but we can retry because
// we may have another active connection with the SN (or may want to open one).
if ( removed ) {
LMQ_LOG ( debug , " Retrying sending to SN " , to_hex ( conn_id . pk ) , " using other sockets " ) ;
return proxy_send ( std : : move ( data ) ) ;
}
}
}
2020-02-03 03:39:26 +01:00
}
2020-02-28 05:16:43 +01:00
LMQ_LOG ( warn , " Unable to send message to " , conn_id , " : " , e . what ( ) ) ;
2020-02-03 03:39:26 +01:00
}
}
2020-02-11 07:30:07 +01:00
void LokiMQ : : proxy_reply ( bt_dict_consumer data ) {
2020-02-28 05:16:43 +01:00
bool have_conn_id = false ;
ConnectionID conn_id { 0 } ;
if ( data . skip_until ( " conn_id " ) ) {
conn_id . id = data . consume_integer < long long > ( ) ;
if ( conn_id . id = = - 1 )
throw std : : runtime_error ( " Invalid error: invalid conn_id value (-1) " ) ;
have_conn_id = true ;
2020-02-03 03:39:26 +01:00
}
2020-02-28 05:16:43 +01:00
if ( data . skip_until ( " pubkey " ) ) {
if ( have_conn_id )
throw std : : runtime_error ( " Internal error: Invalid proxy send command; conn_id and pubkey are exclusive " ) ;
conn_id . pk = data . consume_string ( ) ;
conn_id . id = ConnectionID : : SN_ID ;
} else if ( ! have_conn_id )
throw std : : runtime_error ( " Internal error: Invalid proxy send command; pubkey or conn_id missing " ) ;
2020-02-11 07:30:07 +01:00
if ( ! data . skip_until ( " send " ) )
throw std : : runtime_error ( " Internal error: Invalid proxy reply command; send parts missing " ) ;
2020-02-28 05:16:43 +01:00
2020-02-11 07:30:07 +01:00
bt_list_consumer send = data . consume_list_consumer ( ) ;
2020-02-28 05:16:43 +01:00
auto pr = peers . equal_range ( conn_id ) ;
if ( pr . first = = pr . second ) {
LMQ_LOG ( warn , " Unable to send tagged reply: the connection is no longer valid " ) ;
return ;
}
// We try any connections until one works (for ordinary remotes there will be just one, but for
// SNs there might be one incoming and one outgoing).
for ( auto it = pr . first ; it ! = pr . second ; ) {
try {
send_message_parts ( connections [ it - > second . conn_index ] , build_send_parts ( send , it - > second . route ) ) ;
break ;
} catch ( const zmq : : error_t & err ) {
if ( err . num ( ) = = EHOSTUNREACH ) {
LMQ_LOG ( info , " Unable to send reply to incoming non-SN request: remote is no longer connected " ) ;
LMQ_LOG ( debug , " Incoming connection is no longer valid; removing peer details " ) ;
it = peers . erase ( it ) ;
} else {
LMQ_LOG ( warn , " Unable to send reply to incoming non-SN request: " , err . what ( ) ) ;
+ + it ;
}
2020-02-03 03:39:26 +01:00
}
}
}
2020-02-11 07:30:07 +01:00
void LokiMQ : : proxy_batch ( detail : : Batch * batch ) {
batches . insert ( batch ) ;
2020-02-06 01:21:27 +01:00
const int jobs = batch - > size ( ) ;
for ( int i = 0 ; i < jobs ; i + + )
batch_jobs . emplace ( batch , i ) ;
2020-02-28 22:54:00 +01:00
proxy_skip_one_poll = true ;
2020-02-06 01:21:27 +01:00
}
2020-02-28 22:54:00 +01:00
void LokiMQ : : proxy_schedule_reply_job ( std : : function < void ( ) > f ) {
2020-02-11 07:29:00 +01:00
auto * b = new Batch < void > ;
b - > add_job ( std : : move ( f ) ) ;
batches . insert ( b ) ;
2020-02-28 22:54:00 +01:00
reply_jobs . emplace ( static_cast < detail : : Batch * > ( b ) , 0 ) ;
proxy_skip_one_poll = true ;
2020-02-11 07:29:00 +01:00
}
// Called either within the proxy thread, or before the proxy thread has been created; actually adds
// the timer. If the timer object hasn't been set up yet it gets set up here.
void LokiMQ : : proxy_timer ( std : : function < void ( ) > job , std : : chrono : : milliseconds interval , bool squelch ) {
if ( ! timers )
timers . reset ( zmq_timers_new ( ) ) ;
int timer_id = zmq_timers_add ( timers . get ( ) ,
interval . count ( ) ,
[ ] ( int timer_id , void * self ) { static_cast < LokiMQ * > ( self ) - > _queue_timer_job ( timer_id ) ; } ,
this ) ;
if ( timer_id = = - 1 )
throw zmq : : error_t { } ;
timer_jobs [ timer_id ] = { std : : move ( job ) , squelch , false } ;
}
void LokiMQ : : proxy_timer ( bt_list_consumer timer_data ) {
std : : unique_ptr < std : : function < void ( ) > > func { reinterpret_cast < std : : function < void ( ) > * > ( timer_data . consume_integer < uintptr_t > ( ) ) } ;
auto interval = std : : chrono : : milliseconds { timer_data . consume_integer < uint64_t > ( ) } ;
auto squelch = timer_data . consume_integer < bool > ( ) ;
if ( ! timer_data . is_finished ( ) )
throw std : : runtime_error ( " Internal error: proxied timer request contains unexpected data " ) ;
proxy_timer ( std : : move ( * func ) , interval , squelch ) ;
}
2020-02-06 05:50:31 +01:00
void LokiMQ : : proxy_control_message ( std : : vector < zmq : : message_t > & parts ) {
if ( parts . size ( ) < 2 )
2020-02-03 03:39:26 +01:00
throw std : : logic_error ( " Expected 2-3 message parts for a proxy control message " ) ;
auto route = view ( parts [ 0 ] ) , cmd = view ( parts [ 1 ] ) ;
2020-02-13 03:10:40 +01:00
LMQ_TRACE ( " control message: " , cmd ) ;
2020-02-06 05:50:31 +01:00
if ( parts . size ( ) = = 3 ) {
2020-02-13 03:10:40 +01:00
LMQ_TRACE ( " ...: " , parts [ 2 ] ) ;
2020-02-06 05:50:31 +01:00
if ( cmd = = " SEND " ) {
2020-02-13 03:10:40 +01:00
LMQ_TRACE ( " proxying message " ) ;
2020-02-11 07:30:07 +01:00
return proxy_send ( view ( parts [ 2 ] ) ) ;
2020-02-06 05:50:31 +01:00
} else if ( cmd = = " REPLY " ) {
2020-02-13 03:10:40 +01:00
LMQ_TRACE ( " proxying reply to non-SN incoming message " ) ;
2020-02-11 07:30:07 +01:00
return proxy_reply ( view ( parts [ 2 ] ) ) ;
2020-02-06 05:50:31 +01:00
} else if ( cmd = = " BATCH " ) {
2020-02-13 03:10:40 +01:00
LMQ_TRACE ( " proxy batch jobs " ) ;
2020-02-06 05:50:31 +01:00
auto ptrval = bt_deserialize < uintptr_t > ( view ( parts [ 2 ] ) ) ;
return proxy_batch ( reinterpret_cast < detail : : Batch * > ( ptrval ) ) ;
2020-02-11 07:30:07 +01:00
} else if ( cmd = = " CONNECT_SN " ) {
2020-02-28 05:16:43 +01:00
proxy_connect_sn ( view ( parts [ 2 ] ) ) ;
2020-02-11 07:30:07 +01:00
return ;
} else if ( cmd = = " CONNECT_REMOTE " ) {
2020-02-28 05:16:43 +01:00
return proxy_connect_remote ( view ( parts [ 2 ] ) ) ;
} else if ( cmd = = " DISCONNECT " ) {
return proxy_disconnect ( view ( parts [ 2 ] ) ) ;
2020-02-11 07:29:00 +01:00
} else if ( cmd = = " TIMER " ) {
return proxy_timer ( view ( parts [ 2 ] ) ) ;
2020-02-06 05:50:31 +01:00
}
} else if ( parts . size ( ) = = 2 ) {
if ( cmd = = " START " ) {
// Command send by the owning thread during startup; we send back a simple READY reply to
// let it know we are running.
return route_control ( command , route , " READY " ) ;
} else if ( cmd = = " QUIT " ) {
// Asked to quit: set max_workers to zero and tell any idle ones to quit. We will
// close workers as they come back to READY status, and then close external
// connections once all workers are done.
max_workers = 0 ;
for ( const auto & route : idle_workers )
2020-02-28 05:16:43 +01:00
route_control ( workers_socket , workers [ route ] . worker_routing_id , " QUIT " ) ;
2020-02-06 05:50:31 +01:00
idle_workers . clear ( ) ;
return ;
}
2020-02-03 03:39:26 +01:00
}
2020-02-06 05:50:31 +01:00
throw std : : runtime_error ( " Proxy received invalid control command: " + std : : string { cmd } +
" ( " + std : : to_string ( parts . size ( ) ) + " ) " ) ;
2020-02-03 03:39:26 +01:00
}
2020-02-28 05:16:43 +01:00
template < typename Container , typename AccessIndex >
void update_connection_indices ( Container & c , size_t index , AccessIndex get_index ) {
for ( auto it = c . begin ( ) ; it ! = c . end ( ) ; ) {
size_t & i = get_index ( * it ) ;
if ( index = = i ) {
it = c . erase ( it ) ;
continue ;
}
if ( index > i )
2020-02-11 07:30:07 +01:00
- - i ;
2020-02-28 05:16:43 +01:00
+ + it ;
2020-02-11 07:30:07 +01:00
}
}
2020-02-28 05:16:43 +01:00
void LokiMQ : : proxy_close_connection ( size_t index , std : : chrono : : milliseconds linger ) {
connections [ index ] . setsockopt < int > ( ZMQ_LINGER , linger > 0 ms ? linger . count ( ) : 0 ) ;
pollitems_stale = true ;
connections . erase ( connections . begin ( ) + index ) ;
2020-02-03 03:39:26 +01:00
2020-02-28 05:16:43 +01:00
update_connection_indices ( peers , index ,
[ ] ( auto & p ) - > size_t & { return p . second . conn_index ; } ) ;
update_connection_indices ( pending_connects , index ,
[ ] ( auto & pc ) - > size_t & { return std : : get < size_t > ( pc ) ; } ) ;
update_connection_indices ( bind , index ,
[ ] ( auto & b ) - > size_t & { return b . second . index ; } ) ;
update_connection_indices ( incoming_conn_index , index ,
[ ] ( auto & oci ) - > size_t & { return oci . second ; } ) ;
assert ( index < conn_index_to_id . size ( ) ) ;
conn_index_to_id . erase ( conn_index_to_id . begin ( ) + index ) ;
2020-02-03 03:39:26 +01:00
}
void LokiMQ : : proxy_expire_idle_peers ( ) {
for ( auto it = peers . begin ( ) ; it ! = peers . end ( ) ; ) {
auto & info = it - > second ;
2020-02-28 05:16:43 +01:00
if ( info . outgoing ( ) ) {
2020-02-03 03:39:26 +01:00
auto idle = info . last_activity - std : : chrono : : steady_clock : : now ( ) ;
if ( idle < = info . idle_expiry ) {
+ + it ;
continue ;
}
2020-02-28 05:16:43 +01:00
LMQ_LOG ( info , " Closing outgoing connection to " , it - > first , " : idle timeout reached " ) ;
proxy_close_connection ( info . conn_index , CLOSE_LINGER ) ;
it = peers . erase ( it ) ;
2020-02-03 03:39:26 +01:00
}
}
}
2020-02-11 07:30:07 +01:00
void LokiMQ : : proxy_conn_cleanup ( ) {
// Drop idle connections (if we haven't done it in a while) but *only* if we have some idle
// general workers: if we don't have any idle workers then we may still have incoming messages which
// we haven't processed yet and those messages might end up resetting the last activity time.
2020-02-28 22:54:00 +01:00
if ( static_cast < int > ( workers . size ( ) ) < general_workers ) {
2020-02-13 03:10:40 +01:00
LMQ_TRACE ( " closing idle connections " ) ;
2020-02-11 07:30:07 +01:00
proxy_expire_idle_peers ( ) ;
}
auto now = std : : chrono : : steady_clock : : now ( ) ;
2020-02-28 05:16:43 +01:00
// FIXME - check other outgoing connections to see if they died and if so purge them
2020-02-11 07:30:07 +01:00
// Check any pending outgoing connections for timeout
for ( auto it = pending_connects . begin ( ) ; it ! = pending_connects . end ( ) ; ) {
auto & pc = * it ;
2020-02-28 05:16:43 +01:00
if ( std : : get < std : : chrono : : steady_clock : : time_point > ( pc ) < now ) {
job ( [ cid = ConnectionID { std : : get < long long > ( pc ) } , callback = std : : move ( std : : get < ConnectFailure > ( pc ) ) ] { callback ( cid , " connection attempt timed out " ) ; } ) ;
it = pending_connects . erase ( it ) ; // Don't let the below erase it (because it invalidates iterators)
proxy_close_connection ( std : : get < size_t > ( pc ) , CLOSE_LINGER ) ;
2020-02-11 07:30:07 +01:00
} else {
+ + it ;
}
}
// Remove any expired pending requests and schedule their callback with a failure
for ( auto it = pending_requests . begin ( ) ; it ! = pending_requests . end ( ) ; ) {
auto & callback = it - > second ;
if ( callback . first < now ) {
2020-02-21 01:13:29 +01:00
LMQ_LOG ( debug , " pending request " , to_hex ( it - > first ) , " expired, invoking callback with failure status and removing " ) ;
2020-02-11 07:30:07 +01:00
job ( [ callback = std : : move ( callback . second ) ] { callback ( false , { } ) ; } ) ;
it = pending_requests . erase ( it ) ;
} else {
+ + it ;
}
}
} ;
2020-02-28 05:16:43 +01:00
void LokiMQ : : listen_curve ( std : : string bind_addr , AllowFunc allow_connection ) {
// TODO: there's no particular reason we can't start listening after starting up; just needs to
// be implemented. (But if we can start we'll probably also want to be able to stop, so it's
// more than just binding that needs implementing).
check_not_started ( proxy_thread , " start listening " ) ;
bind . emplace_back ( std : : move ( bind_addr ) , bind_data { true , std : : move ( allow_connection ) } ) ;
}
void LokiMQ : : listen_plain ( std : : string bind_addr , AllowFunc allow_connection ) {
// TODO: As above.
check_not_started ( proxy_thread , " start listening " ) ;
bind . emplace_back ( std : : move ( bind_addr ) , bind_data { false , std : : move ( allow_connection ) } ) ;
}
2020-02-03 03:39:26 +01:00
void LokiMQ : : proxy_loop ( ) {
2020-02-28 05:16:43 +01:00
2020-02-03 03:39:26 +01:00
zap_auth . setsockopt < int > ( ZMQ_LINGER , 0 ) ;
zap_auth . bind ( ZMQ_ADDR_ZAP ) ;
workers_socket . setsockopt < int > ( ZMQ_ROUTER_MANDATORY , 1 ) ;
workers_socket . bind ( SN_ADDR_WORKERS ) ;
2020-02-28 22:54:00 +01:00
assert ( general_workers > 0 ) ;
if ( batch_jobs_reserved < 0 )
batch_jobs_reserved = ( general_workers + 1 ) / 2 ;
if ( reply_jobs_reserved < 0 )
reply_jobs_reserved = ( general_workers + 7 ) / 8 ;
2020-02-03 03:39:26 +01:00
2020-02-28 22:54:00 +01:00
max_workers = general_workers + batch_jobs_reserved + reply_jobs_reserved ;
2020-02-06 01:21:27 +01:00
for ( const auto & cat : categories ) {
2020-02-03 03:39:26 +01:00
max_workers + = cat . second . reserved_threads ;
2020-02-06 01:21:27 +01:00
}
2020-02-13 03:10:40 +01:00
# ifndef NDEBUG
2020-02-06 01:21:27 +01:00
if ( log_level ( ) > = LogLevel : : trace ) {
2020-02-25 03:20:56 +01:00
LMQ_TRACE ( " Reserving space for " , max_workers , " max workers = " , general_workers , " general plus reservations for: " ) ;
2020-02-06 01:21:27 +01:00
for ( const auto & cat : categories )
2020-02-13 03:10:40 +01:00
LMQ_TRACE ( " - " , cat . first , " : " , cat . second . reserved_threads ) ;
LMQ_TRACE ( " - (batch jobs): " , batch_jobs_reserved ) ;
2020-02-28 22:54:00 +01:00
LMQ_TRACE ( " - (reply jobs): " , reply_jobs_reserved ) ;
2020-02-06 01:21:27 +01:00
}
2020-02-13 03:10:40 +01:00
# endif
2020-02-03 03:39:26 +01:00
workers . reserve ( max_workers ) ;
if ( ! workers . empty ( ) )
throw std : : logic_error ( " Internal error: proxy thread started with active worker threads " ) ;
2020-02-28 05:16:43 +01:00
for ( size_t i = 0 ; i < bind . size ( ) ; i + + ) {
auto & b = bind [ i ] . second ;
zmq : : socket_t listener { context , zmq : : socket_type : : router } ;
std : : string auth_domain = bt_serialize ( i ) ;
listener . setsockopt ( ZMQ_ZAP_DOMAIN , auth_domain . c_str ( ) , auth_domain . size ( ) ) ;
if ( b . curve ) {
listener . setsockopt < int > ( ZMQ_CURVE_SERVER , 1 ) ;
listener . setsockopt ( ZMQ_CURVE_PUBLICKEY , pubkey . data ( ) , pubkey . size ( ) ) ;
listener . setsockopt ( ZMQ_CURVE_SECRETKEY , privkey . data ( ) , privkey . size ( ) ) ;
}
2020-02-11 07:30:07 +01:00
listener . setsockopt ( ZMQ_HANDSHAKE_IVL , ( int ) HANDSHAKE_TIME . count ( ) ) ;
listener . setsockopt < int64_t > ( ZMQ_MAXMSGSIZE , MAX_MSG_SIZE ) ;
2020-02-03 03:39:26 +01:00
listener . setsockopt < int > ( ZMQ_ROUTER_HANDOVER , 1 ) ;
listener . setsockopt < int > ( ZMQ_ROUTER_MANDATORY , 1 ) ;
2020-02-28 05:16:43 +01:00
listener . bind ( bind [ i ] . first ) ;
LMQ_LOG ( info , " LokiMQ listening on " , bind [ i ] . first ) ;
2020-02-03 03:39:26 +01:00
2020-02-28 05:16:43 +01:00
connections . push_back ( std : : move ( listener ) ) ;
auto conn_id = next_conn_id + + ;
conn_index_to_id . push_back ( conn_id ) ;
incoming_conn_index [ conn_id ] = connections . size ( ) - 1 ;
b . index = connections . size ( ) - 1 ;
2020-02-03 03:39:26 +01:00
}
2020-02-28 05:16:43 +01:00
pollitems_stale = true ;
2020-02-03 03:39:26 +01:00
2020-02-28 05:16:43 +01:00
// Also add an internal connection to self so that calling code can avoid needing to
// special-case rare situations where we are supposed to talk to a quorum member that happens to
// be ourselves (which can happen, for example, with cross-quoum Blink communication)
// FIXME: not working
//listener.bind(SN_ADDR_SELF);
2020-02-03 03:39:26 +01:00
2020-02-11 07:29:00 +01:00
if ( ! timers )
timers . reset ( zmq_timers_new ( ) ) ;
2020-02-03 03:39:26 +01:00
2020-02-11 07:30:07 +01:00
auto do_conn_cleanup = [ this ] { proxy_conn_cleanup ( ) ; } ;
using CleanupLambda = decltype ( do_conn_cleanup ) ;
if ( - 1 = = zmq_timers_add ( timers . get ( ) ,
std : : chrono : : milliseconds { CONN_CHECK_INTERVAL } . count ( ) ,
// Wrap our lambda into a C function pointer where we pass in the lambda pointer as extra arg
[ ] ( int /*timer_id*/ , void * cleanup ) { ( * static_cast < CleanupLambda * > ( cleanup ) ) ( ) ; } ,
& do_conn_cleanup ) ) {
throw zmq : : error_t { } ;
}
2020-02-03 03:39:26 +01:00
std : : vector < zmq : : message_t > parts ;
while ( true ) {
2020-02-11 07:29:00 +01:00
std : : chrono : : milliseconds poll_timeout ;
2020-02-03 03:39:26 +01:00
if ( max_workers = = 0 ) { // Will be 0 only if we are quitting
2020-02-28 05:16:43 +01:00
if ( std : : none_of ( workers . begin ( ) , workers . end ( ) , [ ] ( auto & w ) { return w . worker_thread . joinable ( ) ; } ) ) {
2020-02-03 03:39:26 +01:00
// All the workers have finished, so we can finish shutting down
return proxy_quit ( ) ;
}
2020-02-11 07:29:00 +01:00
poll_timeout = 1 s ; // We don't keep running timers when we're quitting, so don't have a timer to check
} else {
poll_timeout = std : : chrono : : milliseconds { zmq_timers_timeout ( timers . get ( ) ) } ;
2020-02-03 03:39:26 +01:00
}
2020-02-28 22:54:00 +01:00
if ( proxy_skip_one_poll )
proxy_skip_one_poll = false ;
2020-02-12 00:08:19 +01:00
else {
2020-02-13 03:10:40 +01:00
LMQ_TRACE ( " polling for new messages " ) ;
2020-02-28 05:16:43 +01:00
if ( pollitems_stale )
rebuild_pollitems ( ) ;
2020-02-12 00:08:19 +01:00
// We poll the control socket and worker socket for any incoming messages. If we have
// available worker room then also poll incoming connections and outgoing connections
// for messages to forward to a worker. Otherwise, we just look for a control message
// or a worker coming back with a ready message.
zmq : : poll ( pollitems . data ( ) , pollitems . size ( ) , poll_timeout ) ;
}
2020-02-03 03:39:26 +01:00
2020-02-13 03:10:40 +01:00
LMQ_TRACE ( " processing control messages " ) ;
2020-02-03 03:39:26 +01:00
// Retrieve any waiting incoming control messages
2020-02-06 05:50:31 +01:00
for ( parts . clear ( ) ; recv_message_parts ( command , parts , zmq : : recv_flags : : dontwait ) ; parts . clear ( ) ) {
proxy_control_message ( parts ) ;
2020-02-03 03:39:26 +01:00
}
2020-02-13 03:10:40 +01:00
LMQ_TRACE ( " processing worker messages " ) ;
2020-02-06 05:50:31 +01:00
for ( parts . clear ( ) ; recv_message_parts ( workers_socket , parts , zmq : : recv_flags : : dontwait ) ; parts . clear ( ) ) {
2020-02-06 01:21:27 +01:00
proxy_worker_message ( parts ) ;
2020-02-03 03:39:26 +01:00
}
2020-02-13 03:10:40 +01:00
LMQ_TRACE ( " processing timers " ) ;
2020-02-11 07:29:00 +01:00
zmq_timers_execute ( timers . get ( ) ) ;
2020-02-03 03:39:26 +01:00
// Handle any zap authentication
2020-02-13 03:10:40 +01:00
LMQ_TRACE ( " processing zap requests " ) ;
2020-02-28 05:16:43 +01:00
process_zap_requests ( ) ;
2020-02-03 03:39:26 +01:00
2020-02-06 01:21:27 +01:00
// See if we can drain anything from the current queue before we potentially add to it
// below.
2020-02-13 03:10:40 +01:00
LMQ_TRACE ( " processing queued jobs and messages " ) ;
2020-02-06 01:21:27 +01:00
proxy_process_queue ( ) ;
2020-02-03 03:39:26 +01:00
2020-02-13 03:10:40 +01:00
LMQ_TRACE ( " processing new incoming messages " ) ;
2020-02-03 03:39:26 +01:00
2020-02-06 01:21:27 +01:00
// We round-robin connections when pulling off pending messages one-by-one rather than
// pulling off all messages from one connection before moving to the next; thus in cases of
// contention we end up fairly distributing.
2020-02-28 05:16:43 +01:00
const int num_sockets = connections . size ( ) ;
2020-02-11 07:30:07 +01:00
std : : queue < int > queue_index ;
for ( int i = 0 ; i < num_sockets ; i + + )
2020-02-06 01:21:27 +01:00
queue_index . push ( i ) ;
2020-02-03 03:39:26 +01:00
2020-02-28 22:54:00 +01:00
for ( parts . clear ( ) ; ! queue_index . empty ( ) & & static_cast < int > ( workers . size ( ) ) < max_workers ; parts . clear ( ) ) {
2020-02-06 01:21:27 +01:00
size_t i = queue_index . front ( ) ;
queue_index . pop ( ) ;
2020-02-28 05:16:43 +01:00
auto & sock = connections [ i ] ;
2020-02-03 03:39:26 +01:00
2020-02-06 05:50:31 +01:00
if ( ! recv_message_parts ( sock , parts , zmq : : recv_flags : : dontwait ) )
2020-02-06 01:21:27 +01:00
continue ;
2020-02-03 03:39:26 +01:00
2020-02-06 01:21:27 +01:00
// We only pull this one message now but then requeue the socket so that after we check
// all other sockets we come back to this one to check again.
queue_index . push ( i ) ;
2020-02-03 03:39:26 +01:00
2020-02-06 01:21:27 +01:00
if ( parts . empty ( ) ) {
LMQ_LOG ( warn , " Ignoring empty (0-part) incoming message " ) ;
continue ;
2020-02-03 03:39:26 +01:00
}
2020-02-06 01:21:27 +01:00
if ( ! proxy_handle_builtin ( i , parts ) )
proxy_to_worker ( i , parts ) ;
2020-02-03 03:39:26 +01:00
}
2020-02-13 03:10:40 +01:00
LMQ_TRACE ( " done proxy loop " ) ;
2020-02-03 03:39:26 +01:00
}
}
2020-02-11 07:30:07 +01:00
std : : pair < LokiMQ : : category * , const std : : pair < LokiMQ : : CommandCallback , bool > * > LokiMQ : : get_command ( std : : string & command ) {
2020-02-03 03:39:26 +01:00
if ( command . size ( ) > MAX_CATEGORY_LENGTH + 1 + MAX_COMMAND_LENGTH ) {
LMQ_LOG ( warn , " Invalid command ' " , command , " ': command too long " ) ;
return { } ;
}
if ( ! command_aliases . empty ( ) ) {
auto it = command_aliases . find ( command ) ;
if ( it ! = command_aliases . end ( ) )
command = it - > second ;
}
auto dot = command . find ( ' . ' ) ;
if ( dot = = 0 | | dot = = std : : string : : npos ) {
LMQ_LOG ( warn , " Invalid command ' " , command , " ': expected <category>.<command> " ) ;
return { } ;
}
2020-02-25 03:20:56 +01:00
std : : string catname = command . substr ( 0 , dot ) ;
2020-02-03 03:39:26 +01:00
std : : string cmd = command . substr ( dot + 1 ) ;
auto catit = categories . find ( catname ) ;
if ( catit = = categories . end ( ) ) {
LMQ_LOG ( warn , " Invalid command category ' " , catname , " ' " ) ;
return { } ;
}
const auto & category = catit - > second ;
auto callback_it = category . commands . find ( cmd ) ;
if ( callback_it = = category . commands . end ( ) ) {
LMQ_LOG ( warn , " Invalid command ' " , command , " ' " ) ;
return { } ;
}
return { & catit - > second , & callback_it - > second } ;
}
2020-02-06 01:21:27 +01:00
void LokiMQ : : proxy_worker_message ( std : : vector < zmq : : message_t > & parts ) {
// Process messages sent by workers
if ( parts . size ( ) ! = 2 ) {
LMQ_LOG ( error , " Received send invalid " , parts . size ( ) , " -part message " ) ;
return ;
}
auto route = view ( parts [ 0 ] ) , cmd = view ( parts [ 1 ] ) ;
2020-02-13 03:10:40 +01:00
LMQ_TRACE ( " worker message from " , route ) ;
2020-02-06 01:21:27 +01:00
assert ( route . size ( ) > = 2 & & route [ 0 ] = = ' w ' & & route [ 1 ] > = ' 0 ' & & route [ 1 ] < = ' 9 ' ) ;
string_view worker_id_str { & route [ 1 ] , route . size ( ) - 1 } ; // Chop off the leading "w"
unsigned int worker_id = detail : : extract_unsigned ( worker_id_str ) ;
if ( ! worker_id_str . empty ( ) /* didn't consume everything */ | | worker_id > = workers . size ( ) ) {
LMQ_LOG ( error , " Worker id ' " , route , " ' is invalid, unable to process worker command " ) ;
return ;
}
auto & run = workers [ worker_id ] ;
2020-02-13 03:10:40 +01:00
LMQ_TRACE ( " received " , cmd , " command from " , route ) ;
2020-02-06 01:21:27 +01:00
if ( cmd = = " RAN " ) {
LMQ_LOG ( debug , " Worker " , route , " finished " , run . command ) ;
if ( run . is_batch_job ) {
2020-02-28 22:54:00 +01:00
auto & jobs = run . is_reply_job ? reply_jobs : batch_jobs ;
auto & active = run . is_reply_job ? reply_jobs_active : batch_jobs_active ;
assert ( active > 0 ) ;
active - - ;
2020-02-06 01:21:27 +01:00
bool clear_job = false ;
if ( run . batch_jobno = = - 1 ) {
// Returned from the completion function
clear_job = true ;
} else {
auto status = run . batch - > job_finished ( ) ;
if ( status = = detail : : BatchStatus : : complete ) {
2020-02-28 22:54:00 +01:00
jobs . emplace ( run . batch , - 1 ) ;
2020-02-11 07:15:22 +01:00
} else if ( status = = detail : : BatchStatus : : complete_proxy ) {
try {
run . batch - > job_completion ( ) ; // RUN DIRECTLY IN PROXY THREAD
} catch ( const std : : exception & e ) {
2020-02-28 22:54:00 +01:00
// Raise these to error levels: the caller really shouldn't be doing
// anything non-trivial in an in-proxy completion function!
2020-02-11 07:15:22 +01:00
LMQ_LOG ( error , " proxy thread caught exception when processing in-proxy completion command: " , e . what ( ) ) ;
} catch ( . . . ) {
LMQ_LOG ( error , " proxy thread caught non-standard exception when processing in-proxy completion command " ) ;
}
clear_job = true ;
2020-02-06 01:21:27 +01:00
} else if ( status = = detail : : BatchStatus : : done ) {
clear_job = true ;
}
}
if ( clear_job ) {
batches . erase ( run . batch ) ;
delete run . batch ;
run . batch = nullptr ;
}
} else {
assert ( run . cat - > active_threads > 0 ) ;
run . cat - > active_threads - - ;
}
if ( max_workers = = 0 ) { // Shutting down
2020-02-13 03:10:40 +01:00
LMQ_TRACE ( " Telling worker " , route , " to quit " ) ;
2020-02-06 01:21:27 +01:00
route_control ( workers_socket , route , " QUIT " ) ;
} else {
idle_workers . push_back ( worker_id ) ;
}
} else if ( cmd = = " QUITTING " ) {
2020-02-28 05:16:43 +01:00
workers [ worker_id ] . worker_thread . join ( ) ;
2020-02-06 01:21:27 +01:00
LMQ_LOG ( debug , " Worker " , route , " exited normally " ) ;
} else {
LMQ_LOG ( error , " Worker " , route , " sent unknown control message: ` " , cmd , " ' " ) ;
}
}
// Return true if we recognized/handled the builtin command (even if we reject it for whatever
// reason)
bool LokiMQ::proxy_handle_builtin(size_t conn_index, std::vector<zmq::message_t>& parts) {
    // Outgoing (DEALER) sockets deliver the command in parts[0]; incoming (ROUTER) sockets
    // prefix every message with the peer's routing id, pushing the command to parts[1].
    bool outgoing = connections[conn_index].getsockopt<int>(ZMQ_TYPE) == ZMQ_DEALER;

    string_view route, cmd;
    if (parts.size() < (outgoing ? 1 : 2)) {
        LMQ_LOG(warn, "Received empty message; ignoring");
        return true;
    }
    if (outgoing) {
        cmd = view(parts[0]);
    } else {
        route = view(parts[0]);
        cmd = view(parts[1]);
    }
    LMQ_TRACE("Checking for builtins: ", cmd, " from ", peer_address(parts.back()));

    if (cmd == "REPLY") {
        // REPLY <tag> [<data>...]: a response to a request we sent earlier; find the stashed
        // callback by reply tag and schedule it with the remaining frames as its data.
        size_t tag_pos = (outgoing ? 1 : 2);
        if (parts.size() <= tag_pos) {
            LMQ_LOG(warn, "Received REPLY without a reply tag; ignoring");
            return true;
        }
        std::string reply_tag{view(parts[tag_pos])};
        auto it = pending_requests.find(reply_tag);
        if (it != pending_requests.end()) {
            LMQ_LOG(debug, "Received REPLY for pending command ", to_hex(reply_tag), "; scheduling callback");
            // Copy the data frames into owned strings: the callback runs later (in a worker),
            // after `parts` has been destroyed.
            std::vector<std::string> data;
            data.reserve(parts.size() - (tag_pos + 1));
            for (auto it = parts.begin() + (tag_pos + 1); it != parts.end(); ++it)
                data.emplace_back(view(*it));
            proxy_schedule_reply_job([callback=std::move(it->second.second), data=std::move(data)] {
                    callback(true, std::move(data));
            });
            pending_requests.erase(it);
        } else {
            LMQ_LOG(warn, "Received REPLY with unknown or already handled reply tag (", to_hex(reply_tag), "); ignoring");
        }
        return true;
    } else if (cmd == "HI") {
        // Handshake initiation from a remote client; only valid on an incoming connection.
        if (outgoing) {
            LMQ_LOG(warn, "Got invalid 'HI' message on an outgoing connection; ignoring");
            return true;
        }
        LMQ_LOG(info, "Incoming client from ", peer_address(parts.back()), " sent HI, replying with HELLO");
        send_routed_message(connections[conn_index], std::string{route}, "HELLO");
        return true;
    } else if (cmd == "HELLO") {
        // Handshake response completing a connect_remote() we initiated; must arrive on an
        // outgoing connection that still has a pending_connects entry for this conn_index.
        if (!outgoing) {
            LMQ_LOG(warn, "Got invalid 'HELLO' message on an incoming connection; ignoring");
            return true;
        }
        auto it = std::find_if(pending_connects.begin(), pending_connects.end(),
                [&](auto& pc) { return std::get<size_t>(pc) == conn_index; });
        if (it == pending_connects.end()) {
            LMQ_LOG(warn, "Got invalid 'HELLO' message on an already handshaked incoming connection; ignoring");
            return true;
        }
        auto& pc = *it;
        auto pit = peers.find(std::get<long long>(pc));
        if (pit == peers.end()) {
            LMQ_LOG(warn, "Got invalid 'HELLO' message with invalid conn_id; ignoring");
            return true;
        }
        LMQ_LOG(info, "Got initial HELLO server response from ", peer_address(parts.back()));
        // Fire the user's success callback from a worker, not here in the proxy loop.
        proxy_schedule_reply_job([on_success=std::move(std::get<ConnectSuccess>(pc)),
                conn=conn_index_to_id[conn_index]] {
            on_success(conn);
        });
        pending_connects.erase(it);
        return true;
    } else if (cmd == "BYE") {
        // Remote asked us to go away; only honoured on connections we opened ourselves.
        if (outgoing) {
            std::string pk;
            bool sn;
            AuthLevel a;
            std::tie(pk, sn, a) = extract_metadata(parts.back());
            ConnectionID conn = sn ? ConnectionID{std::move(pk)} : conn_index_to_id[conn_index];
            LMQ_LOG(info, "BYE command received; disconnecting from ", conn);
            proxy_disconnect(conn, 1s);
        } else {
            LMQ_LOG(warn, "Got invalid 'BYE' command on an incoming socket; ignoring");
        }
        return true;
    }
    else if (cmd == "FORBIDDEN" || cmd == "NOT_A_SERVICE_NODE") {
        return true; // FIXME - ignore these? Log?
    }
    return false;
}
2020-02-06 01:21:27 +01:00
LokiMQ : : run_info & LokiMQ : : get_idle_worker ( ) {
if ( idle_workers . empty ( ) ) {
size_t id = workers . size ( ) ;
assert ( workers . capacity ( ) > id ) ;
workers . emplace_back ( ) ;
auto & r = workers . back ( ) ;
r . worker_id = id ;
2020-02-28 05:16:43 +01:00
r . worker_routing_id = " w " + std : : to_string ( id ) ;
2020-02-06 01:21:27 +01:00
return r ;
2020-02-03 03:39:26 +01:00
}
2020-02-06 01:21:27 +01:00
size_t id = idle_workers . back ( ) ;
idle_workers . pop_back ( ) ;
return workers [ id ] ;
}
2020-02-28 22:54:00 +01:00
void LokiMQ : : set_batch_threads ( int threads ) {
2020-02-06 01:21:27 +01:00
if ( proxy_thread . joinable ( ) )
throw std : : logic_error ( " Cannot change reserved batch threads after calling `start()` " ) ;
2020-02-28 22:54:00 +01:00
if ( threads < - 1 ) // -1 is the default which is based on general threads
throw std : : out_of_range ( " Invalid set_batch_threads() value " + std : : to_string ( threads ) ) ;
2020-02-06 01:21:27 +01:00
batch_jobs_reserved = threads ;
}
2020-02-28 22:54:00 +01:00
void LokiMQ : : set_reply_threads ( int threads ) {
if ( proxy_thread . joinable ( ) )
throw std : : logic_error ( " Cannot change reserved reply threads after calling `start()` " ) ;
if ( threads < - 1 ) // -1 is the default which is based on general threads
throw std : : out_of_range ( " Invalid set_reply_threads() value " + std : : to_string ( threads ) ) ;
reply_jobs_reserved = threads ;
}
void LokiMQ : : set_general_threads ( int threads ) {
2020-02-06 01:21:27 +01:00
if ( proxy_thread . joinable ( ) )
throw std : : logic_error ( " Cannot change general thread count after calling `start()` " ) ;
2020-02-28 22:54:00 +01:00
if ( threads < 1 )
throw std : : out_of_range ( " Invalid set_general_threads() value " + std : : to_string ( threads ) + " : general threads must be > 0 " ) ;
2020-02-06 01:21:27 +01:00
general_workers = threads ;
}
2020-02-28 22:54:00 +01:00
LokiMQ : : run_info & LokiMQ : : run_info : : load ( category * cat_ , std : : string command_ , ConnectionID conn_ , std : : string route_ ,
std : : vector < zmq : : message_t > data_parts_ , const std : : pair < CommandCallback , bool > * callback_ ) {
2020-02-06 01:21:27 +01:00
is_batch_job = false ;
2020-02-28 22:54:00 +01:00
is_reply_job = false ;
cat = cat_ ;
command = std : : move ( command_ ) ;
conn = std : : move ( conn_ ) ;
conn_route = std : : move ( route_ ) ;
data_parts = std : : move ( data_parts_ ) ;
callback = callback_ ;
2020-02-06 01:21:27 +01:00
return * this ;
}
2020-02-28 22:54:00 +01:00
LokiMQ : : run_info & LokiMQ : : run_info : : load ( pending_command & & pending ) {
return load ( & pending . cat , std : : move ( pending . command ) , std : : move ( pending . conn ) ,
std : : move ( pending . conn_route ) , std : : move ( pending . data_parts ) , pending . callback ) ;
}
// Populates this run slot for a batch (or reply) job.  `bj` pairs the batch pointer with the
// index of the sub-job to run; `reply_job` distinguishes reply jobs from plain batch jobs.
LokiMQ::run_info& LokiMQ::run_info::load(batch_job&& bj, bool reply_job) {
    is_batch_job = true;
    is_reply_job = reply_job;
    batch = bj.first;
    batch_jobno = bj.second;
    return *this;
}
void LokiMQ : : proxy_run_worker ( run_info & run ) {
2020-02-28 05:16:43 +01:00
if ( ! run . worker_thread . joinable ( ) )
run . worker_thread = std : : thread { & LokiMQ : : worker_thread , this , run . worker_id } ;
2020-02-06 01:21:27 +01:00
else
2020-02-28 05:16:43 +01:00
send_routed_message ( workers_socket , run . worker_routing_id , " RUN " ) ;
2020-02-06 01:21:27 +01:00
}
2020-02-28 22:54:00 +01:00
void LokiMQ : : proxy_run_batch_jobs ( std : : queue < batch_job > & jobs , const int reserved , int & active , bool reply ) {
while ( ! jobs . empty ( ) & &
( active < reserved | | static_cast < int > ( workers . size ( ) - idle_workers . size ( ) ) < general_workers ) ) {
proxy_run_worker ( get_idle_worker ( ) . load ( std : : move ( jobs . front ( ) ) , reply ) ) ;
jobs . pop ( ) ;
active + + ;
}
}
// Dispatches queued work to idle/new workers in priority order: internal batch jobs first,
// then reply jobs, then queued incoming commands.
void LokiMQ::proxy_process_queue() {
    // First up: process any batch jobs; since these are internal they are given higher priority.
    proxy_run_batch_jobs(batch_jobs, batch_jobs_reserved, batch_jobs_active, false);

    // Next any reply batch jobs (which are a bit different from the above, since they are
    // externally triggered but for things we initiated locally).
    proxy_run_batch_jobs(reply_jobs, reply_jobs_reserved, reply_jobs_active, true);

    // Finally general incoming commands
    for (auto it = pending_commands.begin(); it != pending_commands.end() && active_workers() < max_workers; ) {
        auto& pending = *it;
        // A queued command may run if its category still has reserved capacity, or if there is
        // a free general worker slot.
        if (pending.cat.active_threads < pending.cat.reserved_threads
                || active_workers() < general_workers) {
            proxy_run_worker(get_idle_worker().load(std::move(pending)));
            // NB: load() moved out the command/conn/data members, but it takes the category by
            // pointer (&pending.cat), so updating its counters here is still valid.
            pending.cat.queued--;
            pending.cat.active_threads++;
            assert(pending.cat.queued >= 0);
            it = pending_commands.erase(it);
        } else {
            ++it; // no available general or reserved worker spots for this job right now
        }
    }
}
// Resolves the sending peer of an incoming command, checks authorization, and either forwards
// the command to a worker thread or queues it when no worker slot is available.
void LokiMQ::proxy_to_worker(size_t conn_index, std::vector<zmq::message_t>& parts) {
    bool outgoing = connections[conn_index].getsockopt<int>(ZMQ_TYPE) == ZMQ_DEALER;

    // For incoming (ROUTER) messages parts[0] is the peer's routing id; stash it in a
    // temporary peer_info in case we don't end up with a stored peers entry.
    peer_info tmp_peer;
    tmp_peer.conn_index = conn_index;
    if (!outgoing) tmp_peer.route = parts[0].to_string();
    peer_info* peer = nullptr;
    if (outgoing) {
        auto it = peers.find(conn_index_to_id[conn_index]);
        if (it == peers.end()) {
            LMQ_LOG(warn, "Internal error: connection index not found");
            return;
        }
        peer = &it->second;
    } else {
        // Incoming: pull pubkey/SN-status/auth level from the ZMTP metadata that was attached
        // during the ZAP handshake (see process_zap_requests).
        std::tie(tmp_peer.pubkey, tmp_peer.service_node, tmp_peer.auth_level) = extract_metadata(parts.back());
        if (tmp_peer.service_node) {
            // It's a service node so we should have a peer_info entry; see if we can find one with
            // the same route, and if not, add one.
            auto pr = peers.equal_range(tmp_peer.pubkey);
            for (auto it = pr.first; it != pr.second; ++it) {
                if (it->second.route == tmp_peer.route) {
                    peer = &it->second;
                    // Upgrade permissions in case we have something higher on the socket
                    peer->service_node |= tmp_peer.service_node;
                    if (tmp_peer.auth_level > peer->auth_level)
                        peer->auth_level = tmp_peer.auth_level;
                    break;
                }
            }
            if (!peer) {
                peer = &peers.emplace(ConnectionID{tmp_peer.pubkey}, std::move(tmp_peer))->second;
            }
        } else {
            // Incoming, non-SN connection: we don't store a peer_info for this, so just use the
            // temporary one
            peer = &tmp_peer;
        }
    }

    size_t command_part_index = outgoing ? 0 : 1;
    std::string command = parts[command_part_index].to_string();
    auto cat_call = get_command(command);

    if (!cat_call.first) {
        // Unregistered command: tell the sender rather than silently dropping.
        if (outgoing)
            send_direct_message(connections[conn_index], "UNKNOWNCOMMAND", command);
        else
            send_routed_message(connections[conn_index], peer->route, "UNKNOWNCOMMAND", command);
        return;
    }

    auto& category = *cat_call.first;

    // proxy_check_auth sends any rejection reply (or disconnect) itself.
    if (!proxy_check_auth(conn_index, outgoing, *peer, command, category, parts.back()))
        return;

    // Steal any data message parts
    size_t data_part_index = command_part_index + 1;
    std::vector<zmq::message_t> data_parts;
    data_parts.reserve(parts.size() - data_part_index);
    for (auto it = parts.begin() + data_part_index; it != parts.end(); ++it)
        data_parts.push_back(std::move(*it));

    if (category.active_threads >= category.reserved_threads && active_workers() >= general_workers) {
        // No free reserved or general spots, try to queue it for later
        if (category.max_queue >= 0 && category.queued >= category.max_queue) {
            LMQ_LOG(warn, "No space to queue incoming command ", command, "; already have ", category.queued,
                    " commands queued in that category (max ", category.max_queue, "); dropping message");
            return;
        }
        LMQ_LOG(debug, "No available free workers, queuing ", command, " for later");
        ConnectionID conn{peer->service_node ? ConnectionID::SN_ID : conn_index_to_id[conn_index].id, peer->pubkey};
        pending_commands.emplace_back(category, std::move(command), std::move(data_parts), cat_call.second, std::move(conn), tmp_peer.route);
        category.queued++;
        return;
    }

    // Requests carry a reply tag as their first data part; without one a reply is impossible.
    if (cat_call.second->second /*is_request*/ && data_parts.empty()) {
        LMQ_LOG(warn, "Received an invalid request command with no reply tag; dropping message");
        return;
    }

    auto& run = get_idle_worker();
    {
        ConnectionID c{peer->service_node ? ConnectionID::SN_ID : conn_index_to_id[conn_index].id, peer->pubkey};
        // Only incoming non-SN/non-outgoing peers need the explicit route to reply.
        if (outgoing || peer->service_node)
            tmp_peer.route.clear();
        run.load(&category, std::move(command), std::move(c), std::move(tmp_peer.route),
                std::move(data_parts), cat_call.second);
    }

    if (outgoing)
        peer->activity(); // outgoing connection activity, pump the activity timer

    LMQ_TRACE("Forwarding incoming ", run.command, " from ", run.conn, " @ ", peer_address(parts.back()),
            " to worker ", run.worker_routing_id);

    proxy_run_worker(run);
    category.active_threads++;
}
// Checks whether `peer` is allowed to invoke `command` in category `cat`.  Returns true if the
// command may proceed; on rejection it sends the appropriate error reply (or drops the
// connection) and returns false.
bool LokiMQ::proxy_check_auth(size_t conn_index, bool outgoing, const peer_info& peer,
        const std::string& command, const category& cat, zmq::message_t& msg) {
    std::string reply;
    if (peer.auth_level < cat.access.auth) {
        LMQ_LOG(warn, "Access denied to ", command, " for peer [", to_hex(peer.pubkey), "]/", peer_address(msg),
                ": peer auth level ", to_string(peer.auth_level), " < ", to_string(cat.access.auth));
        reply = "FORBIDDEN";
    }
    else if (cat.access.local_sn && !local_service_node) {
        LMQ_LOG(warn, "Access denied to ", command, " for peer [", to_hex(peer.pubkey), "]/", peer_address(msg),
                ": that command is only available when this LokiMQ is running in service node mode");
        reply = "NOT_A_SERVICE_NODE";
    }
    else if (cat.access.remote_sn && !peer.service_node) {
        LMQ_LOG(warn, "Access denied to ", command, " for peer [", to_hex(peer.pubkey), "]/", peer_address(msg),
                ": remote is not recognized as a service node");
        // Disconnect: we don't think the remote is a SN, but it issued a command only SNs should be
        // issuing.  Drop the connection; if the remote has something important to relay it will
        // reconnect, at which point we will reassess the SN status on the new incoming connection.
        if (outgoing)
            proxy_disconnect(peer.service_node ? ConnectionID{peer.pubkey} : conn_index_to_id[conn_index], 1s);
        else
            send_routed_message(connections[conn_index], peer.route, "BYE");
        return false;
    }

    if (reply.empty())
        return true;

    // Denied (but not disconnected): send the error reply back the way the command came.
    if (outgoing)
        send_direct_message(connections[conn_index], std::move(reply), command);
    else
        send_routed_message(connections[conn_index], peer.route, std::move(reply), command);
    return false;
}
// Handles queued ZeroMQ ZAP (authentication) requests on the inproc zap socket: looks up which
// bind the request belongs to, runs that bind's `allow` policy callback, and replies with the
// ZAP status plus ZMTP metadata (X-SN / X-AuthLevel) that the proxy later reads back via
// extract_metadata().
void LokiMQ::process_zap_requests() {
    // Drain every pending request without blocking.
    for (std::vector<zmq::message_t> frames; recv_message_parts(zap_auth, frames, zmq::recv_flags::dontwait); frames.clear()) {
#ifndef NDEBUG
        if (log_level() >= LogLevel::trace) {
            std::ostringstream o;
            o << "Processing ZAP authentication request:";
            for (size_t i = 0; i < frames.size(); i++) {
                o << "\n[" << i << "]: ";
                auto v = view(frames[i]);
                // Frames 1 (request id) and 6 (CURVE pubkey) are binary; hex-encode them.
                if (i == 1 || i == 6)
                    o << to_hex(v);
                else
                    o << v;
            }
            log_(LogLevel::trace, __FILE__, __LINE__, o.str());
        } else
#endif
            LMQ_LOG(debug, "Processing ZAP authentication request");

        // https://rfc.zeromq.org/spec:27/ZAP/
        //
        // The request message SHALL consist of the following message frames:
        //
        // The version frame, which SHALL contain the three octets "1.0".
        // The request id, which MAY contain an opaque binary blob.
        // The domain, which SHALL contain a (non-empty) string.
        // The address, the origin network IP address.
        // The identity, the connection Identity, if any.
        // The mechanism, which SHALL contain a string.
        // The credentials, which SHALL be zero or more opaque frames.
        //
        // The reply message SHALL consist of the following message frames:
        //
        // The version frame, which SHALL contain the three octets "1.0".
        // The request id, which MAY contain an opaque binary blob.
        // The status code, which SHALL contain a string.
        // The status text, which MAY contain a string.
        // The user id, which SHALL contain a string.
        // The metadata, which MAY contain a blob.
        //
        // (NB: there are also null address delimiters at the beginning of each mentioned in the
        // RFC, but those have already been removed through the use of a REP socket)

        std::vector<std::string> response_vals(6);
        response_vals[0] = "1.0"; // version
        if (frames.size() >= 2)
            response_vals[1] = std::string{view(frames[1])}; // unique identifier
        std::string& status_code = response_vals[2], &status_text = response_vals[3];

        if (frames.size() < 6 || view(frames[0]) != "1.0") {
            LMQ_LOG(error, "Bad ZAP authentication request: version != 1.0 or invalid ZAP message parts");
            status_code = "500";
            status_text = "Internal error: invalid auth request";
        } else {
            // The "domain" frame carries a bt-serialized index into `bind`, identifying which
            // listening socket (and thus which allow policy) this request is for.
            auto auth_domain = view(frames[2]);
            size_t bind_id = (size_t) -1;
            try {
                bind_id = bt_deserialize<size_t>(view(frames[2]));
            } catch (...) {}

            if (bind_id >= bind.size()) {
                LMQ_LOG(error, "Bad ZAP authentication request: invalid auth domain '", auth_domain, "'");
                status_code = "400";
                status_text = "Unknown authentication domain: " + std::string{auth_domain};
            } else if (bind[bind_id].second.curve
                    ? !(frames.size() == 7 && view(frames[5]) == "CURVE")
                    : !(frames.size() == 6 && view(frames[5]) == "NULL")) {
                // The mechanism must match what the bind expects: CURVE binds carry exactly one
                // credential frame (the pubkey); NULL binds carry none.
                LMQ_LOG(error, "Bad ZAP authentication request: invalid ",
                        bind[bind_id].second.curve ? "CURVE" : "NULL", " authentication request");
                status_code = "500";
                status_text = "Invalid authentication request mechanism";
            } else if (bind[bind_id].second.curve && frames[6].size() != 32) {
                LMQ_LOG(error, "Bad ZAP authentication request: invalid request pubkey");
                status_code = "500";
                status_text = "Invalid public key size for CURVE authentication";
            } else {
                auto ip = view(frames[3]);
                string_view pubkey;
                if (bind[bind_id].second.curve)
                    pubkey = view(frames[6]);
                // Ask the user-supplied policy what this ip/pubkey is allowed to do.
                auto result = bind[bind_id].second.allow(ip, pubkey);
                bool sn = result.remote_sn;

                auto& user_id = response_vals[4];
                if (bind[bind_id].second.curve) {
                    user_id.reserve(64);
                    to_hex(pubkey.begin(), pubkey.end(), std::back_inserter(user_id));
                }

                if (result.auth <= AuthLevel::denied || result.auth > AuthLevel::admin) {
                    LMQ_LOG(info, "Access denied for incoming ", view(frames[5]), (sn ? " service node" : " client"),
                            " connection from ", !user_id.empty() ? user_id + " at " : ""s, ip,
                            " with initial auth level ", to_string(result.auth));
                    status_code = "400";
                    status_text = "Access denied";
                    user_id.clear();
                } else {
                    LMQ_LOG(info, "Accepted incoming ", view(frames[5]), (sn ? " service node" : " client"),
                            " connection with authentication level ", to_string(result.auth),
                            " from ", !user_id.empty() ? user_id + " at " : ""s, ip);

                    // Attach the SN flag and granted auth level as ZMTP metadata so later
                    // message handling (extract_metadata) can recover them per-message.
                    auto& metadata = response_vals[5];
                    metadata += zmtp_metadata("X-SN", result.remote_sn ? "1" : "0");
                    metadata += zmtp_metadata("X-AuthLevel", to_string(result.auth));

                    status_code = "200";
                    status_text = "";
                }
            }
        }

        LMQ_TRACE("ZAP request result: ", status_code, " ", status_text);
        std::vector<zmq::message_t> response;
        response.reserve(response_vals.size());
        for (auto& r : response_vals) response.push_back(create_message(std::move(r)));
        send_message_parts(zap_auth, response.begin(), response.end());
    }
}
// Destructor: shuts down the proxy thread (and, via it, all worker threads) before the object
// is torn down.
//
// Fix: `proxy_thread.join()` on a thread that was never started (i.e. `start()` was never
// called) throws std::system_error, which from a destructor means std::terminate.  Guard on
// joinable() and return early — there is nothing to shut down in that case, and sending QUIT
// over the control socket would target a proxy that does not exist.
LokiMQ::~LokiMQ() {
    if (!proxy_thread.joinable())
        return;

    LMQ_LOG(info, "LokiMQ shutting down proxy thread");
    detail::send_control(get_control_socket(), "QUIT");
    proxy_thread.join();
    LMQ_LOG(info, "LokiMQ proxy thread has stopped");
}
2020-02-28 05:16:43 +01:00
ConnectionID LokiMQ : : connect_sn ( string_view pubkey , std : : chrono : : milliseconds keep_alive , string_view hint ) {
2020-02-11 07:30:07 +01:00
check_started ( proxy_thread , " connect " ) ;
detail : : send_control ( get_control_socket ( ) , " CONNECT_SN " , bt_serialize < bt_dict > ( { { " pubkey " , pubkey } , { " keep-alive " , keep_alive . count ( ) } , { " hint " , hint } } ) ) ;
2020-02-28 05:16:43 +01:00
return pubkey ;
2020-02-11 07:30:07 +01:00
}
// Asks the proxy thread to open an outgoing connection to `remote` (a tcp:// or ipc://
// address), optionally CURVE-encrypted when `pubkey` is given.  `on_connect`/`on_failure` are
// invoked (from a worker) once the handshake succeeds or times out.  Returns the ConnectionID
// the new connection will be known by.
ConnectionID LokiMQ::connect_remote(string_view remote, ConnectSuccess on_connect, ConnectFailure on_failure,
        string_view pubkey, AuthLevel auth_level, std::chrono::milliseconds timeout) {
    if (!proxy_thread.joinable())
        LMQ_LOG(warn, "connect_remote() called before start(); this won't take effect until start() is called");

    if (remote.size() < 7 || !(remote.substr(0, 6) == "tcp://" || remote.substr(0, 6) == "ipc://" /* unix domain sockets */))
        throw std::runtime_error("Invalid connect_remote: remote address '" + std::string{remote} + "' is not a valid or supported zmq connect string");

    auto id = next_conn_id++;
    LMQ_TRACE("telling proxy to connect to ", remote, ", id ", id,
            pubkey.empty() ? "using NULL auth" : ", using CURVE with remote pubkey [" + to_hex(pubkey) + "]");
    // The callbacks are moved onto the heap and smuggled through the (serialized) control
    // message as pointer values; proxy_connect_remote takes ownership back and deletes them.
    detail::send_control(get_control_socket(), "CONNECT_REMOTE", bt_serialize<bt_dict>({
        {"auth", static_cast<std::underlying_type_t<AuthLevel>>(auth_level)},
        {"conn_id", id},
        {"connect", reinterpret_cast<uintptr_t>(new ConnectSuccess{std::move(on_connect)})},
        {"failure", reinterpret_cast<uintptr_t>(new ConnectFailure{std::move(on_failure)})},
        {"pubkey", pubkey},
        {"remote", remote},
        {"timeout", timeout.count()},
    }));

    return id;
}
2020-02-11 07:30:07 +01:00
void LokiMQ : : proxy_connect_remote ( bt_dict_consumer data ) {
2020-02-28 05:16:43 +01:00
AuthLevel auth_level = AuthLevel : : none ;
long long conn_id = - 1 ;
2020-02-11 07:30:07 +01:00
ConnectSuccess on_connect ;
ConnectFailure on_failure ;
std : : string remote ;
std : : string remote_pubkey ;
std : : chrono : : milliseconds timeout = REMOTE_CONNECT_TIMEOUT ;
2020-02-28 05:16:43 +01:00
if ( data . skip_until ( " auth_level " ) )
auth_level = static_cast < AuthLevel > ( data . consume_integer < std : : underlying_type_t < AuthLevel > > ( ) ) ;
if ( data . skip_until ( " conn_id " ) )
conn_id = data . consume_integer < long long > ( ) ;
2020-02-11 07:30:07 +01:00
if ( data . skip_until ( " connect " ) ) {
auto * ptr = reinterpret_cast < ConnectSuccess * > ( data . consume_integer < uintptr_t > ( ) ) ;
on_connect = std : : move ( * ptr ) ;
delete ptr ;
}
if ( data . skip_until ( " failure " ) ) {
auto * ptr = reinterpret_cast < ConnectFailure * > ( data . consume_integer < uintptr_t > ( ) ) ;
on_failure = std : : move ( * ptr ) ;
delete ptr ;
}
if ( data . skip_until ( " pubkey " ) ) {
remote_pubkey = data . consume_string ( ) ;
assert ( remote_pubkey . size ( ) = = 32 | | remote_pubkey . empty ( ) ) ;
}
2020-02-28 05:16:43 +01:00
if ( data . skip_until ( " remote " ) )
2020-02-11 07:30:07 +01:00
remote = data . consume_string ( ) ;
if ( data . skip_until ( " timeout " ) )
timeout = std : : chrono : : milliseconds { data . consume_integer < uint64_t > ( ) } ;
2020-02-28 05:16:43 +01:00
if ( conn_id = = - 1 | | remote . empty ( ) )
throw std : : runtime_error ( " Internal error: CONNECT_REMOTE proxy command missing required 'conn_id' and/or 'remote' value " ) ;
2020-02-11 07:30:07 +01:00
2020-02-28 05:16:43 +01:00
LMQ_LOG ( info , " Establishing remote connection to " , remote , remote_pubkey . empty ( ) ? " (NULL auth) " : " via CURVE expecting pubkey " + to_hex ( remote_pubkey ) ) ;
assert ( conn_index_to_id . size ( ) = = connections . size ( ) ) ;
2020-02-11 07:30:07 +01:00
zmq : : socket_t sock { context , zmq : : socket_type : : dealer } ;
try {
setup_outgoing_socket ( sock , remote_pubkey ) ;
sock . connect ( remote ) ;
} catch ( const zmq : : error_t & e ) {
2020-02-28 22:54:00 +01:00
proxy_schedule_reply_job ( [ conn_id , on_failure = std : : move ( on_failure ) , what = " connect() failed: " s + e . what ( ) ] {
2020-02-28 05:16:43 +01:00
on_failure ( conn_id , std : : move ( what ) ) ;
} ) ;
2020-02-11 07:30:07 +01:00
return ;
}
2020-02-28 05:16:43 +01:00
connections . push_back ( std : : move ( sock ) ) ;
LMQ_LOG ( debug , " Opened new zmq socket to " , remote , " , conn_id " , conn_id , " ; sending HI " ) ;
send_direct_message ( connections . back ( ) , " HI " ) ;
pending_connects . emplace_back ( connections . size ( ) - 1 , conn_id , std : : chrono : : steady_clock : : now ( ) + timeout ,
2020-02-11 07:30:07 +01:00
std : : move ( on_connect ) , std : : move ( on_failure ) ) ;
2020-02-28 05:16:43 +01:00
peer_info peer ;
peer . pubkey = std : : move ( remote_pubkey ) ;
peer . service_node = false ;
peer . auth_level = auth_level ;
peer . conn_index = connections . size ( ) - 1 ;
ConnectionID conn { conn_id , peer . pubkey } ;
conn_index_to_id . push_back ( conn ) ;
assert ( connections . size ( ) = = conn_index_to_id . size ( ) ) ;
peer . idle_expiry = 24 h * 10 * 365 ; // "forever"
peer . activity ( ) ;
peers . emplace ( std : : move ( conn ) , std : : move ( peer ) ) ;
2020-02-11 07:30:07 +01:00
}
2020-02-28 05:16:43 +01:00
void LokiMQ : : disconnect ( ConnectionID id , std : : chrono : : milliseconds linger ) {
detail : : send_control ( get_control_socket ( ) , " DISCONNECT " , bt_serialize < bt_dict > ( {
{ " conn_id " , id . id } ,
{ " linger_ms " , linger . count ( ) } ,
{ " pubkey " , id . pk } ,
} ) ) ;
2020-02-11 07:30:07 +01:00
}
2020-02-28 05:16:43 +01:00
void LokiMQ : : proxy_disconnect ( bt_dict_consumer data ) {
ConnectionID connid { - 1 } ;
std : : chrono : : milliseconds linger = 1 s ;
if ( data . skip_until ( " conn_id " ) )
connid . id = data . consume_integer < long long > ( ) ;
if ( data . skip_until ( " linger_ms " ) )
linger = std : : chrono : : milliseconds ( data . consume_integer < long long > ( ) ) ;
if ( data . skip_until ( " pubkey " ) )
connid . pk = data . consume_string ( ) ;
if ( connid . sn ( ) & & connid . pk . size ( ) ! = 32 )
throw std : : runtime_error ( " Error: invalid disconnect of SN without a valid pubkey " ) ;
proxy_disconnect ( std : : move ( connid ) , linger ) ;
}
void LokiMQ : : proxy_disconnect ( ConnectionID conn , std : : chrono : : milliseconds linger ) {
LMQ_TRACE ( " Disconnecting outgoing connection to " , conn ) ;
auto pr = peers . equal_range ( conn ) ;
for ( auto it = pr . first ; it ! = pr . second ; + + it ) {
auto & peer = it - > second ;
if ( peer . outgoing ( ) ) {
LMQ_LOG ( info , " Closing outgoing connection to " , conn ) ;
proxy_close_connection ( peer . conn_index , linger ) ;
peers . erase ( it ) ;
return ;
}
}
LMQ_LOG ( warn , " Failed to disconnect " , conn , " : no such outgoing connection " ) ;
}
2020-02-11 07:30:07 +01:00
void LokiMQ : : job ( std : : function < void ( ) > f ) {
2020-02-06 01:21:27 +01:00
auto * b = new Batch < void > ;
b - > add_job ( std : : move ( f ) ) ;
auto * baseptr = static_cast < detail : : Batch * > ( b ) ;
detail : : send_control ( get_control_socket ( ) , " BATCH " , bt_serialize ( reinterpret_cast < uintptr_t > ( baseptr ) ) ) ;
}
// Fires a registered timer (called from the proxy when the zmq timer expires): wraps the
// stored job in a single-job Batch and queues it for a worker.  With `squelch` set, a new
// firing is skipped while a previous invocation of the same timer is still running.
void LokiMQ::_queue_timer_job(int timer_id) {
    auto it = timer_jobs.find(timer_id);
    if (it == timer_jobs.end()) {
        LMQ_LOG(warn, "Could not find timer job ", timer_id);
        return;
    }
    auto& timer = it->second;
    auto& squelch = std::get<1>(timer);
    auto& running = std::get<2>(timer);
    if (squelch && running) {
        LMQ_LOG(debug, "Not running timer job ", timer_id, " because a job for that timer is still running");
        return;
    }

    // Note: job is copied (not moved) out of the tuple since the timer fires repeatedly.
    auto* b = new Batch<void>;
    b->add_job(std::get<0>(timer));
    if (squelch) {
        running = true;
        // Completion runs in the proxy: clear the running flag (the timer may have been
        // removed in the meantime, hence the re-lookup) and log any exception from the job.
        b->completion_proxy([this, timer_id](auto results) {
            try { results[0].get(); }
            catch (const std::exception& e) { LMQ_LOG(warn, "timer job ", timer_id, " raised an exception: ", e.what()); }
            catch (...) { LMQ_LOG(warn, "timer job ", timer_id, " raised a non-std exception"); }
            auto it = timer_jobs.find(timer_id);
            if (it != timer_jobs.end())
                std::get<2>(it->second) /*running*/ = false;
        });
    }
    batches.insert(b);
    batch_jobs.emplace(static_cast<detail::Batch*>(b), 0);
    assert(b->size() == 1);
}
void LokiMQ : : add_timer ( std : : function < void ( ) > job , std : : chrono : : milliseconds interval , bool squelch ) {
if ( proxy_thread . joinable ( ) ) {
auto * jobptr = new std : : function < void ( ) > { std : : move ( job ) } ;
detail : : send_control ( get_control_socket ( ) , " TIMER " , bt_serialize ( bt_list { {
reinterpret_cast < uintptr_t > ( jobptr ) ,
interval . count ( ) ,
squelch } } ) ) ;
} else {
proxy_timer ( std : : move ( job ) , interval , squelch ) ;
}
}
void LokiMQ : : TimersDeleter : : operator ( ) ( void * timers ) { zmq_timers_destroy ( & timers ) ; }
std : : ostream & operator < < ( std : : ostream & os , LogLevel lvl ) {
os < < ( lvl = = LogLevel : : trace ? " trace " :
lvl = = LogLevel : : debug ? " debug " :
lvl = = LogLevel : : info ? " info " :
lvl = = LogLevel : : warn ? " warn " :
lvl = = LogLevel : : error ? " ERROR " :
lvl = = LogLevel : : fatal ? " FATAL " :
" unknown " ) ;
return os ;
}
// Returns `size` bytes of cryptographically-weak (mt19937_64) random data, e.g. for reply
// tags.  Not suitable for key material.
//
// Fix: std::uniform_int_distribution<char> is undefined behavior — the standard only permits
// short/int/long/long long and their unsigned counterparts as the IntType (and MSVC rejects
// char outright).  Distribute over int across the full char range and narrow each draw.
std::string make_random_string(size_t size) {
    static thread_local std::mt19937_64 rng{std::random_device{}()};
    static thread_local std::uniform_int_distribution<int> dist{
            std::numeric_limits<char>::min(), std::numeric_limits<char>::max()};
    std::string rando;
    rando.reserve(size);
    for (size_t i = 0; i < size; i++)
        rando += static_cast<char>(dist(rng));
    return rando;
}
2020-02-11 07:30:07 +01:00
} // namespace lokimq
2020-02-03 03:39:26 +01:00
// vim:sw=4:et