Add command to fetch RCs from remote node

This command will be called periodically by clients to maintain a list
of RCs of active relay nodes.  It will require another command (future
commit) to fetch the RouterIDs from many nodes and reconcile those, so
we have some notion of the goodness of the RCs we're getting; if we get
what seems to be a bad set of RCs (this concept is not yet implemented),
we will choose a different relay to fetch RCs from.  These are left as
TODOs for now.
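
As a rough illustration of the reconciliation step described above (purely
hypothetical; nothing like this is implemented in this commit), one could
majority-vote RouterIDs across the per-relay responses that NodeDB
accumulates in router_id_fetch_responses:

#include <unordered_map>
#include <unordered_set>
#include <vector>

// Hypothetical sketch only: treat a RouterID as "good" if a strict
// majority of the queried relays reported it.
template <typename RouterID>
std::unordered_set<RouterID>
reconcile_router_ids(
    const std::unordered_map<RouterID, std::vector<RouterID>>& responses)
{
  std::unordered_map<RouterID, size_t> votes;
  for (const auto& [source, ids] : responses)
    for (const auto& rid : ids)
      ++votes[rid];

  std::unordered_set<RouterID> good;
  for (const auto& [rid, count] : votes)
    if (count * 2 > responses.size())  // reported by a strict majority
      good.insert(rid);
  return good;
}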
Thomas Winget 2023-11-17 02:41:42 -05:00
parent d520e1d2c4
commit 6952e8f705
8 changed files with 276 additions and 8 deletions

@@ -1 +1 @@
-Subproject commit a7de63756dcc5c31cb899a4b810e6434b1a7c01c
+Subproject commit f6172d58d3358473a4c98d96270058a32e166d5f

llarp/link/link_manager.cpp

@@ -6,10 +6,13 @@
#include <llarp/messages/dht.hpp>
#include <llarp/messages/exit.hpp>
#include <llarp/messages/path.hpp>
#include <llarp/messages/rc.hpp>
#include <llarp/nodedb.hpp>
#include <llarp/path/path.hpp>
#include <llarp/router/router.hpp>
#include <oxenc/bt_producer.h>
#include <algorithm>
#include <set>
@@ -427,6 +430,120 @@ namespace llarp
}
}
void
LinkManager::fetch_rcs(
const RouterID& source, rc_time since, const std::vector<RouterID>& explicit_ids)
{
send_control_message(
source,
"fetch_rcs",
RCFetchMessage::serialize(since, explicit_ids),
[this, source = source](oxen::quic::message m) {
if (m.timed_out)
{
// TODO: keep track of this failure for relay quality metrics?
log::info(link_cat, "RC Fetch to {} timed out", source);
return;
}
if (not m)
{
log::info(link_cat, "RC Fetch to {} returned error.", source);
return;
}
try
{
oxenc::bt_dict_consumer btdc{m.body()};
btdc.required("rcs");
auto btlc = btdc.consume_list_consumer();
auto timestamp = rc_time{std::chrono::seconds{btdc.require<int64_t>("time")}};
std::vector<RemoteRC> rcs;
while (not btlc.is_finished())
{
// TODO: maybe make RemoteRC constructor throw a bespoke exception type
// and catch it below so we know what about parsing failed?
rcs.emplace_back(btlc.consume_dict_consumer());
}
node_db->ingest_rcs(source, std::move(rcs), timestamp);
}
catch (const std::exception& e)
{
// TODO: Inform NodeDB of failure (perhaps just a call to rotate_rc_source())
log::info(link_cat, "Failed to parse RC Fetch response from {}", source);
return;
}
});
}
void
LinkManager::handle_fetch_rcs(oxen::quic::message m)
{
// this handler should not be registered for clients
assert(_router.is_service_node());
const auto& rcs = node_db->get_rcs();
const auto now =
std::chrono::time_point_cast<std::chrono::seconds>(std::chrono::system_clock::now());
try
{
oxenc::bt_dict_consumer btdc{m.body()};
btdc.required("explicit_ids");
auto explicit_ids = btdc.consume_list<std::vector<std::string>>();
auto since_time = rc_time{std::chrono::seconds{btdc.require<int64_t>("since")}};
if (explicit_ids.size() > (rcs.size() / 4))
{
log::info(
link_cat, "Remote requested too many relay IDs (greater than 1/4 of what we have).");
m.respond(
serialize_response({{messages::STATUS_KEY, RCFetchMessage::INVALID_REQUEST}}));
return;
}
std::unordered_set<RouterID> explicit_relays;
for (auto& sv : explicit_ids)
{
if (sv.size() != RouterID::SIZE)
{
m.respond(serialize_response(
{{messages::STATUS_KEY, RCFetchMessage::INVALID_REQUEST}}));
return;
}
explicit_relays.emplace(reinterpret_cast<const byte_t*>(sv.data()));
}
oxenc::bt_dict_producer resp;
{
auto btlp = resp.append_list("rcs");
const auto& last_time = node_db->get_last_rc_update_times();
// if since_time isn't epoch start, subtract a bit for buffer
if (since_time != decltype(since_time)::min())
since_time -= 5s;
for (const auto& [_, rc] : rcs)
{
if (last_time.at(rc.router_id()) > since_time or explicit_relays.count(rc.router_id()))
btlp.append_encoded(rc.view());
}
}
resp.append("time", now.time_since_epoch().count());
m.respond(std::move(resp).str(), false);
}
catch (const std::exception& e)
{
log::info(link_cat, "Exception handling RC Fetch request: {}", e.what());
m.respond(messages::ERROR_RESPONSE);
}
}
bool
LinkManager::have_connection_to(const RouterID& remote, bool client_only) const
{

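For reference, both sides of this exchange use plain bt-encoded dicts, with
keys in the sorted order bencoding requires. A hypothetical request/response
pair, with placeholder values and whitespace added for readability (real
encodings contain none):

// fetch_rcs request body, as produced by RCFetchMessage::serialize():
d
  12:explicit_ids l 32:<rid-1 bytes> 32:<rid-2 bytes> e
  5:since i1700190000e
e

// fetch_rcs response body, as produced by handle_fetch_rcs():
d
  3:rcs l <bt-encoded RC dict> <bt-encoded RC dict> ... e
  4:time i1700190002e
e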
llarp/link/link_manager.hpp

@@ -226,6 +226,12 @@ namespace llarp
void
handle_gossip_rc(oxen::quic::message m);
void
fetch_rcs(const RouterID& source, rc_time since, const std::vector<RouterID>& explicit_ids);
void
handle_fetch_rcs(oxen::quic::message m);
bool
have_connection_to(const RouterID& remote, bool client_only = false) const;

llarp/messages/rc.hpp (new file)

@@ -0,0 +1,28 @@
#pragma once
#include "common.hpp"
namespace llarp::RCFetchMessage
{
inline constexpr auto INVALID_REQUEST = "Invalid relay ID requested."sv;
inline static std::string
serialize(std::chrono::system_clock::time_point since, const std::vector<RouterID>& explicit_ids)
{
oxenc::bt_dict_producer btdp;
try
{
// NB: bt dict keys must be appended in lexicographic order, so the
// "explicit_ids" list has to precede "since" (and the list sub-producer
// must be finished before appending anything else to the parent dict).
{
auto id_list = btdp.append_list("explicit_ids");
for (const auto& rid : explicit_ids)
id_list.append(rid.ToView());
}
btdp.append("since", since.time_since_epoch() / 1s);
}
catch (...)
{
log::error(link_cat, "Error: RCFetchMessage failed to bt encode contents!");
}
return std::move(btdp).str();
}
} // namespace llarp::RCFetchMessage
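
A minimal call-site sketch (rid_a, rid_b, and last_update are hypothetical
placeholders):

// Request every RC newer than `last_update`, plus two specific relays
// regardless of age; the result is the payload that
// LinkManager::fetch_rcs() sends on the "fetch_rcs" endpoint.
std::vector<RouterID> want{rid_a, rid_b};
std::string body = RCFetchMessage::serialize(last_update, want);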

llarp/nodedb.cpp

@@ -112,6 +112,60 @@ namespace llarp
bootstraps.emplace(rc.router_id(), rc);
}
void
NodeDB::rotate_rc_source()
{}
// TODO: trust model
void
NodeDB::ingest_rcs(RouterID source, std::vector<RemoteRC> rcs, rc_time timestamp)
{
(void)source;
// TODO: if we don't currently have a "trusted" relay we've been fetching from,
// this will be a full list of RCs. We need to first check if it aligns closely
// with our trusted RouterID list, then replace our RCs with the incoming set.
for (auto& rc : rcs)
put_rc_if_newer(std::move(rc), timestamp);
// TODO: if we have a "trusted" relay we've been fetching from, this will be
// an incremental update to the RC list, so *after* insertion we check if the
// RCs' RouterIDs closely match our trusted RouterID list.
last_rc_update_relay_timestamp = timestamp;
}
// TODO: trust model
void
NodeDB::ingest_router_ids(RouterID source, std::vector<RouterID> ids)
{
router_id_fetch_responses[source] = std::move(ids);
router_id_response_count++;
if (router_id_response_count == router_id_fetch_sources.size())
{
// TODO: reconcile all the responses
}
}
void
NodeDB::update_rcs()
{
std::vector<RouterID> needed;
const auto now =
std::chrono::time_point_cast<std::chrono::seconds>(std::chrono::system_clock::now());
for (const auto& [rid, rc] : known_rcs)
{
if (now - rc.timestamp() > RouterContact::OUTDATED_AGE)
needed.push_back(rid);
}
router.link_manager().fetch_rcs(
rc_fetch_source, last_rc_update_relay_timestamp, std::move(needed));
}
void
NodeDB::set_router_whitelist(
const std::vector<RouterID>& whitelist,
@@ -277,13 +331,14 @@ namespace llarp
}
bool
-NodeDB::put_rc(RemoteRC rc)
+NodeDB::put_rc(RemoteRC rc, rc_time now)
{
const auto& rid = rc.router_id();
if (not want_rc(rid))
return false;
known_rcs.erase(rid);
known_rcs.emplace(rid, std::move(rc));
last_rc_update_times[rid] = now;
return true;
}
@@ -294,12 +349,12 @@ namespace llarp
}
bool
-NodeDB::put_rc_if_newer(RemoteRC rc)
+NodeDB::put_rc_if_newer(RemoteRC rc, rc_time now)
{
auto itr = known_rcs.find(rc.router_id());
if (itr == known_rcs.end() or itr->second.other_is_newer(rc))
{
-return put_rc(std::move(rc));
+return put_rc(std::move(rc), now);
}
return false;
}
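
The point of the new rc_time parameter on put_rc()/put_rc_if_newer()
(declared with local-clock defaults in the header below) is that
ingest_rcs() stamps fetched entries with the time the source relay
reported, keeping last_rc_update_times in that relay's time domain so
the next "since" value is not skewed by our local clock; the 5s buffer
in handle_fetch_rcs() absorbs the remaining slop. A sketch of the two
call styles (variable names hypothetical):

// Gossiped or bootstrapped RC: the default argument stamps it with our
// own clock.
node_db.put_rc_if_newer(std::move(gossiped_rc));

// Fetched RC: stamp it with the source relay's reported fetch time, as
// ingest_rcs() does above.
node_db.put_rc_if_newer(std::move(fetched_rc), relay_reported_time);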

llarp/nodedb.hpp

@@ -26,7 +26,7 @@ namespace llarp
{
std::unordered_map<RouterID, RemoteRC> known_rcs;
-const Router& router;
+Router& router;
const fs::path m_Root;
const std::function<void(std::function<void()>)> disk;
@@ -42,19 +42,31 @@
std::unordered_map<RouterID, RemoteRC> bootstraps;
// Router lists for snodes
// whitelist = active routers
std::unordered_set<RouterID> router_whitelist;
// greylist = fully funded, but decommissioned routers
std::unordered_set<RouterID> router_greylist;
// greenlist = registered but not fully-staked routers
std::unordered_set<RouterID> router_greenlist;
// all registered relays (snodes)
std::unordered_set<RouterID> registered_routers;
std::unordered_map<RouterID, rc_time> last_rc_update_times;
// Router list for clients
std::unordered_set<RouterID> client_known_rcs;
// only ever use specific edges as path first-hops
std::unordered_set<RouterID> pinned_edges;
// rc update info
RouterID rc_fetch_source;
rc_time last_rc_update_relay_timestamp;
std::unordered_set<RouterID> router_id_fetch_sources;
std::unordered_map<RouterID, std::vector<RouterID>> router_id_fetch_responses;
// process responses once all are received (or failed/timed out)
size_t router_id_response_count{0};
bool
want_rc(const RouterID& rid) const;
@@ -80,6 +92,38 @@ namespace llarp
return registered_routers;
}
const std::unordered_map<RouterID, RemoteRC>&
get_rcs() const
{
return known_rcs;
}
const std::unordered_map<RouterID, rc_time>&
get_last_rc_update_times() const
{
return last_rc_update_times;
}
// If we receive a bad set of RCs from our current RC source relay, we consider
// that relay to be a bad source of RCs and we randomly choose a new one.
//
// When using a new RC fetch relay, we first re-fetch the full RC list and, if
// that aligns with our RouterID list, we go back to periodic updates from that relay.
//
// This will respect edge-pinning and attempt to use a relay we already have
// a connection with.
void
rotate_rc_source();
void
ingest_rcs(RouterID source, std::vector<RemoteRC> rcs, rc_time timestamp);
void
ingest_router_ids(RouterID source, std::vector<RouterID> ids);
void
update_rcs();
void
set_router_whitelist(
const std::vector<RouterID>& whitelist,
@@ -225,12 +269,18 @@ namespace llarp
/// put (or replace) the RC if we consider it valid (want_rc). returns true if put.
bool
-put_rc(RemoteRC rc);
+put_rc(
+RemoteRC rc,
+rc_time now =
+std::chrono::time_point_cast<std::chrono::seconds>(std::chrono::system_clock::now()));
/// if we consider it valid (want_rc),
/// put this rc into the cache if it is not there or is newer than the one there already
/// returns true if the rc was inserted
bool
-put_rc_if_newer(RemoteRC rc);
+put_rc_if_newer(
+RemoteRC rc,
+rc_time now =
+std::chrono::time_point_cast<std::chrono::seconds>(std::chrono::system_clock::now()));
};
} // namespace llarp
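
rotate_rc_source() is an empty stub in this commit. Purely as an
illustration of the behavior the comment above describes (random choice
honoring pinned edges; the "prefer a relay we already have a connection
with" part is omitted here, and <random> is assumed), a future version
might look something like:

// Illustrative sketch only; the committed stub does nothing.
void
NodeDB::rotate_rc_source()
{
  std::vector<RouterID> candidates;
  for (const auto& [rid, rc] : known_rcs)
  {
    if (rid == rc_fetch_source)
      continue;  // don't re-pick the source we're rotating away from
    if (not pinned_edges.empty() and pinned_edges.count(rid) == 0)
      continue;  // respect edge pinning
    candidates.push_back(rid);
  }
  if (candidates.empty())
    return;  // nothing to rotate to; keep the current source
  std::mt19937_64 rng{std::random_device{}()};
  rc_fetch_source = candidates[rng() % candidates.size()];
}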

llarp/router/router.cpp

@@ -835,6 +835,13 @@ namespace llarp
next_rc_gossip = now_timepoint + RouterContact::STALE_AGE - random_delta;
}
// (client-only) periodically fetch updated RCs
if (now_timepoint - last_rc_fetch > RC_UPDATE_INTERVAL)
{
node_db()->update_rcs();
last_rc_fetch = now_timepoint;
}
// remove RCs for nodes that are no longer allowed by network policy
node_db()->RemoveIf([&](const RemoteRC& rc) -> bool {
// don't purge bootstrap nodes from nodedb

llarp/router/router.hpp

@@ -57,6 +57,8 @@ namespace llarp
static constexpr size_t INTROSET_STORAGE_REDUNDANCY =
(INTROSET_RELAY_REDUNDANCY * INTROSET_REQS_PER_RELAY);
static const std::chrono::seconds RC_UPDATE_INTERVAL = 5min;
struct Contacts;
struct Router : std::enable_shared_from_this<Router>
@@ -129,6 +131,9 @@
std::chrono::system_clock::time_point next_rc_gossip{
std::chrono::system_clock::time_point::min()};
std::chrono::system_clock::time_point last_rc_fetch{
std::chrono::system_clock::time_point::min()};
// should we be sending padded messages every interval?
bool send_padding = false;