From 6952e8f7054654b2ea34e5e6608ebff9a2a5a6e9 Mon Sep 17 00:00:00 2001
From: Thomas Winget
Date: Fri, 17 Nov 2023 02:41:42 -0500
Subject: [PATCH] Add command to fetch RCs from remote node

Clients will call this command periodically to maintain a list of RCs
for active relay nodes.  It will require a companion command (future
commit) that fetches RouterIDs from many nodes and reconciles them, so
we have some notion of how trustworthy the RCs we receive are.  If we
get what appears to be a bad set of RCs (this check is not yet
implemented), we will choose a different relay to fetch RCs from.
These pieces are left as TODOs for now.
---
 external/oxen-encoding      |   2 +-
 llarp/link/link_manager.cpp | 117 ++++++++++++++++++++++++++++++++++++
 llarp/link/link_manager.hpp |   6 ++
 llarp/messages/rc.hpp       |  30 ++++++++++
 llarp/nodedb.cpp            |  61 ++++++++++++++++++-
 llarp/nodedb.hpp            |  58 ++++++++++++++++--
 llarp/router/router.cpp     |   7 +++
 llarp/router/router.hpp     |   5 ++
 8 files changed, 278 insertions(+), 8 deletions(-)
 create mode 100644 llarp/messages/rc.hpp

diff --git a/external/oxen-encoding b/external/oxen-encoding
index a7de63756..f6172d58d 160000
--- a/external/oxen-encoding
+++ b/external/oxen-encoding
@@ -1 +1 @@
-Subproject commit a7de63756dcc5c31cb899a4b810e6434b1a7c01c
+Subproject commit f6172d58d3358473a4c98d96270058a32e166d5f
diff --git a/llarp/link/link_manager.cpp b/llarp/link/link_manager.cpp
index 6f75b3d8d..a42d98b92 100644
--- a/llarp/link/link_manager.cpp
+++ b/llarp/link/link_manager.cpp
@@ -6,10 +6,13 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
+#include
+
 #include
 #include
@@ -427,6 +430,120 @@ namespace llarp
     }
   }
 
+  void
+  LinkManager::fetch_rcs(
+      const RouterID& source, rc_time since, const std::vector<RouterID>& explicit_ids)
+  {
+    send_control_message(
+        source,
+        "fetch_rcs",
+        RCFetchMessage::serialize(since, explicit_ids),
+        [this, source = source](oxen::quic::message m) {
+          if (m.timed_out)
+          {
+            // TODO: keep track of this failure for relay quality metrics?
+            log::info(link_cat, "RC Fetch to {} timed out", source);
+            return;
+          }
+          if (not m)
+          {
+            log::info(link_cat, "RC Fetch to {} returned error.", source);
+            return;
+          }
+
+          try
+          {
+            oxenc::bt_dict_consumer btdc{m.body()};
+            btdc.required("rcs");
+            auto btlc = btdc.consume_list_consumer();
+            auto timestamp = rc_time{std::chrono::seconds{btdc.require<int64_t>("time")}};
+
+            std::vector<RemoteRC> rcs;
+            while (not btlc.is_finished())
+            {
+              // TODO: maybe make RemoteRC constructor throw a bespoke exception type
+              // and catch it below so we know what about parsing failed?
+              rcs.emplace_back(btlc.consume_dict_consumer());
+            }
+
+            node_db->ingest_rcs(source, std::move(rcs), timestamp);
+          }
+          catch (const std::exception& e)
+          {
+            // TODO: Inform NodeDB of failure (perhaps just a call to rotate_rc_source())
+            log::info(link_cat, "Failed to parse RC Fetch response from {}", source);
+            return;
+          }
+        });
+  }
+
+  void
+  LinkManager::handle_fetch_rcs(oxen::quic::message m)
+  {
+    // this handler should not be registered for clients
+    assert(_router.is_service_node());
+
+    const auto& rcs = node_db->get_rcs();
+    const auto now =
+        std::chrono::time_point_cast<std::chrono::seconds>(std::chrono::system_clock::now());
+    try
+    {
+      oxenc::bt_dict_consumer btdc{m.body()};
+
+      btdc.required("explicit_ids");
+      auto explicit_ids = btdc.consume_list<std::vector<std::string_view>>();
+      auto since_time = rc_time{std::chrono::seconds{btdc.require<int64_t>("since")}};
+
+      if (explicit_ids.size() > (rcs.size() / 4))
+      {
+        log::info(
+            link_cat, "Remote requested too many relay IDs (greater than 1/4 of what we have).");
+        m.respond(
+            serialize_response({{messages::STATUS_KEY, RCFetchMessage::INVALID_REQUEST}}));
+        return;
+      }
+
+      std::unordered_set<RouterID> explicit_relays;
+      for (auto& sv : explicit_ids)
+      {
+        if (sv.size() != RouterID::SIZE)
+        {
+          m.respond(serialize_response(
+              {{messages::STATUS_KEY, RCFetchMessage::INVALID_REQUEST}}));
+          return;
+        }
+        explicit_relays.emplace(reinterpret_cast<const byte_t*>(sv.data()));
+      }
+
+      oxenc::bt_dict_producer resp;
+
+      {
+        auto btlp = resp.append_list("rcs");
+
+        const auto& last_time = node_db->get_last_rc_update_times();
+
+        // if since_time isn't epoch start, subtract a bit for buffer
+        if (since_time != decltype(since_time)::min())
+          since_time -= 5s;
+
+        for (const auto& [_, rc] : rcs)
+        {
+          if (last_time.at(rc.router_id()) > since_time or explicit_relays.count(rc.router_id()))
+            btlp.append_encoded(rc.view());
+        }
+      }
+
+      resp.append("time", now.time_since_epoch().count());
+
+      m.respond(std::move(resp).str(), false);
+    }
+    catch (const std::exception& e)
+    {
+      log::info(link_cat, "Exception handling RC Fetch request: {}", e.what());
+      m.respond(messages::ERROR_RESPONSE);
+    }
+  }
+
   bool
   LinkManager::have_connection_to(const RouterID& remote, bool client_only) const
   {
diff --git a/llarp/link/link_manager.hpp b/llarp/link/link_manager.hpp
index 9967123cc..219cd7ca8 100644
--- a/llarp/link/link_manager.hpp
+++ b/llarp/link/link_manager.hpp
@@ -226,6 +226,12 @@ namespace llarp
     void
     handle_gossip_rc(oxen::quic::message m);
 
+    void
+    fetch_rcs(const RouterID& source, rc_time since, const std::vector<RouterID>& explicit_ids);
+
+    void
+    handle_fetch_rcs(oxen::quic::message m);
+
     bool
     have_connection_to(const RouterID& remote, bool client_only = false) const;
 
diff --git a/llarp/messages/rc.hpp b/llarp/messages/rc.hpp
new file mode 100644
index 000000000..3aafc5e4c
--- /dev/null
+++ b/llarp/messages/rc.hpp
@@ -0,0 +1,30 @@
+#pragma once
+
+#include "common.hpp"
+
+namespace llarp::RCFetchMessage
+{
+  inline constexpr auto INVALID_REQUEST = "Invalid relay ID requested."sv;
+
+  inline static std::string
+  serialize(std::chrono::system_clock::time_point since, const std::vector<RouterID>& explicit_ids)
+  {
+    oxenc::bt_dict_producer btdp;
+
+    try
+    {
+      {
+        auto id_list = btdp.append_list("explicit_ids");
+        for (const auto& rid : explicit_ids)
+          id_list.append(rid.ToView());
+      }
+      btdp.append("since", since.time_since_epoch() / 1s);
+    }
+    catch (...)
+    {
+      log::error(link_cat, "Error: RCFetchMessage failed to bt encode contents!");
+    }
+
+    return std::move(btdp).str();
+  }
+} // namespace llarp::RCFetchMessage
diff --git a/llarp/nodedb.cpp b/llarp/nodedb.cpp
index 092bebb6d..9fbf8b4ad 100644
--- a/llarp/nodedb.cpp
+++ b/llarp/nodedb.cpp
@@ -112,6 +112,60 @@ namespace llarp
     bootstraps.emplace(rc.router_id(), rc);
   }
 
+  void
+  NodeDB::rotate_rc_source()
+  {}
+
+  // TODO: trust model
+  void
+  NodeDB::ingest_rcs(RouterID source, std::vector<RemoteRC> rcs, rc_time timestamp)
+  {
+    (void)source;
+
+    // TODO: if we don't currently have a "trusted" relay we've been fetching from,
+    // this will be a full list of RCs.  We need to first check if it aligns closely
+    // with our trusted RouterID list, then replace our RCs with the incoming set.
+
+    for (auto& rc : rcs)
+      put_rc_if_newer(std::move(rc), timestamp);
+
+    // TODO: if we have a "trusted" relay we've been fetching from, this will be
+    // an incremental update to the RC list, so *after* insertion we check if the
+    // RCs' RouterIDs closely match our trusted RouterID list.
+
+    last_rc_update_relay_timestamp = timestamp;
+  }
+
+  // TODO: trust model
+  void
+  NodeDB::ingest_router_ids(RouterID source, std::vector<RouterID> ids)
+  {
+    router_id_fetch_responses[source] = std::move(ids);
+
+    router_id_response_count++;
+    if (router_id_response_count == router_id_fetch_sources.size())
+    {
+      // TODO: reconcile all the responses
+    }
+  }
+
+  void
+  NodeDB::update_rcs()
+  {
+    std::vector<RouterID> needed;
+
+    const auto now =
+        std::chrono::time_point_cast<std::chrono::seconds>(std::chrono::system_clock::now());
+    for (const auto& [rid, rc] : known_rcs)
+    {
+      if (now - rc.timestamp() > RouterContact::OUTDATED_AGE)
+        needed.push_back(rid);
+    }
+
+    router.link_manager().fetch_rcs(
+        rc_fetch_source, last_rc_update_relay_timestamp, std::move(needed));
+  }
+
   void
   NodeDB::set_router_whitelist(
       const std::vector<RouterID>& whitelist,
@@ -277,13 +331,14 @@ namespace llarp
   }
 
   bool
-  NodeDB::put_rc(RemoteRC rc)
+  NodeDB::put_rc(RemoteRC rc, rc_time now)
   {
     const auto& rid = rc.router_id();
     if (not want_rc(rid))
       return false;
     known_rcs.erase(rid);
     known_rcs.emplace(rid, std::move(rc));
+    last_rc_update_times[rid] = now;
     return true;
   }
 
@@ -294,12 +349,12 @@ namespace llarp
   }
 
   bool
-  NodeDB::put_rc_if_newer(RemoteRC rc)
+  NodeDB::put_rc_if_newer(RemoteRC rc, rc_time now)
   {
     auto itr = known_rcs.find(rc.router_id());
     if (itr == known_rcs.end() or itr->second.other_is_newer(rc))
    {
-      return put_rc(std::move(rc));
+      return put_rc(std::move(rc), now);
    }
     return false;
   }
diff --git a/llarp/nodedb.hpp b/llarp/nodedb.hpp
index e126004b8..58eea3b33 100644
--- a/llarp/nodedb.hpp
+++ b/llarp/nodedb.hpp
@@ -26,7 +26,7 @@ namespace llarp
   {
     std::unordered_map<RouterID, RemoteRC> known_rcs;
 
-    const Router& router;
+    Router& router;
     const fs::path m_Root;
     const std::function<void(std::function<void()>)> disk;
@@ -42,19 +42,31 @@ namespace llarp
 
     std::unordered_map<RouterID, RemoteRC> bootstraps;
 
+    // Router lists for snodes
     // whitelist = active routers
     std::unordered_set<RouterID> router_whitelist;
     // greylist = fully funded, but decommissioned routers
     std::unordered_set<RouterID> router_greylist;
     // greenlist = registered but not fully-staked routers
     std::unordered_set<RouterID> router_greenlist;
-    // all registered relays (snodes)
     std::unordered_set<RouterID> registered_routers;
+    std::unordered_map<RouterID, rc_time> last_rc_update_times;
+
+    // Router list for clients
+    std::unordered_set<RouterID> client_known_rcs;
 
     // only ever use to specific edges as path first-hops
     std::unordered_set<RouterID> pinned_edges;
 
+    // rc update info
+    RouterID rc_fetch_source;
+    rc_time last_rc_update_relay_timestamp;
+    std::unordered_set<RouterID> router_id_fetch_sources;
+    std::unordered_map<RouterID, std::vector<RouterID>> router_id_fetch_responses;
+    // process responses once all are received (or failed/timed out)
+    size_t router_id_response_count{0};
+
     bool
     want_rc(const RouterID& rid) const;
 
@@ -80,6 +92,38 @@ namespace llarp
       return registered_routers;
     }
 
+    const std::unordered_map<RouterID, RemoteRC>&
+    get_rcs() const
+    {
+      return known_rcs;
+    }
+
+    const std::unordered_map<RouterID, rc_time>&
+    get_last_rc_update_times() const
+    {
+      return last_rc_update_times;
+    }
+
+    // If we receive a bad set of RCs from our current RC source relay, we consider
+    // that relay to be a bad source of RCs and we randomly choose a new one.
+    //
+    // When using a new RC fetch relay, we first re-fetch the full RC list and, if
+    // that aligns with our RouterID list, we go back to periodic updates from that relay.
+    //
+    // This will respect edge-pinning and attempt to use a relay we already have
+    // a connection with.
+    void
+    rotate_rc_source();
+
+    void
+    ingest_rcs(RouterID source, std::vector<RemoteRC> rcs, rc_time timestamp);
+
+    void
+    ingest_router_ids(RouterID source, std::vector<RouterID> ids);
+
+    void
+    update_rcs();
+
     void
     set_router_whitelist(
         const std::vector<RouterID>& whitelist,
@@ -225,12 +269,18 @@ namespace llarp
 
     /// put (or replace) the RC if we consider it valid (want_rc). returns true if put.
     bool
-    put_rc(RemoteRC rc);
+    put_rc(
+        RemoteRC rc,
+        rc_time now =
+            std::chrono::time_point_cast<std::chrono::seconds>(std::chrono::system_clock::now()));
 
     /// if we consider it valid (want_rc),
     /// put this rc into the cache if it is not there or is newer than the one there already
     /// returns true if the rc was inserted
     bool
-    put_rc_if_newer(RemoteRC rc);
+    put_rc_if_newer(
+        RemoteRC rc,
+        rc_time now =
+            std::chrono::time_point_cast<std::chrono::seconds>(std::chrono::system_clock::now()));
   };
 }  // namespace llarp
diff --git a/llarp/router/router.cpp b/llarp/router/router.cpp
index 8eb2c33c8..1ac96bb7d 100644
--- a/llarp/router/router.cpp
+++ b/llarp/router/router.cpp
@@ -835,6 +835,13 @@ namespace llarp
       next_rc_gossip = now_timepoint + RouterContact::STALE_AGE - random_delta;
     }
 
+    // (client-only) periodically fetch updated RCs
+    if (now_timepoint - last_rc_fetch > RC_UPDATE_INTERVAL)
+    {
+      node_db()->update_rcs();
+      last_rc_fetch = now_timepoint;
+    }
+
     // remove RCs for nodes that are no longer allowed by network policy
     node_db()->RemoveIf([&](const RemoteRC& rc) -> bool {
       // don't purge bootstrap nodes from nodedb
diff --git a/llarp/router/router.hpp b/llarp/router/router.hpp
index edf7cabad..cf2e60f7f 100644
--- a/llarp/router/router.hpp
+++ b/llarp/router/router.hpp
@@ -57,6 +57,8 @@ namespace llarp
   static constexpr size_t INTROSET_STORAGE_REDUNDANCY =
       (INTROSET_RELAY_REDUNDANCY * INTROSET_REQS_PER_RELAY);
 
+  static constexpr std::chrono::seconds RC_UPDATE_INTERVAL = 5min;
+
   struct Contacts;
 
   struct Router : std::enable_shared_from_this<Router>
@@ -129,6 +131,9 @@ namespace llarp
     std::chrono::system_clock::time_point next_rc_gossip{
         std::chrono::system_clock::time_point::min()};
 
+    std::chrono::system_clock::time_point last_rc_fetch{
+        std::chrono::system_clock::time_point::min()};
+
     // should we be sending padded messages every interval?
     bool send_padding = false;
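
Note, not part of the patch: a minimal sketch of the "fetch_rcs" request and response
bodies implied by the code above, built with oxenc directly. The helper names
(build_fetch_rcs_request, read_fetch_rcs_response) and the use of raw 32-byte ID strings
in place of RouterID/RemoteRC are illustrative assumptions, and the oxenc header names,
require<int64_t>(), and consume_dict_data() should be checked against the pinned
oxen-encoding revision.

    #include <oxenc/bt_producer.h>
    #include <oxenc/bt_serialize.h>

    #include <chrono>
    #include <cstdint>
    #include <string>
    #include <string_view>
    #include <utility>
    #include <vector>

    // Request body, mirroring RCFetchMessage::serialize():
    //   { "explicit_ids": [ <32-byte router ID>, ... ], "since": <unix seconds> }
    std::string
    build_fetch_rcs_request(const std::vector<std::string>& ids, std::chrono::seconds since)
    {
      oxenc::bt_dict_producer btdp;
      {
        auto id_list = btdp.append_list("explicit_ids");
        for (const auto& id : ids)
          id_list.append(id);
      }
      btdp.append("since", since.count());
      return std::move(btdp).str();
    }

    // Response body, mirroring the parsing in LinkManager::fetch_rcs():
    //   { "rcs": [ <bt-encoded RC dict>, ... ], "time": <unix seconds> }
    // Returns the still-encoded RCs plus the sender's snapshot timestamp.
    std::pair<std::vector<std::string>, std::chrono::seconds>
    read_fetch_rcs_response(std::string_view body)
    {
      oxenc::bt_dict_consumer btdc{body};

      btdc.required("rcs");
      auto btlc = btdc.consume_list_consumer();
      auto time = std::chrono::seconds{btdc.require<int64_t>("time")};

      std::vector<std::string> rcs;
      while (not btlc.is_finished())
        rcs.emplace_back(btlc.consume_dict_data());

      return {std::move(rcs), time};
    }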