diff --git a/external/oxen-encoding b/external/oxen-encoding
index a7de63756..f6172d58d 160000
--- a/external/oxen-encoding
+++ b/external/oxen-encoding
@@ -1 +1 @@
-Subproject commit a7de63756dcc5c31cb899a4b810e6434b1a7c01c
+Subproject commit f6172d58d3358473a4c98d96270058a32e166d5f
diff --git a/llarp/link/link_manager.cpp b/llarp/link/link_manager.cpp
index 6f75b3d8d..a42d98b92 100644
--- a/llarp/link/link_manager.cpp
+++ b/llarp/link/link_manager.cpp
@@ -6,10 +6,13 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
+#include
+
 #include
 #include
@@ -427,6 +430,120 @@ namespace llarp
     }
   }
 
+  void
+  LinkManager::fetch_rcs(
+      const RouterID& source, rc_time since, const std::vector<RouterID>& explicit_ids)
+  {
+    send_control_message(
+        source,
+        "fetch_rcs",
+        RCFetchMessage::serialize(since, explicit_ids),
+        [this, source = source](oxen::quic::message m) {
+          if (m.timed_out)
+          {
+            // TODO: keep track of this failure for relay quality metrics?
+            log::info(link_cat, "RC Fetch to {} timed out", source);
+            return;
+          }
+          if (not m)
+          {
+            log::info(link_cat, "RC Fetch to {} returned error.", source);
+            return;
+          }
+
+          try
+          {
+            oxenc::bt_dict_consumer btdc{m.body()};
+            btdc.required("rcs");
+            auto btlc = btdc.consume_list_consumer();
+            auto timestamp = rc_time{std::chrono::seconds{btdc.require<int64_t>("time")}};
+
+            std::vector<RemoteRC> rcs;
+            while (not btlc.is_finished())
+            {
+              // TODO: maybe make RemoteRC constructor throw a bespoke exception type
+              // and catch it below so we know what about parsing failed?
+              rcs.emplace_back(btlc.consume_dict_consumer());
+            }
+
+            node_db->ingest_rcs(source, std::move(rcs), timestamp);
+          }
+          catch (const std::exception& e)
+          {
+            // TODO: Inform NodeDB of failure (perhaps just a call to rotate_rc_source())
+            log::info(link_cat, "Failed to parse RC Fetch response from {}", source);
+            return;
+          }
+        });
+  }
+
+  void
+  LinkManager::handle_fetch_rcs(oxen::quic::message m)
+  {
+    // this handler should not be registered for clients
+    assert(_router.is_service_node());
+
+    const auto& rcs = node_db->get_rcs();
+    const auto now =
+        std::chrono::time_point_cast<std::chrono::seconds>(std::chrono::system_clock::now());
+    try
+    {
+      oxenc::bt_dict_consumer btdc{m.body()};
+
+      btdc.required("explicit_ids");
+      auto explicit_ids = btdc.consume_list<std::vector<std::string_view>>();
+      auto since_time = rc_time{std::chrono::seconds{btdc.require<int64_t>("since")}};
+
+      if (explicit_ids.size() > (rcs.size() / 4))
+      {
+        log::info(
+            link_cat, "Remote requested too many relay IDs (greater than 1/4 of what we have).");
+        m.respond(
+            serialize_response({{messages::STATUS_KEY, RCFetchMessage::INVALID_REQUEST}}));
+        return;
+      }
+
+      std::unordered_set<RouterID> explicit_relays;
+      for (auto& sv : explicit_ids)
+      {
+        if (sv.size() != RouterID::SIZE)
+        {
+          m.respond(serialize_response(
+              {{messages::STATUS_KEY, RCFetchMessage::INVALID_REQUEST}}));
+          return;
+        }
+        explicit_relays.emplace(reinterpret_cast<const byte_t*>(sv.data()));
+      }
+
+      oxenc::bt_dict_producer resp;
+
+      {
+        auto btlp = resp.append_list("rcs");
+
+        const auto& last_time = node_db->get_last_rc_update_times();
+
+        // if since_time isn't epoch start, subtract a bit for buffer
+        if (since_time != decltype(since_time)::min())
+          since_time -= 5s;
+
+        for (const auto& [_, rc] : rcs)
+        {
+          if (last_time.at(rc.router_id()) > since_time or explicit_relays.count(rc.router_id()))
+            btlp.append_encoded(rc.view());
+        }
+      }
+
+      resp.append("time", now.time_since_epoch().count());
+
+      m.respond(std::move(resp).str(), false);
+    }
+    catch (const std::exception& e)
+    {
+      log::info(link_cat, "Exception handling RC Fetch request: {}", e.what());
+      m.respond(messages::ERROR_RESPONSE);
+    }
+  }
+
   bool
   LinkManager::have_connection_to(const RouterID& remote, bool client_only) const
   {
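
The reply built by handle_fetch_rcs above, and parsed by the response callback in fetch_rcs, is a two-key bt dict: an "rcs" list of encoded RCs plus a "time" stamp in seconds. The stand-alone oxenc sketch below walks through that round trip; the placeholder entry "d1:ai1ee" and the literal timestamp are illustrative stand-ins only (a real entry is a full RC dict appended via rc.view()).

#include <oxenc/bt_producer.h>
#include <oxenc/bt_serialize.h>

#include <iostream>
#include <string>

int main()
{
  // Encode, as handle_fetch_rcs does; bt dict keys go in sorted order ("rcs" < "time").
  oxenc::bt_dict_producer resp;
  {
    auto rcs = resp.append_list("rcs");
    rcs.append_encoded("d1:ai1ee");  // placeholder dict standing in for one encoded RC
  }
  resp.append("time", 1700000000);

  std::string wire = std::move(resp).str();

  // Decode, as the fetch_rcs() response callback does.
  oxenc::bt_dict_consumer btdc{wire};
  btdc.required("rcs");
  auto btlc = btdc.consume_list_consumer();
  auto timestamp = btdc.require<int64_t>("time");

  int count = 0;
  while (not btlc.is_finished())
  {
    btlc.consume_dict_consumer();  // a real caller builds a RemoteRC from this consumer
    ++count;
  }
  std::cout << count << " rc(s), time=" << timestamp << "\n";
}
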
diff --git a/llarp/link/link_manager.hpp b/llarp/link/link_manager.hpp
index 9967123cc..219cd7ca8 100644
--- a/llarp/link/link_manager.hpp
+++ b/llarp/link/link_manager.hpp
@@ -226,6 +226,12 @@ namespace llarp
     void
     handle_gossip_rc(oxen::quic::message m);
 
+    void
+    fetch_rcs(const RouterID& source, rc_time since, const std::vector<RouterID>& explicit_ids);
+
+    void
+    handle_fetch_rcs(oxen::quic::message m);
+
     bool
     have_connection_to(const RouterID& remote, bool client_only = false) const;
diff --git a/llarp/messages/rc.hpp b/llarp/messages/rc.hpp
new file mode 100644
index 000000000..3aafc5e4c
--- /dev/null
+++ b/llarp/messages/rc.hpp
@@ -0,0 +1,31 @@
+#pragma once
+
+#include "common.hpp"
+
+namespace llarp::RCFetchMessage
+{
+  inline constexpr auto INVALID_REQUEST = "Invalid relay ID requested."sv;
+
+  inline static std::string
+  serialize(std::chrono::system_clock::time_point since, const std::vector<RouterID>& explicit_ids)
+  {
+    oxenc::bt_dict_producer btdp;
+
+    try
+    {
+      // bt dict keys must be appended in sorted order: "explicit_ids" < "since"
+      {
+        auto id_list = btdp.append_list("explicit_ids");
+        for (const auto& rid : explicit_ids)
+          id_list.append(rid.ToView());
+      }
+      btdp.append("since", since.time_since_epoch() / 1s);
+    }
+    catch (...)
+    {
+      log::error(link_cat, "Error: RCFetchMessage failed to bt-encode contents!");
+    }
+
+    return std::move(btdp).str();
+  }
+}  // namespace llarp::RCFetchMessage
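
A quick usage sketch for the new RCFetchMessage::serialize helper. Everything here is illustrative: the include paths, the example_fetch_body wrapper, and the default-constructed (all-zero) RouterID are assumptions, and the epoch-start time_point is just the simplest way to encode "since" as 0.

#include <llarp/messages/rc.hpp>
#include <llarp/router_id.hpp>

#include <chrono>
#include <string>
#include <vector>

std::string
example_fetch_body()
{
  // One dummy (zeroed) RouterID; a real caller passes the IDs it considers
  // outdated, as NodeDB::update_rcs() does further down in this diff.
  std::vector<llarp::RouterID> ids;
  ids.emplace_back();

  // Epoch start encodes "since" as 0; the result is the bt dict
  //   d12:explicit_idsl32:<32 zero bytes>e5:sincei0ee
  return llarp::RCFetchMessage::serialize(std::chrono::system_clock::time_point{}, ids);
}
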
diff --git a/llarp/nodedb.cpp b/llarp/nodedb.cpp
index 092bebb6d..9fbf8b4ad 100644
--- a/llarp/nodedb.cpp
+++ b/llarp/nodedb.cpp
@@ -112,6 +112,60 @@ namespace llarp
     bootstraps.emplace(rc.router_id(), rc);
   }
 
+  void
+  NodeDB::rotate_rc_source()
+  {}
+
+  // TODO: trust model
+  void
+  NodeDB::ingest_rcs(RouterID source, std::vector<RemoteRC> rcs, rc_time timestamp)
+  {
+    (void)source;
+
+    // TODO: if we don't currently have a "trusted" relay we've been fetching from,
+    // this will be a full list of RCs. We need to first check if it aligns closely
+    // with our trusted RouterID list, then replace our RCs with the incoming set.
+
+    for (auto& rc : rcs)
+      put_rc_if_newer(std::move(rc), timestamp);
+
+    // TODO: if we have a "trusted" relay we've been fetching from, this will be
+    // an incremental update to the RC list, so *after* insertion we check if the
+    // RCs' RouterIDs closely match our trusted RouterID list.
+
+    last_rc_update_relay_timestamp = timestamp;
+  }
+
+  // TODO: trust model
+  void
+  NodeDB::ingest_router_ids(RouterID source, std::vector<RouterID> ids)
+  {
+    router_id_fetch_responses[source] = std::move(ids);
+
+    router_id_response_count++;
+    if (router_id_response_count == router_id_fetch_sources.size())
+    {
+      // TODO: reconcile all the responses
+    }
+  }
+
+  void
+  NodeDB::update_rcs()
+  {
+    std::vector<RouterID> needed;
+
+    const auto now =
+        std::chrono::time_point_cast<std::chrono::seconds>(std::chrono::system_clock::now());
+    for (const auto& [rid, rc] : known_rcs)
+    {
+      if (now - rc.timestamp() > RouterContact::OUTDATED_AGE)
+        needed.push_back(rid);
+    }
+
+    router.link_manager().fetch_rcs(
+        rc_fetch_source, last_rc_update_relay_timestamp, std::move(needed));
+  }
+
   void
   NodeDB::set_router_whitelist(
       const std::vector<RouterID>& whitelist,
@@ -277,13 +331,14 @@ namespace llarp
   }
 
   bool
-  NodeDB::put_rc(RemoteRC rc)
+  NodeDB::put_rc(RemoteRC rc, rc_time now)
   {
     const auto& rid = rc.router_id();
     if (not want_rc(rid))
       return false;
     known_rcs.erase(rid);
     known_rcs.emplace(rid, std::move(rc));
+    last_rc_update_times[rid] = now;
     return true;
   }
 
@@ -294,12 +349,12 @@ namespace llarp
   }
 
   bool
-  NodeDB::put_rc_if_newer(RemoteRC rc)
+  NodeDB::put_rc_if_newer(RemoteRC rc, rc_time now)
  {
     auto itr = known_rcs.find(rc.router_id());
     if (itr == known_rcs.end() or itr->second.other_is_newer(rc))
     {
-      return put_rc(std::move(rc));
+      return put_rc(std::move(rc), now);
     }
     return false;
   }
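
The staleness check in NodeDB::update_rcs() above is what decides which RouterIDs end up in the explicit_ids list of the next fetch. Below is a self-contained sketch of the same comparison with stand-in types; kOutdatedAge and the std::string keys are placeholders, since the real constant is RouterContact::OUTDATED_AGE and its value is not part of this diff.

#include <chrono>
#include <string>
#include <unordered_map>
#include <vector>

using rc_time = std::chrono::time_point<std::chrono::system_clock, std::chrono::seconds>;

// Placeholder for RouterContact::OUTDATED_AGE, which is defined elsewhere.
constexpr auto kOutdatedAge = std::chrono::hours{12};

std::vector<std::string>
outdated_ids(const std::unordered_map<std::string, rc_time>& rc_timestamps)
{
  const auto now =
      std::chrono::time_point_cast<std::chrono::seconds>(std::chrono::system_clock::now());

  std::vector<std::string> needed;
  for (const auto& [rid, stamp] : rc_timestamps)
  {
    // Same comparison update_rcs() makes against rc.timestamp()
    if (now - stamp > kOutdatedAge)
      needed.push_back(rid);
  }
  return needed;
}
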
diff --git a/llarp/nodedb.hpp b/llarp/nodedb.hpp
index e126004b8..58eea3b33 100644
--- a/llarp/nodedb.hpp
+++ b/llarp/nodedb.hpp
@@ -26,7 +26,7 @@ namespace llarp
   {
     std::unordered_map<RouterID, RemoteRC> known_rcs;
 
-    const Router& router;
+    Router& router;
 
     const fs::path m_Root;
     const std::function<void(std::function<void()>)> disk;
@@ -42,19 +42,31 @@ namespace llarp
 
     std::unordered_map<RouterID, RemoteRC> bootstraps;
 
+    // Router lists for snodes
     // whitelist = active routers
    std::unordered_set<RouterID> router_whitelist;
     // greylist = fully funded, but decommissioned routers
     std::unordered_set<RouterID> router_greylist;
     // greenlist = registered but not fully-staked routers
     std::unordered_set<RouterID> router_greenlist;
-    // all registered relays (snodes)
     std::unordered_set<RouterID> registered_routers;
+    std::unordered_map<RouterID, rc_time> last_rc_update_times;
+
+    // Router list for clients
+    std::unordered_set client_known_rcs;
     // only ever use these specific edges as path first-hops
     std::unordered_set<RouterID> pinned_edges;
 
+    // rc update info
+    RouterID rc_fetch_source;
+    rc_time last_rc_update_relay_timestamp;
+    std::unordered_set<RouterID> router_id_fetch_sources;
+    std::unordered_map<RouterID, std::vector<RouterID>> router_id_fetch_responses;
+    // process responses once all are received (or failed/timed out)
+    size_t router_id_response_count{0};
+
     bool
     want_rc(const RouterID& rid) const;
@@ -80,6 +92,38 @@ namespace llarp
       return registered_routers;
     }
 
+    const std::unordered_map<RouterID, RemoteRC>&
+    get_rcs() const
+    {
+      return known_rcs;
+    }
+
+    const std::unordered_map<RouterID, rc_time>&
+    get_last_rc_update_times() const
+    {
+      return last_rc_update_times;
+    }
+
+    // If we receive a bad set of RCs from our current RC source relay, we consider
+    // that relay to be a bad source of RCs and we randomly choose a new one.
+    //
+    // When using a new RC fetch relay, we first re-fetch the full RC list and, if
+    // that aligns with our RouterID list, we go back to periodic updates from that relay.
+    //
+    // This will respect edge-pinning and attempt to use a relay we already have
+    // a connection with.
+    void
+    rotate_rc_source();
+
+    void
+    ingest_rcs(RouterID source, std::vector<RemoteRC> rcs, rc_time timestamp);
+
+    void
+    ingest_router_ids(RouterID source, std::vector<RouterID> ids);
+
+    void
+    update_rcs();
+
     void
     set_router_whitelist(
         const std::vector<RouterID>& whitelist,
@@ -225,12 +269,18 @@ namespace llarp
     /// put (or replace) the RC if we consider it valid (want_rc). returns true if put.
     bool
-    put_rc(RemoteRC rc);
+    put_rc(
+        RemoteRC rc,
+        rc_time now =
+            std::chrono::time_point_cast<std::chrono::seconds>(std::chrono::system_clock::now()));
 
     /// if we consider it valid (want_rc),
     /// put this rc into the cache if it is not there or is newer than the one there already
     /// returns true if the rc was inserted
     bool
-    put_rc_if_newer(RemoteRC rc);
+    put_rc_if_newer(
+        RemoteRC rc,
+        rc_time now =
+            std::chrono::time_point_cast<std::chrono::seconds>(std::chrono::system_clock::now()));
   };
 }  // namespace llarp
diff --git a/llarp/router/router.cpp b/llarp/router/router.cpp
index 8eb2c33c8..1ac96bb7d 100644
--- a/llarp/router/router.cpp
+++ b/llarp/router/router.cpp
@@ -835,6 +835,13 @@ namespace llarp
       next_rc_gossip = now_timepoint + RouterContact::STALE_AGE - random_delta;
     }
 
+    // (client-only) periodically fetch updated RCs
+    if (now_timepoint - last_rc_fetch > RC_UPDATE_INTERVAL)
+    {
+      node_db()->update_rcs();
+      last_rc_fetch = now_timepoint;
+    }
+
     // remove RCs for nodes that are no longer allowed by network policy
     node_db()->RemoveIf([&](const RemoteRC& rc) -> bool {
       // don't purge bootstrap nodes from nodedb
diff --git a/llarp/router/router.hpp b/llarp/router/router.hpp
index edf7cabad..cf2e60f7f 100644
--- a/llarp/router/router.hpp
+++ b/llarp/router/router.hpp
@@ -57,6 +57,8 @@ namespace llarp
   static constexpr size_t INTROSET_STORAGE_REDUNDANCY =
       (INTROSET_RELAY_REDUNDANCY * INTROSET_REQS_PER_RELAY);
 
+  static const std::chrono::seconds RC_UPDATE_INTERVAL = 5min;
+
   struct Contacts;
 
   struct Router : std::enable_shared_from_this<Router>
   {
@@ -129,6 +131,9 @@ namespace llarp
     std::chrono::system_clock::time_point next_rc_gossip{
         std::chrono::system_clock::time_point::min()};
 
+    std::chrono::system_clock::time_point last_rc_fetch{
+        std::chrono::system_clock::time_point::min()};
+
     // should we be sending padded messages every interval?
     bool send_padding = false;
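
The router.cpp hunk above wires the fetch into the existing tick pattern: keep a system_clock time_point, compare it against an interval each tick, and reset it when the work fires. A minimal stand-alone sketch of that pattern follows; FetchTimer and starting from the epoch are illustrative choices (the diff stores last_rc_fetch directly on Router, initializes it to time_point::min(), and calls node_db()->update_rcs()).

#include <chrono>

using namespace std::chrono_literals;

struct FetchTimer
{
  // Starting from the epoch (rather than time_point::min(), as the diff does)
  // keeps the subtraction below comfortably in range and still fires immediately.
  std::chrono::system_clock::time_point last_fetch{};

  // Returns true when a fetch should be triggered on this tick.
  bool
  tick(std::chrono::system_clock::time_point now, std::chrono::seconds interval = 5min)
  {
    if (now - last_fetch <= interval)
      return false;
    last_fetch = now;
    return true;
  }
};
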