(WIP) new bootstrap logic

This commit is contained in:
Thomas Winget 2023-11-28 20:48:58 -05:00
parent 51eab687d3
commit 13e2082862
4 changed files with 141 additions and 4 deletions

View File

@ -8,7 +8,7 @@ namespace llarp::RCFetchMessage
messages::serialize_response({{messages::STATUS_KEY, "Invalid relay ID requested"}});
inline static std::string
serialize(std::chrono::system_clock::time_point since, const std::vector<RouterID>& explicit_ids)
serialize(std::chrono::system_clock::time_point since, const std::vector<RouterID>& explicit_ids = {})
{
oxenc::bt_dict_producer btdp;

View File

@ -106,6 +106,101 @@ namespace llarp
return registered_routers.count(rid);
}
void
NodeDB::check_bootstrap_state()
{
size_t active_count{0};
size_t stale_count{0};
auto now = time_now_ms();
for (const auto& [rid, rc] : known_rcs)
{
if (not rc.is_outdated(now))
active_count++;
else
stale_count++;
if (active_count > ROUTER_ID_SOURCE_COUNT)
break;
}
if (active_count > ROUTER_ID_SOURCE_COUNT)
{
log::info(logcat, "We appear to be bootstrapped.");
_bootstrapped = true;
return;
}
_bootstrapped = false;
if (stale_count > ROUTER_ID_SOURCE_COUNT)
{
log::info(logcat, "We need to soft-bootstrap from stale RCs.");
// TODO: initiate soft-bootstrap
return;
}
log::info(logcat, "We need to bootstrap from scratch.");
bootstrap();
}
void
NodeDB::bootstrap(size_t index)
{
  // Being called at all means the RC state we hold is unusable, so drop it.
  known_rcs.clear();
  last_rc_update_times.clear();

  // Every entry of bootstrap_order has been tried: give up and have Router stop.
  if (index >= bootstrap_order.size())
  {
    log::error(logcat, "Bootstrapping has failed from all bootstraps; exiting.");
    _router.Stop();
    return;
  }

  const auto& rid = bootstrap_order[index];

  // Handles the reply to the fetch_rcs request sent below. On anything other than
  // a successfully processed RC list, it moves on to the next bootstrap.
  // TODO (Tom): DRY this out with the other invocations of fetch_rcs in here
  auto on_response = [this, src = rid, index](oxen::quic::message m) {
    bool accepted = false;

    try
    {
      oxenc::bt_dict_consumer btdc{m.body()};

      if (m)
      {
        // Keys are read in the same order as before: "rcs" first, then "time".
        auto btlc = btdc.require<oxenc::bt_list_consumer>("rcs"sv);
        auto timestamp = rc_time{std::chrono::seconds{btdc.require<int64_t>("time"sv)}};

        std::vector<RemoteRC> rcs;
        for (; not btlc.is_finished();)
          rcs.emplace_back(btlc.consume_dict_consumer());

        // TODO (Tom): add flag to mark these as coming from a bootstrap, rather
        //             than an arbitrary relay.  A relay will still check that the RCs
        //             match its registered relays lists; a client will just trust them.
        accepted = process_fetched_rcs(src, std::move(rcs), timestamp);
      }
      else
      {
        auto reason = btdc.require<std::string_view>(messages::STATUS_KEY);
        log::info(logcat, "RC fetch to {} returned error: {}", src, reason);
      }
    }
    catch (const std::exception& e)
    {
      log::info(logcat, "Failed to parse RC fetch response from {}: {}", src, e.what());
    }

    if (accepted)
      return;

    // This bootstrap did not work out; try the next one in the order.
    log::warning(logcat, "Failed to bootstrap from {}, trying next.", src);
    bootstrap(index + 1);
  };

  _router.link_manager().send_control_message(
      rid, "fetch_rcs", RCFetchMessage::serialize(rc_time::min()), std::move(on_response));
}
void
NodeDB::set_bootstrap_routers(const std::set<RemoteRC>& rcs)
{
@ -113,7 +208,11 @@ namespace llarp
for (const auto& rc : rcs)
{
bootstraps.emplace(rc.router_id(), rc);
bootstrap_order.push_back(rc.router_id());
}
// so we use bootstraps in a random order
std::shuffle(bootstrap_order.begin(), bootstrap_order.end(), llarp::csrng);
}
bool
@ -806,6 +905,8 @@ namespace llarp
}
itr++;
}
check_bootstrap_state();
}
std::optional<RouterID>
@ -900,6 +1001,12 @@ namespace llarp
for (const auto& fpath : purge)
fs::remove(fpath);
}
// (client-only) after loading RCs, check if we're in a usable state
// relay will do this after set_router_whitelist, as it gets its RouterID list
// from oxend and needs that before it can decide.
if (not _router.is_service_node())
check_bootstrap_state();
}
void

View File

@ -47,6 +47,7 @@ namespace llarp
get_path_by_pubkey(RouterID pk) const;
std::unordered_map<RouterID, RemoteRC> bootstraps;
std::vector<RouterID> bootstrap_order;
// Router lists for snodes
// whitelist = active routers
@ -80,13 +81,40 @@ namespace llarp
std::atomic<bool> is_fetching_rids{false}, is_fetching_rcs{false};
std::atomic<int> fetch_failures{0};
bool _bootstrapped{true};
bool
want_rc(const RouterID& rid) const;
/// Check if we need to bootstrap, and set that in motion if so
///
/// For clients, this is called after loading the db (on startup)
/// after bootstrap success, a client will only call this again if the number
/// of non-stale RCs goes below ROUTER_ID_SOURCE_COUNT + 1, as this means we're
/// no longer confident we know enough active relays.
///
/// For relays, this is called when receiving whitelist updates from oxend.
void
check_bootstrap_state();
/// Bootstrap from scratch, i.e. from one of `bootstraps`.
/// Bootstraps are tried in the order stored in `bootstrap_order`, which is shuffled
/// when the bootstrap routers are set, so they are tried in a random order.
/// `index` selects which entry of `bootstrap_order` to try; if that attempt fails,
/// the next index is tried.
/// If `index` is past the end of `bootstrap_order`, we've failed to bootstrap from
/// all of them and need to inform Router to shut down.
void
bootstrap(size_t index = 0);
public:
void
set_bootstrap_routers(const std::set<RemoteRC>& rcs);
/// Returns the current value of the `_bootstrapped` flag.
bool
bootstrapped() const { return _bootstrapped; }
const std::unordered_set<RouterID>&
whitelist() const
{

View File

@ -794,6 +794,8 @@ namespace llarp
return status;
}
// TODO (Tom): rearrange so anything we want to do regardless of network state
// always happens, and the rest only happens if we're bootstrapped.
void
Router::Tick()
{
@ -1080,9 +1082,6 @@ namespace llarp
return false;
}
log::info(logcat, "Loading NodeDB from disk...");
_node_db->load_from_disk();
_contacts = std::make_shared<Contacts>(llarp::dht::Key_t(pubkey()), *this);
for (const auto& rc : bootstrap_rc_list)
@ -1092,6 +1091,9 @@ namespace llarp
log::info(logcat, "Added bootstrap node (rid: {})", rc.router_id());
}
log::info(logcat, "Loading NodeDB from disk...");
_node_db->load_from_disk();
log::info(logcat, "Router populated NodeDB with {} routers", _node_db->num_loaded());
_loop->call_every(ROUTER_TICK_INTERVAL, weak_from_this(), [this] { Tick(); });