Add lokinet reachability to quorum testing

It works just like storage server testing.

Renames the report_peer_storage_server_status to report_peer_status, and
repurposes the code to handle both SS and lokinet.

This *doesn't* need a HF by design because the reason bit field was
deliberately designed so that we can add reason fields (older clients
will just ignore unknown bits).
This commit is contained in:
Jason Rhinelander 2021-06-04 16:17:31 -03:00
parent 470bca770e
commit bbb8bdb1af
14 changed files with 197 additions and 129 deletions

View File

@ -84,6 +84,7 @@ std::vector<std::string> readable_reasons(uint16_t decomm_reason) {
if (decomm_reason & storage_server_unreachable) results.push_back("Storage Server Unreachable");
if (decomm_reason & timestamp_response_unreachable) results.push_back("Unreachable for Timestamp Check");
if (decomm_reason & timesync_status_out_of_sync) results.push_back("Time out of sync");
if (decomm_reason & lokinet_unreachable) results.push_back("Lokinet Unreachable");
return results;
}
@ -95,6 +96,7 @@ std::vector<std::string> coded_reasons(uint16_t decomm_reason) {
if (decomm_reason & storage_server_unreachable) results.push_back("storage");
if (decomm_reason & timestamp_response_unreachable) results.push_back("timecheck");
if (decomm_reason & timesync_status_out_of_sync) results.push_back("timesync");
if (decomm_reason & lokinet_unreachable) results.push_back("lokinet");
return results;
}

View File

@ -422,7 +422,8 @@ namespace cryptonote
missed_pulse_participations = 1 << 2,
storage_server_unreachable = 1 << 3,
timestamp_response_unreachable = 1 << 4,
timesync_status_out_of_sync = 1 << 5
timesync_status_out_of_sync = 1 << 5,
lokinet_unreachable = 1 << 6,
};
// Returns human-readable reason strings (e.g. "Missed Uptime Proofs") for the given reason bits

View File

@ -241,7 +241,7 @@ namespace config
inline constexpr auto UPTIME_PROOF_CHECK_INTERVAL = 30s; // How frequently to check whether we need to broadcast a proof
inline constexpr auto UPTIME_PROOF_FREQUENCY = 1h; // How often to send proofs out to the network since the last proof we successfully sent. (Approximately; this can be up to CHECK_INTERFACE/2 off in either direction). The minimum accepted time between proofs is half of this.
inline constexpr auto UPTIME_PROOF_VALIDITY = 2h + 5min; // The maximum time that we consider an uptime proof to be valid (i.e. after this time since the last proof we consider the SN to be down)
inline constexpr auto SS_MAX_FAILURE_VALIDITY = 10min; // If we don't hear any SS ping test failures for more than this long then we start considering the SN as passing for the purpose of obligation testing until we get another test result. This should be somewhat larger than SS's max re-test backoff (5min).
inline constexpr auto REACHABLE_MAX_FAILURE_VALIDITY = 5min; // If we don't hear any SS ping/lokinet session test failures for more than this long then we start considering the SN as passing for the purpose of obligation testing until we get another test result. This should be somewhat larger than SS/lokinet's max re-test backoff (2min).
// Hash domain separators
inline constexpr std::string_view HASH_KEY_BULLETPROOF_EXPONENT = "bulletproof"sv;

View File

@ -2545,11 +2545,6 @@ namespace cryptonote
return true;
}
//-----------------------------------------------------------------------------------------------
// Forwards a storage-server reachability report for `pubkey` to the service
// node list, which owns the per-node proof/reachability state.
// Returns false if `pubkey` is not a registered service node (see
// service_node_list::set_storage_server_peer_reachable).
bool core::set_storage_server_peer_reachable(crypto::public_key const &pubkey, bool value)
{
return m_service_node_list.set_storage_server_peer_reachable(pubkey, value);
}
//-----------------------------------------------------------------------------------------------
void core::flush_bad_txs_cache()
{
bad_semantics_txes_lock.lock();

View File

@ -1017,11 +1017,6 @@ namespace cryptonote
*/
void flush_invalid_blocks();
/**
* @brief Record the reachability status of node's storage server
*/
bool set_storage_server_peer_reachable(crypto::public_key const &pubkey, bool value);
/// Time point at which the storage server and lokinet last pinged us
std::atomic<time_t> m_last_storage_server_ping, m_last_lokinet_ping;
std::atomic<uint16_t> m_storage_https_port, m_storage_omq_port;

View File

@ -3233,49 +3233,62 @@ namespace service_nodes
info.timesync_status.add(entry);
}
std::optional<bool> proof_info::ss_reachable(const std::chrono::steady_clock::time_point& now) const {
if (ss_last_reachable >= ss_last_unreachable)
std::optional<bool> proof_info::reachable_stats::reachable(const std::chrono::steady_clock::time_point& now) const {
if (last_reachable >= last_unreachable)
return true;
if (ss_last_unreachable > now - config::SS_MAX_FAILURE_VALIDITY)
if (last_unreachable > now - config::REACHABLE_MAX_FAILURE_VALIDITY)
return false;
// Last result was a failure, but it was a while ago, so we don't know for sure that it isn't
// reachable now:
return std::nullopt;
}
bool proof_info::ss_unreachable_for(std::chrono::seconds threshold, const std::chrono::steady_clock::time_point& now) const {
if (auto maybe_reachable = ss_reachable(now); !maybe_reachable /*stale*/ || *maybe_reachable /*good*/)
bool proof_info::reachable_stats::unreachable_for(std::chrono::seconds threshold, const std::chrono::steady_clock::time_point& now) const {
if (auto maybe_reachable = reachable(now); !maybe_reachable /*stale*/ || *maybe_reachable /*good*/)
return false;
if (ss_first_unreachable > now - threshold)
if (first_unreachable > now - threshold)
return false; // Unreachable, but for less than the grace time
return true;
}
bool service_node_list::set_storage_server_peer_reachable(crypto::public_key const &pubkey, bool reachable)
{
bool service_node_list::set_peer_reachable(bool storage_server, const crypto::public_key& pubkey, bool reachable) {
// (See .h for overview description)
std::lock_guard lock(m_sn_mutex);
const auto type = storage_server ? "storage server"sv : "lokinet"sv;
if (!m_state.service_nodes_infos.count(pubkey)) {
MDEBUG("Dropping SS reachable report: " << pubkey << " is not a registered SN pubkey");
MDEBUG("Dropping " << type << " reachable report: " << pubkey << " is not a registered SN pubkey");
return false;
}
MDEBUG("Received " << (reachable ? "reachable" : "UNREACHABLE") << " report for SN " << pubkey);
MDEBUG("Received " << type << (reachable ? " reachable" : " UNREACHABLE") << " report for SN " << pubkey);
const auto now = std::chrono::steady_clock::now();
proof_info& info = proofs[pubkey];
auto& reach = storage_server ? proofs[pubkey].ss_reachable : proofs[pubkey].lokinet_reachable;
if (reachable) {
info.ss_last_reachable = now;
info.ss_first_unreachable = NEVER;
reach.last_reachable = now;
reach.first_unreachable = NEVER;
} else {
info.ss_last_unreachable = now;
if (info.ss_first_unreachable == NEVER)
info.ss_first_unreachable = now;
reach.last_unreachable = now;
if (reach.first_unreachable == NEVER)
reach.first_unreachable = now;
}
return true;
}
// Records a storage-server ping test result for `pubkey` (thin wrapper around
// set_peer_reachable with storage_server=true).  Returns false if `pubkey` is
// not a registered service node.
bool service_node_list::set_storage_server_peer_reachable(crypto::public_key const &pubkey, bool reachable)
{
return set_peer_reachable(true, pubkey, reachable);
}
// Records a lokinet session test result for `pubkey` (thin wrapper around
// set_peer_reachable with storage_server=false).  Returns false if `pubkey` is
// not a registered service node.
bool service_node_list::set_lokinet_peer_reachable(crypto::public_key const &pubkey, bool reachable)
{
return set_peer_reachable(false, pubkey, reachable);
}
static quorum_manager quorum_for_serialization_to_quorum_manager(service_node_list::quorum_for_serialization const &source)

View File

@ -155,20 +155,29 @@ namespace service_nodes
uint64_t effective_timestamp = 0; // Typically the same, but on recommissions it is set to the recommission block time to fend off instant obligation checks
std::array<std::pair<uint32_t, uint64_t>, 2> public_ips = {}; // (not serialized)
// See set_storage_server_peer_reachable(...)
std::chrono::steady_clock::time_point ss_last_reachable = NEVER;
std::chrono::steady_clock::time_point ss_first_unreachable = NEVER;
std::chrono::steady_clock::time_point ss_last_unreachable = NEVER;
// Returns whether or not this SS is currently (probably) reachable:
// - true if the last test was a pass (regardless of how long ago)
// - false if the last test was a recent fail (i.e. less than SS_MAX_FAILURE_VALIDITY ago)
// - nullopt if the last test was a failure, but is considered stale.
// Both true and nullopt are considered a pass for service node testing.
std::optional<bool> ss_reachable(const std::chrono::steady_clock::time_point& now = std::chrono::steady_clock::now()) const;
// Returns true if this node's SS has recently failed reachability (see above) *and* has been
// unreachable for at least the given grace time (that is: there is both a recent failure and a
// failure more than `grace` ago, with no intervening reachability pass reports).
bool ss_unreachable_for(std::chrono::seconds threshold, const std::chrono::steady_clock::time_point& now = std::chrono::steady_clock::now()) const;
// See set_storage_server_peer_reachable(...) and set_lokinet_peer_reachable(...)
struct reachable_stats {
// Test-result timestamps for one reachability channel (storage server or
// lokinet).  All three start at NEVER (no report received yet); a passing
// report updates last_reachable and resets first_unreachable to NEVER, a
// failing report updates last_unreachable and sets first_unreachable if it
// was NEVER (see set_peer_reachable).
std::chrono::steady_clock::time_point
last_reachable = NEVER,
first_unreachable = NEVER,
last_unreachable = NEVER;
// Returns whether or not this stats indicates a node that is currently (probably) reachable:
// - true if the last test was a pass (regardless of how long ago)
// - false if the last test was a recent fail (i.e. less than REACHABLE_MAX_FAILURE_VALIDITY ago)
// - nullopt if the last test was a failure, but is considered stale.
// Both true and nullopt are considered a pass for service node testing.
std::optional<bool> reachable(const std::chrono::steady_clock::time_point& now = std::chrono::steady_clock::now()) const;
// Returns true if this stats indicates a node that has recently failed reachability (see
// above) *and* has been unreachable for at least the given grace time (that is: there is
// both a recent failure and a failure more than `grace` ago, with no intervening
// reachability pass reports).
bool unreachable_for(std::chrono::seconds threshold, const std::chrono::steady_clock::time_point& now = std::chrono::steady_clock::now()) const;
};
reachable_stats ss_reachable;
reachable_stats lokinet_reachable;
// Unlike all of the above (except for timestamp), these values *do* get serialized
std::unique_ptr<uptime_proof::Proof> proof;
@ -560,13 +569,14 @@ namespace service_nodes
// Called every hour to remove proofs for expired SNs from memory and the database.
void cleanup_proofs();
// Called via RPC from storage server to report a ping test result for a remote storage server.
// Called via RPC from storage server/lokinet to report a ping test result for a remote storage
// server/lokinet.
//
// How this works (as of SS 2.0.9/oxen 9.x):
// How this works:
// - SS randomly picks probably-good nodes to test every 10s (with fuzz), and pings
// known-failing nodes to re-test them.
// - SS re-tests nodes with a linear backoff: 10s+fuzz after the first failure, then 20s+fuzz,
// then 30s+fuzz, etc. (up to ~5min retest intervals)
// then 30s+fuzz, etc. (up to ~2min retest intervals)
// - Whenever SS gets *any* ping result at all it notifies us via RPC (which lands here), and it
// is (as of 9.x) our responsibility to decide when too many bad pings should be penalized.
//
@ -576,10 +586,13 @@ namespace service_nodes
// - otherwise we consider it good. (Which means either it passed a reachability test at least
// once in the last 1h5min *or* SS stopped pinging it, perhaps because it restarted).
//
// Lokinet works essentially the same, except that its concept of a "ping" is being able to
// successfully establish a session with the given remote lokinet snode.
//
// We do all this by tracking three values:
// - ss_last_reachable
// - ss_first_unreachable
// - ss_last_unreachable
// - last_reachable
// - first_unreachable
// - last_unreachable
//
// On a good ping, we set last_reachable to the current time and clear first_unreachable. On a
// bad ping we set last_unreachable to the current time and, if first_unreachable is empty, set
@ -593,9 +606,14 @@ namespace service_nodes
// - how long it has been unreachable (now - first_unreachable, if first_unreachable is set)
//
// (Also note that the actual times referenced here are for convenience, 10min is actually
// SS_MAX_FAILURE_VALIDITY, and 1h5min is actually UPTIME_PROOF_VALIDITY-UPTIME_PROOF_FREQUENCY
// (which is actually 11min on testnet rather than 1h5min)).
// REACHABLE_MAX_FAILURE_VALIDITY, and 1h5min is actually
// UPTIME_PROOF_VALIDITY-UPTIME_PROOF_FREQUENCY (which is actually 11min on testnet rather than
// 1h5min)).
bool set_storage_server_peer_reachable(crypto::public_key const &pubkey, bool value);
bool set_lokinet_peer_reachable(crypto::public_key const &pubkey, bool value);
private:
bool set_peer_reachable(bool storage_server, crypto::public_key const &pubkey, bool value);
public:
struct quorum_for_serialization
{

View File

@ -45,28 +45,21 @@
namespace service_nodes
{
char const *service_node_test_results::why() const
std::optional<std::vector<std::string_view>> service_node_test_results::why() const
{
static char buf[2048];
buf[0] = 0;
char *buf_ptr = buf;
char const *buf_end = buf + sizeof(buf);
if (passed())
{
buf_ptr += snprintf(buf_ptr, buf_end - buf_ptr, "Service Node is passing all local tests");
}
else
{
buf_ptr += snprintf(buf_ptr, buf_end - buf_ptr, "Service Node is currently failing the following tests: ");
if (!uptime_proved) buf_ptr += snprintf(buf_ptr, buf_end - buf_ptr, "Uptime proof missing.\n");
if (!checkpoint_participation) buf_ptr += snprintf(buf_ptr, buf_end - buf_ptr, "Skipped voting in at least %d checkpoints.\n", (int)(QUORUM_VOTE_CHECK_COUNT - CHECKPOINT_MAX_MISSABLE_VOTES));
if (!pulse_participation) buf_ptr += snprintf(buf_ptr, buf_end - buf_ptr, "Skipped voting in at least %d pulse quorums.\n", (int)(QUORUM_VOTE_CHECK_COUNT - PULSE_MAX_MISSABLE_VOTES));
if (!timestamp_participation) buf_ptr += snprintf(buf_ptr, buf_end - buf_ptr, "Replied out of sync time for at least %d timestamp mesages.\n", (int)(QUORUM_VOTE_CHECK_COUNT - TIMESTAMP_MAX_MISSABLE_VOTES));
if (!timesync_status) buf_ptr += snprintf(buf_ptr, buf_end - buf_ptr, "Missed replying to at least %d timesync messages.\n", (int)(QUORUM_VOTE_CHECK_COUNT - TIMESYNC_MAX_UNSYNCED_VOTES));
buf_ptr += snprintf(buf_ptr, buf_end - buf_ptr, "Note: Storage server may not be reachable. This is only testable by an external Service Node.");
}
return buf;
return std::nullopt;
std::vector<std::string_view> results{{"Service Node is currently failing the following tests:"sv}};
if (!uptime_proved) results.push_back("Uptime proof missing."sv);
if (!checkpoint_participation) results.push_back("Skipped voting in too many checkpoints."sv);
if (!pulse_participation) results.push_back("Skipped voting in too many pulse quorums."sv);
// These ones are not likely to be useful when we are reporting on ourself:
if (!timestamp_participation) results.push_back("Too many out-of-sync timesync replies."sv);
if (!timesync_status) results.push_back("Too many missed timesync replies."sv);
if (!storage_server_reachable) results.push_back("Storage server is not reachable."sv);
if (!lokinet_reachable) results.push_back("Lokinet router is not reachable."sv);
return results;
}
quorum_cop::quorum_cop(cryptonote::core& core)
@ -87,7 +80,7 @@ namespace service_nodes
const auto& netconf = m_core.get_net_config();
service_node_test_results result; // Defaults to true for individual tests
bool ss_reachable = true;
bool ss_reachable = true, lokinet_reachable = true;
uint64_t timestamp = 0;
decltype(std::declval<proof_info>().public_ips) ips{};
@ -97,10 +90,12 @@ namespace service_nodes
service_nodes::participation_history<service_nodes::timesync_entry> timesync_status{};
constexpr std::array<uint16_t, 3> MIN_TIMESTAMP_VERSION{9,1,0};
bool check_timestamp_obligation = false;
const auto unreachable_threshold = netconf.UPTIME_PROOF_VALIDITY - netconf.UPTIME_PROOF_FREQUENCY;
m_core.get_service_node_list().access_proof(pubkey, [&](const proof_info &proof) {
ss_reachable = !proof.ss_unreachable_for(netconf.UPTIME_PROOF_VALIDITY - netconf.UPTIME_PROOF_FREQUENCY);
ss_reachable = !proof.ss_reachable.unreachable_for(unreachable_threshold);
lokinet_reachable = !proof.lokinet_reachable.unreachable_for(unreachable_threshold);
timestamp = std::max(proof.timestamp, proof.effective_timestamp);
ips = proof.public_ips;
checkpoint_participation = proof.checkpoint_participation;
@ -140,6 +135,12 @@ namespace service_nodes
result.storage_server_reachable = false;
}
if (!lokinet_reachable)
{
LOG_PRINT_L1("Service Node lokinet is not reachable for node: " << pubkey);
result.lokinet_reachable = false;
}
// IP change checks
if (ips[0].first && ips[1].first) {
// Figure out when we last had a blockchain-level IP change penalty (or when we registered);
@ -390,6 +391,7 @@ namespace service_nodes
if (!test_results.checkpoint_participation) reason |= cryptonote::Decommission_Reason::missed_checkpoints;
if (!test_results.pulse_participation) reason |= cryptonote::Decommission_Reason::missed_pulse_participations;
if (!test_results.storage_server_reachable) reason |= cryptonote::Decommission_Reason::storage_server_unreachable;
if (!test_results.lokinet_reachable) reason |= cryptonote::Decommission_Reason::lokinet_unreachable;
if (!test_results.timestamp_participation) reason |= cryptonote::Decommission_Reason::timestamp_response_unreachable;
if (!test_results.timesync_status) reason |= cryptonote::Decommission_Reason::timesync_status_out_of_sync;
int64_t credit = calculate_decommission_credit(info, latest_height);
@ -444,25 +446,24 @@ namespace service_nodes
if (info.can_be_voted_on(m_obligations_height))
{
tested_myself_once_per_block = true;
auto my_test_results = check_service_node(obligations_height_hf_version, my_keys.pub, info);
if (info.is_active())
{
if (!my_test_results.passed())
{
// NOTE: Don't warn uptime proofs if the daemon is just
// recently started and is candidate for testing (i.e.
// restarting the daemon)
if (!my_test_results.uptime_proved && live_time < 1h)
continue;
auto my_test_results = check_service_node(obligations_height_hf_version, my_keys.pub, info);
const bool print_failings = info.is_decommissioned() ||
(info.is_active() && !my_test_results.passed() &&
// Don't warn uptime proofs if the daemon is just recently started and is candidate for testing (i.e. restarting the daemon)
(my_test_results.uptime_proved || live_time >= 1h));
LOG_PRINT_L0("Service Node (yours) is active but is not passing tests for quorum: " << m_obligations_height);
LOG_PRINT_L0(my_test_results.why());
}
}
else if (info.is_decommissioned())
if (print_failings)
{
LOG_PRINT_L0("Service Node (yours) is currently decommissioned and being tested in quorum: " << m_obligations_height);
LOG_PRINT_L0(my_test_results.why());
LOG_PRINT_L0(
(info.is_decommissioned()
? "Service Node (yours) is currently decommissioned and being tested in quorum: "
: "Service Node (yours) is active but is not passing tests for quorum: ")
<< m_obligations_height);
if (auto why = my_test_results.why())
LOG_PRINT_L0(tools::join("\n", *why));
else
LOG_PRINT_L0("Service Node is passing all local tests");
LOG_PRINT_L0("(Note that some tests, such as storage server and lokinet reachability, can only assessed by remote service nodes)");
}
}
}

View File

@ -93,9 +93,20 @@ namespace service_nodes
bool timestamp_participation = true;
bool timesync_status = true;
bool storage_server_reachable = true;
bool lokinet_reachable = true;
char const *why() const;
bool passed() const { return uptime_proved && checkpoint_participation && pulse_participation && storage_server_reachable && timestamp_participation && timesync_status; }
// Returns a vector of reasons why this node is failing (nullopt if not failing).
std::optional<std::vector<std::string_view>> why() const;
constexpr bool passed() const {
return uptime_proved &&
//single_ip -- deliberately excluded (it only gives ip-change penalties, not deregs)
checkpoint_participation &&
pulse_participation &&
timestamp_participation &&
timesync_status &&
storage_server_reachable &&
lokinet_reachable;
}
};
class quorum_cop

View File

@ -1687,27 +1687,32 @@ static void append_printable_service_node_list_entry(cryptonote::network_type ne
//
// NOTE: Storage Server Test
//
stream << indent2 << "Storage Server Reachable: ";
if (entry.storage_server_first_unreachable == 0) {
if (entry.storage_server_last_reachable == 0)
stream << "Not yet tested";
else {
stream << "Yes (last tested " << get_human_time_ago(entry.storage_server_last_reachable, now);
if (entry.storage_server_last_unreachable)
stream << "; last failure " << get_human_time_ago(entry.storage_server_last_unreachable, now);
auto print_reachable = [&stream, &now] (bool reachable, auto first_unreachable, auto last_unreachable, auto last_reachable) {
if (first_unreachable == 0) {
if (last_reachable == 0)
stream << "Not yet tested";
else {
stream << "Yes (last tested " << get_human_time_ago(last_reachable, now);
if (last_unreachable)
stream << "; last failure " << get_human_time_ago(last_unreachable, now);
stream << ")";
}
} else {
stream << "NO";
if (!reachable)
stream << " - FAILING!";
stream << " (last tested " << get_human_time_ago(last_unreachable, now)
<< "; failing since " << get_human_time_ago(first_unreachable, now);
if (last_reachable)
stream << "; last good " << get_human_time_ago(last_reachable, now);
stream << ")";
}
} else {
stream << "NO";
if (!entry.storage_server_reachable)
stream << " - FAILING!";
stream << " (last tested " << get_human_time_ago(entry.storage_server_last_unreachable, now)
<< "; failing since " << get_human_time_ago(entry.storage_server_first_unreachable, now);
if (entry.storage_server_last_reachable)
stream << "; last good " << get_human_time_ago(entry.storage_server_last_reachable, now);
stream << ")";
}
stream << "\n";
stream << '\n';
};
stream << indent2 << "Storage Server Reachable: ";
print_reachable(entry.storage_server_reachable, entry.storage_server_first_unreachable, entry.storage_server_last_unreachable, entry.storage_server_last_reachable);
stream << indent2 << "Lokinet Reachable: ";
print_reachable(entry.lokinet_reachable, entry.lokinet_first_unreachable, entry.lokinet_last_unreachable, entry.lokinet_last_reachable);
//
// NOTE: Component Versions

View File

@ -3074,10 +3074,14 @@ namespace cryptonote { namespace rpc {
entry.last_uptime_proof = proof.timestamp;
auto system_now = std::chrono::system_clock::now();
auto steady_now = std::chrono::steady_clock::now();
entry.storage_server_reachable = !proof.ss_unreachable_for(netconf.UPTIME_PROOF_VALIDITY - netconf.UPTIME_PROOF_FREQUENCY, steady_now);
entry.storage_server_first_unreachable = reachable_to_time_t(proof.ss_first_unreachable, system_now, steady_now);
entry.storage_server_last_unreachable = reachable_to_time_t(proof.ss_last_unreachable, system_now, steady_now);
entry.storage_server_last_reachable = reachable_to_time_t(proof.ss_last_reachable, system_now, steady_now);
entry.storage_server_reachable = !proof.ss_reachable.unreachable_for(netconf.UPTIME_PROOF_VALIDITY - netconf.UPTIME_PROOF_FREQUENCY, steady_now);
entry.storage_server_first_unreachable = reachable_to_time_t(proof.ss_reachable.first_unreachable, system_now, steady_now);
entry.storage_server_last_unreachable = reachable_to_time_t(proof.ss_reachable.last_unreachable, system_now, steady_now);
entry.storage_server_last_reachable = reachable_to_time_t(proof.ss_reachable.last_reachable, system_now, steady_now);
entry.lokinet_reachable = !proof.lokinet_reachable.unreachable_for(netconf.UPTIME_PROOF_VALIDITY - netconf.UPTIME_PROOF_FREQUENCY, steady_now);
entry.lokinet_first_unreachable = reachable_to_time_t(proof.lokinet_reachable.first_unreachable, system_now, steady_now);
entry.lokinet_last_unreachable = reachable_to_time_t(proof.lokinet_reachable.last_unreachable, system_now, steady_now);
entry.lokinet_last_reachable = reachable_to_time_t(proof.lokinet_reachable.last_reachable, system_now, steady_now);
service_nodes::participation_history<service_nodes::participation_entry> const &checkpoint_participation = proof.checkpoint_participation;
service_nodes::participation_history<service_nodes::participation_entry> const &pulse_participation = proof.pulse_participation;
@ -3421,9 +3425,9 @@ namespace cryptonote { namespace rpc {
return res;
}
//------------------------------------------------------------------------------------------------------------------------------
REPORT_PEER_SS_STATUS::response core_rpc_server::invoke(REPORT_PEER_SS_STATUS::request&& req, rpc_context context)
REPORT_PEER_STATUS::response core_rpc_server::invoke(REPORT_PEER_STATUS::request&& req, rpc_context context)
{
REPORT_PEER_SS_STATUS::response res{};
REPORT_PEER_STATUS::response res{};
crypto::public_key pubkey;
if (!tools::hex_to_type(req.pubkey, pubkey)) {
@ -3431,9 +3435,14 @@ namespace cryptonote { namespace rpc {
throw rpc_error{ERROR_WRONG_PARAM, "Could not parse public key"};
}
if (req.type != "reachability")
bool success = false;
if (req.type == "lokinet")
success = m_core.get_service_node_list().set_lokinet_peer_reachable(pubkey, req.passed);
else if (req.type == "storage" || req.type == "reachability" /* TODO: old name, can be removed once SS no longer uses it */)
success = m_core.get_service_node_list().set_storage_server_peer_reachable(pubkey, req.passed);
else
throw rpc_error{ERROR_WRONG_PARAM, "Unknown status type"};
if (!m_core.set_storage_server_peer_reachable(pubkey, req.passed))
if (!success)
throw rpc_error{ERROR_WRONG_PARAM, "Pubkey not found"};
res.status = STATUS_OK;

View File

@ -264,7 +264,7 @@ namespace cryptonote::rpc {
LOKINET_PING::response invoke(LOKINET_PING::request&& req, rpc_context context);
GET_CHECKPOINTS::response invoke(GET_CHECKPOINTS::request&& req, rpc_context context);
GET_SN_STATE_CHANGES::response invoke(GET_SN_STATE_CHANGES::request&& req, rpc_context context);
REPORT_PEER_SS_STATUS::response invoke(REPORT_PEER_SS_STATUS::request&& req, rpc_context context);
REPORT_PEER_STATUS::response invoke(REPORT_PEER_STATUS::request&& req, rpc_context context);
TEST_TRIGGER_P2P_RESYNC::response invoke(TEST_TRIGGER_P2P_RESYNC::request&& req, rpc_context context);
TEST_TRIGGER_UPTIME_PROOF::response invoke(TEST_TRIGGER_UPTIME_PROOF::request&& req, rpc_context context);
ONS_NAMES_TO_OWNERS::response invoke(ONS_NAMES_TO_OWNERS::request&& req, rpc_context context);

View File

@ -1121,6 +1121,10 @@ KV_SERIALIZE_MAP_CODE_BEGIN(GET_SERVICE_NODES::requested_fields_t)
KV_SERIALIZE(storage_server_first_unreachable)
KV_SERIALIZE(storage_server_last_unreachable)
KV_SERIALIZE(storage_server_last_reachable)
KV_SERIALIZE(lokinet_reachable)
KV_SERIALIZE(lokinet_first_unreachable)
KV_SERIALIZE(lokinet_last_unreachable)
KV_SERIALIZE(lokinet_last_reachable)
KV_SERIALIZE(checkpoint_participation)
KV_SERIALIZE(pulse_participation)
KV_SERIALIZE(timestamp_participation)
@ -1177,6 +1181,10 @@ KV_SERIALIZE_MAP_CODE_BEGIN(GET_SERVICE_NODES::response::entry)
KV_SERIALIZE_ENTRY_FIELD_IF_REQUESTED(storage_server_first_unreachable)
KV_SERIALIZE_ENTRY_FIELD_IF_REQUESTED(storage_server_last_unreachable)
KV_SERIALIZE_ENTRY_FIELD_IF_REQUESTED(storage_server_last_reachable)
KV_SERIALIZE_ENTRY_FIELD_IF_REQUESTED(lokinet_reachable);
KV_SERIALIZE_ENTRY_FIELD_IF_REQUESTED(lokinet_first_unreachable)
KV_SERIALIZE_ENTRY_FIELD_IF_REQUESTED(lokinet_last_unreachable)
KV_SERIALIZE_ENTRY_FIELD_IF_REQUESTED(lokinet_last_reachable)
KV_SERIALIZE_ENTRY_FIELD_IF_REQUESTED(checkpoint_participation);
KV_SERIALIZE_ENTRY_FIELD_IF_REQUESTED(pulse_participation);
KV_SERIALIZE_ENTRY_FIELD_IF_REQUESTED(timestamp_participation);
@ -1302,7 +1310,7 @@ KV_SERIALIZE_MAP_CODE_BEGIN(GET_SN_STATE_CHANGES::response)
KV_SERIALIZE_MAP_CODE_END()
KV_SERIALIZE_MAP_CODE_BEGIN(REPORT_PEER_SS_STATUS::request)
KV_SERIALIZE_MAP_CODE_BEGIN(REPORT_PEER_STATUS::request)
KV_SERIALIZE(type)
KV_SERIALIZE(pubkey)
KV_SERIALIZE(passed)

View File

@ -296,7 +296,7 @@ namespace rpc {
uint64_t height; // The voting block height for the changing service node and validators
uint32_t index; // The index of all tested nodes at the given height for which this state change applies
std::vector<uint32_t> voters; // The position of validators in the testing quorum who validated and voted for this state change. This typically contains just 7 required voter slots (of 10 eligible voters).
std::optional<std::vector<std::string>> reasons; // Reasons for the decommissioning/deregistration as reported by the voting quorum. This contains any reasons that all voters agreed on, one or more of: "uptime" (missing uptime proofs), "checkpoints" (missed checkpoint votes), "pulse" (missing pulse votes), "storage" (storage server pings failed), "timecheck" (time sync pings failed), "timesync" (time was out of sync)
std::optional<std::vector<std::string>> reasons; // Reasons for the decommissioning/deregistration as reported by the voting quorum. This contains any reasons that all voters agreed on, one or more of: "uptime" (missing uptime proofs), "checkpoints" (missed checkpoint votes), "pulse" (missing pulse votes), "storage" (storage server pings failed), "lokinet" (lokinet router unreachable), "timecheck" (time sync pings failed), "timesync" (time was out of sync)
std::optional<std::vector<std::string>> reasons_maybe; // If present, this contains any decomm/dereg reasons that were given by some but not all quorum voters
KV_MAP_SERIALIZABLE
};
@ -2051,10 +2051,13 @@ namespace rpc {
bool last_uptime_proof;
bool storage_server_reachable;
bool storage_server_reachable_timestamp;
bool storage_server_last_reachable;
bool storage_server_last_unreachable;
bool storage_server_first_unreachable;
bool lokinet_reachable;
bool lokinet_last_reachable;
bool lokinet_last_unreachable;
bool lokinet_first_unreachable;
bool checkpoint_participation;
bool pulse_participation;
bool timestamp_participation;
@ -2118,8 +2121,12 @@ namespace rpc {
uint64_t last_uptime_proof; // The last time this Service Node's uptime proof was relayed by at least 1 Service Node other than itself in unix epoch time.
bool storage_server_reachable; // True if this storage server is currently passing tests for the purposes of SN node testing: true if the last test passed, or if it has been unreachable for less than an hour; false if it has been failing tests for more than an hour (and thus is considered unreachable).
uint64_t storage_server_first_unreachable; // If the last test we received was a failure, this field contains the timestamp when failures started. Will be 0 if the last result was a success or the node has not yet been tested. (To distinguish between these cases check storage_server_last_reachable).
uint64_t storage_server_last_unreachable; // The last time this Service Node failed a ping test (regardless of whether or not it is currently failing); 0 if it never failed a test since startup.
uint64_t storage_server_last_reachable; // The last time we received a successful ping response for this Service Node (whether or not it is currently failing); 0 if we have never received a success since startup.
uint64_t storage_server_last_unreachable; // The last time this service node's storage server failed a ping test (regardless of whether or not it is currently failing); 0 if it never failed a test since startup.
uint64_t storage_server_last_reachable; // The last time we received a successful ping response for this storage server (whether or not it is currently failing); 0 if we have never received a success since startup.
bool lokinet_reachable; // True if this lokinet is currently passing tests for the purposes of SN node testing: true if the last test passed, or if it has been unreachable for less than an hour; false if it has been failing tests for more than an hour (and thus is considered unreachable).
uint64_t lokinet_first_unreachable; // If the last test we received was a failure, this field contains the timestamp when failures started. Will be 0 if the last result was a success or the node has not yet been tested. (To distinguish between these cases check lokinet_last_reachable).
uint64_t lokinet_last_unreachable; // The last time this service node's lokinet failed a reachable test (regardless of whether or not it is currently failing); 0 if it never failed a test since startup.
uint64_t lokinet_last_reachable; // The last time we received a successful test response for this service node's lokinet router (whether or not it is currently failing); 0 if we have never received a success since startup.
std::vector<service_nodes::participation_entry> checkpoint_participation; // Of the last N checkpoints the Service Node is in a checkpointing quorum, record whether or not the Service Node voted to checkpoint a block
std::vector<service_nodes::participation_entry> pulse_participation; // Of the last N pulse blocks the Service Node is in a pulse quorum, record whether or not the Service Node voted (participated) in that block
@ -2383,13 +2390,16 @@ namespace rpc {
OXEN_RPC_DOC_INTROSPECT
struct REPORT_PEER_SS_STATUS : RPC_COMMAND
// Reports service node peer status (success/fail) from lokinet and storage server.
struct REPORT_PEER_STATUS : RPC_COMMAND
{
static constexpr auto names() { return NAMES("report_peer_storage_server_status"); }
// TODO: remove the `report_peer_storage_server_status` once we require a storage server version
// that stops using the old name.
static constexpr auto names() { return NAMES("report_peer_status", "report_peer_storage_server_status"); }
struct request
{
std::string type; // test type (currently used: ["reachability"])
std::string type; // test type; currently supported are: "storage" and "lokinet" for storage server and lokinet tests, respectively.
std::string pubkey; // service node pubkey
bool passed; // whether the node is passing the test
@ -2640,7 +2650,7 @@ namespace rpc {
GET_OUTPUT_BLACKLIST,
GET_CHECKPOINTS,
GET_SN_STATE_CHANGES,
REPORT_PEER_SS_STATUS,
REPORT_PEER_STATUS,
TEST_TRIGGER_P2P_RESYNC,
TEST_TRIGGER_UPTIME_PROOF,
ONS_NAMES_TO_OWNERS,