make sure to retry fetch sqwarm with a new targetNode if needed

This commit is contained in:
Audric Ackermann 2021-08-02 14:33:39 +10:00
parent 2c174bdac7
commit 19c6bebab0
No known key found for this signature in database
GPG Key ID: 999F434D76324AD4
3 changed files with 68 additions and 27 deletions

View File

@ -121,7 +121,7 @@ const ITEMS_TABLE = 'items';
const ATTACHMENT_DOWNLOADS_TABLE = 'attachment_downloads';
const CLOSED_GROUP_V2_KEY_PAIRS_TABLE = 'encryptionKeyPairsForClosedGroupV2';
const MAX_PUBKEYS_MEMBERS = 1000;
const MAX_PUBKEYS_MEMBERS = 300;
function objectToJSON(data) {
return JSON.stringify(data);

View File

@ -199,7 +199,7 @@ export type SendParams = {
// get snodes for pubkey from random snode. Uses an existing snode
async function requestSnodesForPubkeyRetryable(
async function requestSnodesForPubkeyWithTargetNodeRetryable(
pubKey: string,
targetNode: Snode
): Promise<Array<Snode>> {
@ -210,15 +210,15 @@ async function requestSnodesForPubkeyRetryable(
if (!result) {
window?.log?.warn(
`LokiSnodeAPI::requestSnodesForPubkeyRetryable - lokiRpc on ${targetNode.ip}:${targetNode.port} returned falsish value`,
`LokiSnodeAPI::requestSnodesForPubkeyWithTargetNodeRetryable - lokiRpc on ${targetNode.ip}:${targetNode.port} returned falsish value`,
result
);
throw new Error('requestSnodesForPubkeyRetryable: Invalid result');
throw new Error('requestSnodesForPubkeyWithTargetNodeRetryable: Invalid result');
}
if (result.status !== 200) {
window?.log?.warn('Status is not 200 for get_snodes_for_pubkey');
throw new Error('requestSnodesForPubkeyRetryable: Invalid status code');
throw new Error('requestSnodesForPubkeyWithTargetNodeRetryable: Invalid status code');
}
try {
@ -240,26 +240,64 @@ async function requestSnodesForPubkeyRetryable(
}
}
async function requestSnodesForPubkeyWithTargetNode(
pubKey: string,
targetNode: Snode
): Promise<Array<Snode>> {
// don't catch exception in here. we want them to bubble up
// this is the level where our targetNode is supposed to be valid. We retry a few times with this one.
// if all our retries fails, we retry from the caller of this function with a new target node.
return pRetry(
async () => {
return requestSnodesForPubkeyWithTargetNodeRetryable(pubKey, targetNode);
},
{
retries: 3,
factor: 2,
minTimeout: 100,
maxTimeout: 2000,
onFailedAttempt: e => {
window?.log?.warn(
`requestSnodesForPubkeyWithTargetNode attempt #${e.attemptNumber} failed. ${e.retriesLeft} retries left...`
);
},
}
);
}
async function requestSnodesForPubkeyRetryable(pubKey: string): Promise<Array<Snode>> {
// don't catch exception in here. we want them to bubble up
// this is the level where our targetNode is not yet known. We retry a few times with a new one everytime.
// the idea is that the requestSnodesForPubkeyWithTargetNode will remove a failing targetNode
return pRetry(
async () => {
const targetNode = await getRandomSnode();
return requestSnodesForPubkeyWithTargetNode(pubKey, targetNode);
},
{
retries: 3,
factor: 2,
minTimeout: 100,
maxTimeout: 4000,
onFailedAttempt: e => {
window?.log?.warn(
`requestSnodesForPubkeyRetryable attempt #${e.attemptNumber} failed. ${e.retriesLeft} retries left...`
);
},
}
);
}
export async function requestSnodesForPubkey(pubKey: string): Promise<Array<Snode>> {
try {
const targetNode = await getRandomSnode();
// catch exception in here only.
// the idea is that the pretry will retry a few times each calls, except if an AbortError is thrown.
return await pRetry(
async () => {
return requestSnodesForPubkeyRetryable(pubKey, targetNode);
},
{
retries: 10, // each path can fail 3 times before being dropped, we have 3 paths at most
factor: 2,
minTimeout: 1000,
maxTimeout: 4000,
onFailedAttempt: e => {
window?.log?.warn(
`requestSnodesForPubkey attempt #${e.attemptNumber} failed. ${e.retriesLeft} retries left...`
);
},
}
);
// if all retry fails, we will end up in the catch below when the last exception thrown
return await requestSnodesForPubkeyRetryable(pubKey);
} catch (e) {
window?.log?.error('LokiSnodeAPI::requestSnodesForPubkey - error', e);

View File

@ -6,7 +6,7 @@ import ByteBuffer from 'bytebuffer';
import { OnionPaths } from '../onions';
import { fromHex, toHex } from '../utils/String';
import pRetry from 'p-retry';
import { incrementBadPathCountOrDrop } from '../onions/onionPath';
import { ed25519Str, incrementBadPathCountOrDrop } from '../onions/onionPath';
import _ from 'lodash';
import { hrefPnServerDev, hrefPnServerProd } from '../../pushnotification/PnServer';
// hold the ed25519 key of a snode against the time it fails. Used to remove a snode only after a few failures (snodeFailureThreshold failures)
@ -291,6 +291,7 @@ async function processAnyOtherErrorOnPath(
// If we have a specific node in fault we can exclude just this node.
// Otherwise we increment the whole path failure count
if (nodeNotFound) {
window?.log?.warn('node not found error with: ', ed25519Str(nodeNotFound));
await exports.incrementBadSnodeCountOrDrop({
snodeEd25519: nodeNotFound,
guardNodeEd25519,
@ -613,15 +614,15 @@ export async function incrementBadSnodeCountOrDrop({
const newFailureCount = oldFailureCount + 1;
snodeFailureCount[snodeEd25519] = newFailureCount;
if (newFailureCount >= snodeFailureThreshold) {
window?.log?.warn(`Failure threshold reached for: ${snodeEd25519}; dropping it.`);
window?.log?.warn(`Failure threshold reached for: ${ed25519Str(snodeEd25519)}; dropping it.`);
if (associatedWith) {
(window?.log?.info || console.warn)(
`Dropping ${snodeEd25519} from swarm of ${associatedWith}`
`Dropping ${ed25519Str(snodeEd25519)} from swarm of ${ed25519Str(associatedWith)}`
);
await dropSnodeFromSwarmIfNeeded(associatedWith, snodeEd25519);
}
window?.log?.info(`Dropping ${snodeEd25519} from snodepool`);
window?.log?.info(`Dropping ${ed25519Str(snodeEd25519)} from snodepool`);
await dropSnodeFromSnodePool(snodeEd25519);
// the snode was ejected from the pool so it won't be used again.
@ -642,7 +643,9 @@ export async function incrementBadSnodeCountOrDrop({
}
} else {
window?.log?.warn(
`Couldn't reach snode at: ${snodeEd25519}; setting his failure count to ${newFailureCount}`
`Couldn't reach snode at: ${ed25519Str(
snodeEd25519
)}; setting his failure count to ${newFailureCount}`
);
}
}