in src/ccf/ccf-provider/CcfNetworkProvider.cs [1127:1269]
/// <summary>
/// Waits for a joining node to report the desired state on its <c>/node/state</c> endpoint.
/// When the network's service status is "Open" and the joining node is still "Pending", a
/// <c>transition_node_to_trusted</c> proposal is submitted for it first.
/// </summary>
/// <param name="networkName">Network name, forwarded to the node health check.</param>
/// <param name="providerConfig">Provider configuration, forwarded to the node health check.
/// </param>
/// <param name="targetNodeEndpoint">Endpoint of the node used to query the network.</param>
/// <param name="joinNodeEndpoint">Endpoint of the node that is joining.</param>
/// <param name="serviceCertPem">Service certificate (PEM) used when creating the clients.
/// </param>
/// <param name="desiredState">State whose string form the join node must report.</param>
/// <returns>A task that completes once the join node reports the desired state.</returns>
/// <exception cref="TimeoutException">The join node did not report the desired state and
/// more than 60 seconds had elapsed when the timeout was checked after a poll.</exception>
private async Task WaitForJoinNodeReady(
    string networkName,
    JsonObject? providerConfig,
    NodeEndpoint targetNodeEndpoint,
    NodeEndpoint joinNodeEndpoint,
    string serviceCertPem,
    DesiredJoinNodeState desiredState)
{
    var networkClient = this.GetOrAddServiceClient(targetNodeEndpoint, serviceCertPem);

    // A node that joins while the network is already open has to be moved to trusted
    // before its join can complete.
    // TODO (gsinha): Add retries around GetFromJsonAsync transient failure.
    var networkInfo = (await networkClient.GetFromJsonAsync<JsonObject>("/node/network"))!;
    var serviceStatus = networkInfo["service_status"]!.ToString();
    if (serviceStatus == "Open")
    {
        var joiningNode = await this.WaitForNodeToAppearOnNetwork(
            networkClient,
            joinNodeEndpoint.NodeName,
            onRetry: () => this.CheckNodeHealthy(
                networkName,
                joinNodeEndpoint.NodeName,
                providerConfig));

        if (joiningNode["status"]!.ToString() == "Pending")
        {
#pragma warning disable MEN002 // Line is too long
            // Node to node communication between the new node and the primary can take a
            // while to get established due to DNS resolve/caching issues. This shows up as
            // the create proposal transaction commit taking time, so use a higher timeout to
            // give the communication a chance to get established.
            //# Node 1 added to Raft config
            // 2024-11-07T08:17:28.355534Z -0.017 0 [info ] ../src/node/channels.h:828 | Initiating node channel with n[d267d70732038c31038eabfc093b63745520560ec0969160595673ad95b05701].
            // 2024-11-07T08:17:28.355852Z 100 [debug] ../src/host/node_connections.h:458 | Added node connection with n[d267d70732038c31038eabfc093b63745520560ec0969160595673ad95b05701] (foo-1.westeurope.azurecontainer.io:8081)
            // 2024-11-07T08:17:28.355863Z 100 [debug] ../src/host/node_connections.h:434 | node send to n[d267d70732038c31038eabfc093b63745520560ec0969160595673ad95b05701] [1208]
            // 2024-11-07T08:17:28.355868Z -0.018 0 [info ] ../src/consensus/aft/raft.h:2567 | Added raft node n[d267d70732038c31038eabfc093b63745520560ec0969160595673ad95b05701] (foo-1.westeurope.azurecontainer.io:8081)
            //# Still unable to connect to Node 1
            // 2024-11-07T08:17:30.358161Z -0.004 0 [info ] ../src/node/channels.h:828 | Initiating node channel with n[d267d70732038c31038eabfc093b63745520560ec0969160595673ad95b05701].
            // 2024-11-07T08:17:30.358490Z 100 [debug] ../src/host/node_connections.h:434 | node send to n[d267d70732038c31038eabfc093b63745520560ec0969160595673ad95b05701] [1208]
            // 2024-11-07T08:17:30.382290Z 100 [debug] ../src/host/tcp.h:699 | uv_tcp_connect async retry: connection timed out
            // 2024-11-07T08:17:30.382401Z 100 [info ] ../src/host/tcp.h:536 | Unable to connect: all resolved addresses failed: foo-1.westeurope.azurecontainer.io:8081
            // 2024-11-07T08:17:30.382412Z 100 [debug] ../src/host/node_connections.h:227 | Disconnecting outgoing connection with n[d267d70732038c31038eabfc093b63745520560ec0969160595673ad95b05701]: connect failed
            // 2024-11-07T08:17:30.382454Z 100 [debug] ../src/host/node_connections.h:472 | Removed node connection with n[d267d70732038c31038eabfc093b63745520560ec0969160595673ad95b05701]
            //...
            //# Eventually succeed in connecting to Node 1
            // 2024-11-07T08:18:32.142617Z -0.004 0 [info ] ../src/node/channels.h:828 | Initiating node channel with n[d267d70732038c31038eabfc093b63745520560ec0969160595673ad95b05701].
            // 2024-11-07T08:18:32.146380Z 100 [debug] ../src/host/node_connections.h:458 | Added node connection with n[d267d70732038c31038eabfc093b63745520560ec0969160595673ad95b05701] (foo-1.westeurope.azurecontainer.io:8081)
            // 2024-11-07T08:18:32.146430Z 100 [debug] ../src/host/node_connections.h:434 | node send to n[d267d70732038c31038eabfc093b63745520560ec0969160595673ad95b05701] [1208]
            // 2024-11-07T08:18:32.150568Z 100 [info ] ../src/host/socket.h:53 | TCP Node Outgoing connected
            //# Commit advances
            // 2024-11-07T08:18:32.926155Z 100 [debug] ../src/host/ledger.h:1435 | Ledger commit: 133/133
#pragma warning restore MEN002 // Line is too long
            var proposalTimeout = TimeSpan.FromSeconds(180);
            await TransitionNodeToTrusted(
                networkClient,
                joiningNode["node_id"]!.ToString(),
                proposalTimeout);
        }
    }

    // The health check runs on every retry: if the join node failed to start, its https
    // endpoint will never respond and retrying further would be pointless.
    var nodeCertPem = await this.GetNodeSelfSignedCert(
        joinNodeEndpoint,
        onRetry: () => this.CheckNodeHealthy(
            networkName,
            joinNodeEndpoint.NodeName,
            providerConfig));
    var nodeClient = this.GetOrAddNodeClient(joinNodeEndpoint, serviceCertPem, nodeCertPem);

    var readyTimeout = TimeSpan.FromSeconds(60);
    var pollTimer = Stopwatch.StartNew();
    var pollInterval = TimeSpan.FromSeconds(1);
    var joinNodeName = joinNodeEndpoint.NodeName;
    var expectedState = desiredState.ToString();
    var rpcAddress = joinNodeEndpoint.ClientRpcAddress;
    var stateUrl = $"{rpcAddress}/node/state";

    // Poll the join node until it reports the expected state or the timeout is exceeded.
    while (true)
    {
        using var response = await nodeClient.GetAsync("/node/state");

        // Branch-specific tail of the "still waiting" log message emitted below.
        string progress;
        if (!response.IsSuccessStatusCode)
        {
            progress = $"Current statusCode: {response.StatusCode}.";
        }
        else
        {
            var reported = (await response.Content.ReadFromJsonAsync<JsonObject>())!;
            var currentState = reported["state"]!.ToString();
            if (currentState == expectedState)
            {
                this.logger.LogInformation(
                    $"{joinNodeName}: {stateUrl} is reporting {expectedState}.");
                break;
            }

            progress = $"Current state: {currentState}";
        }

        this.logger.LogInformation(
            $"{joinNodeName}: Waiting for {stateUrl} " +
            $"to report {expectedState}. {progress}");

        // The timeout is evaluated only after a poll, so at least one request is always made.
        if (pollTimer.Elapsed > readyTimeout)
        {
            throw new TimeoutException(
                $"{joinNodeName}: Hit timeout waiting for join node " +
                $"{rpcAddress} to become {expectedState}");
        }

        await Task.Delay(pollInterval);
    }

    // Submits a transition_node_to_trusted proposal for the given node id through
    // CreateProposal and logs the serialized result.
    async Task TransitionNodeToTrusted(
        HttpClient client,
        string nodeId,
        TimeSpan? timeout = null)
    {
        this.logger.LogInformation(
            $"Submitting transition_node_to_trusted proposal for {nodeId}.");

        var actionArgs = new JsonObject
        {
            ["node_id"] = nodeId,
            ["valid_from"] = DateTime.UtcNow.ToString("O")
        };
        var trustAction = new JsonObject
        {
            ["name"] = "transition_node_to_trusted",
            ["args"] = actionArgs
        };
        var proposal = new JsonObject
        {
            ["actions"] = new JsonArray { trustAction }
        };

        var outcome = await this.CreateProposal(client, proposal, timeout);
        this.logger.LogInformation(JsonSerializer.Serialize(outcome, Utils.Options));
    }
}