in functions/source/nic-attachment/lib/aws/index.js [369:494]
async getInstanceHealthCheck(instance, heartBeatInterval = null) {
if (!(instance && instance.instanceId)) {
logger.error(
'getInstanceHealthCheck > error: no instanceId property found' +
` on instance: ${JSON.stringify(instance)}`
);
return Promise.reject(`invalid instance: ${JSON.stringify(instance)}`);
}
var params = {
Key: {
instanceId: instance.instanceId
},
TableName: DB.FORTIGATEAUTOSCALE.TableName
};
try {
let scriptExecutionStartTime,
healthy,
heartBeatLossCount,
heartBeatDelays,
heartBeatDelayAllowance =
parseInt(this._settings['heartbeat-delay-allowance']) * 1000,
inevitableFailToSyncTime,
interval,
healthCheckRecord,
data = await docClient.get(params).promise();
if (!data.Item) {
logger.info('called getInstanceHealthCheck: no record found');
return null;
}
healthCheckRecord = data.Item;
// to get a more accurate heart beat elapsed time, the script execution time so far
// is compensated.
scriptExecutionStartTime = process.env.SCRIPT_EXECUTION_TIME_CHECKPOINT;
interval =
heartBeatInterval && !isNaN(heartBeatInterval)
? heartBeatInterval
: healthCheckRecord.heartBeatInterval;
heartBeatDelays = scriptExecutionStartTime - healthCheckRecord.nextHeartBeatTime;
// The the inevitable-fail-to-sync time is defined as:
// the maximum amount of time for an instance to be able to sync without being
// deemed unhealth. For example:
// the instance has x (x < hb loss count allowance) loss count recorded.
// the hb loss count allowance is X.
// the hb interval is set to i second.
// its hb sync time delay allowance is I ms.
// its current hb sync time is t.
// its expected next hb sync time is T.
// if t > T + (X - x - 1) * (i * 1000 + I), t has passed the
// inevitable-fail-to-sync time. This means the instance can never catch up
// with a heartbeat sync that makes it possile to deem health again.
inevitableFailToSyncTime =
healthCheckRecord.nextHeartBeatTime +
(parseInt(this._settings['heartbeat-loss-count']) -
healthCheckRecord.heartBeatLossCount -
1) *
(interval * 1000 + heartBeatDelayAllowance);
// based on the test results, network delay brought more significant side effects
// to the heart beat monitoring checking than we thought. we have to expand the
// checking time to reasonably offset the delay.
// heartBeatDelayAllowance is used for this purpose
if (heartBeatDelays < heartBeatDelayAllowance) {
// reset hb loss count if instance sends hb within its interval
healthy = true;
heartBeatLossCount = 0;
} else {
// if the current sync heartbeat is late, the instance is still considered
// healthy unless the the inevitable-fail-to-sync time has passed.
healthy = scriptExecutionStartTime <= inevitableFailToSyncTime;
heartBeatLossCount = healthCheckRecord.heartBeatLossCount + 1;
logger.info(
`hb sync is late${heartBeatLossCount > 1 ? ' again' : ''}.\n` +
`hb loss count becomes: ${heartBeatLossCount},\n` +
`hb sync delay allowance: ${heartBeatDelayAllowance} ms\n` +
'expected hb arrived time: ' +
`${healthCheckRecord.nextHeartBeatTime} ms in unix timestamp\n` +
'current hb sync check time: ' +
`${scriptExecutionStartTime} ms in unix timestamp\n` +
`this hb sync delay is: ${heartBeatDelays} ms`
);
// log the math why this instance is deemed unhealthy
if (!healthy) {
logger.info(
'Instance is deemed unhealthy. reasons:\n' +
`previous hb loss count: ${healthCheckRecord.heartBeatLossCount},\n` +
`hb sync delay allowance: ${heartBeatDelayAllowance} ms\n` +
'expected hb arrived time: ' +
`${healthCheckRecord.nextHeartBeatTime} ms in unix timestamp\n` +
'current hb sync check time: ' +
`${scriptExecutionStartTime} ms in unix timestamp\n` +
`this hb sync delays: ${heartBeatDelays} ms\n` +
'the inevitable-fail-to-sync time: ' +
`${inevitableFailToSyncTime} ms in unix timestamp has passed.`
);
}
}
logger.info(
'called getInstanceHealthCheck. (timestamp: ' +
`${scriptExecutionStartTime}, interval:${heartBeatInterval})` +
'healthcheck record:',
JSON.stringify(healthCheckRecord)
);
return {
instanceId: instance.instanceId,
ip: healthCheckRecord.ip || '',
healthy: healthy,
heartBeatLossCount: heartBeatLossCount,
heartBeatInterval: interval,
nextHeartBeatTime: Date.now() + interval * 1000,
primaryIp: healthCheckRecord.primaryIp,
syncState: healthCheckRecord.syncState,
inSync: healthCheckRecord.syncState === 'in-sync',
inevitableFailToSyncTime: inevitableFailToSyncTime,
healthCheckTime: scriptExecutionStartTime
};
} catch (error) {
logger.info(
'called getInstanceHealthCheck with error. ' +
`error: ${JSON.stringify(
error instanceof Error
? { message: error.message, stack: error.stack }
: error
)}`
);
return null;
}
}