async getInstanceHealthCheck()

in functions/source/fgt-asg-handler/lib/aws/index.js [369:494]


    async getInstanceHealthCheck(instance, heartBeatInterval = null) {
        if (!(instance && instance.instanceId)) {
            logger.error(
                'getInstanceHealthCheck > error: no instanceId property found' +
                    ` on instance: ${JSON.stringify(instance)}`
            );
            return Promise.reject(`invalid instance: ${JSON.stringify(instance)}`);
        }
        var params = {
            Key: {
                instanceId: instance.instanceId
            },
            TableName: DB.FORTIGATEAUTOSCALE.TableName
        };
        try {
            let scriptExecutionStartTime,
                healthy,
                heartBeatLossCount,
                heartBeatDelays,
                heartBeatDelayAllowance =
                    parseInt(this._settings['heartbeat-delay-allowance']) * 1000,
                inevitableFailToSyncTime,
                interval,
                healthCheckRecord,
                data = await docClient.get(params).promise();
            if (!data.Item) {
                logger.info('called getInstanceHealthCheck: no record found');
                return null;
            }
            healthCheckRecord = data.Item;
            // to get a more accurate heart beat elapsed time, the script execution time so far
            // is compensated.
            scriptExecutionStartTime = process.env.SCRIPT_EXECUTION_TIME_CHECKPOINT;
            interval =
                heartBeatInterval && !isNaN(heartBeatInterval)
                    ? heartBeatInterval
                    : healthCheckRecord.heartBeatInterval;
            heartBeatDelays = scriptExecutionStartTime - healthCheckRecord.nextHeartBeatTime;
            // The the inevitable-fail-to-sync time is defined as:
            // the maximum amount of time for an instance to be able to sync without being
            // deemed unhealth. For example:
            // the instance has x (x < hb loss count allowance) loss count recorded.
            // the hb loss count allowance is X.
            // the hb interval is set to i second.
            // its hb sync time delay allowance is I ms.
            // its current hb sync time is t.
            // its expected next hb sync time is T.
            // if t > T + (X - x - 1) * (i * 1000 + I), t has passed the
            // inevitable-fail-to-sync time. This means the instance can never catch up
            // with a heartbeat sync that makes it possile to deem health again.
            inevitableFailToSyncTime =
                healthCheckRecord.nextHeartBeatTime +
                (parseInt(this._settings['heartbeat-loss-count']) -
                    healthCheckRecord.heartBeatLossCount -
                    1) *
                    (interval * 1000 + heartBeatDelayAllowance);
            // based on the test results, network delay brought more significant side effects
            // to the heart beat monitoring checking than we thought. we have to expand the
            // checking time to reasonably offset the delay.
            // heartBeatDelayAllowance is used for this purpose
            if (heartBeatDelays < heartBeatDelayAllowance) {
                // reset hb loss count if instance sends hb within its interval
                healthy = true;
                heartBeatLossCount = 0;
            } else {
                // if the current sync heartbeat is late, the instance is still considered
                // healthy unless the the inevitable-fail-to-sync time has passed.
                healthy = scriptExecutionStartTime <= inevitableFailToSyncTime;
                heartBeatLossCount = healthCheckRecord.heartBeatLossCount + 1;
                logger.info(
                    `hb sync is late${heartBeatLossCount > 1 ? ' again' : ''}.\n` +
                        `hb loss count becomes: ${heartBeatLossCount},\n` +
                        `hb sync delay allowance: ${heartBeatDelayAllowance} ms\n` +
                        'expected hb arrived time: ' +
                        `${healthCheckRecord.nextHeartBeatTime} ms in unix timestamp\n` +
                        'current hb sync check time: ' +
                        `${scriptExecutionStartTime} ms in unix timestamp\n` +
                        `this hb sync delay is: ${heartBeatDelays} ms`
                );
                // log the math why this instance is deemed unhealthy
                if (!healthy) {
                    logger.info(
                        'Instance is deemed unhealthy. reasons:\n' +
                            `previous hb loss count: ${healthCheckRecord.heartBeatLossCount},\n` +
                            `hb sync delay allowance: ${heartBeatDelayAllowance} ms\n` +
                            'expected hb arrived time: ' +
                            `${healthCheckRecord.nextHeartBeatTime} ms in unix timestamp\n` +
                            'current hb sync check time: ' +
                            `${scriptExecutionStartTime} ms in unix timestamp\n` +
                            `this hb sync delays: ${heartBeatDelays} ms\n` +
                            'the inevitable-fail-to-sync time: ' +
                            `${inevitableFailToSyncTime} ms in unix timestamp has passed.`
                    );
                }
            }
            logger.info(
                'called getInstanceHealthCheck. (timestamp: ' +
                    `${scriptExecutionStartTime},  interval:${heartBeatInterval})` +
                    'healthcheck record:',
                JSON.stringify(healthCheckRecord)
            );
            return {
                instanceId: instance.instanceId,
                ip: healthCheckRecord.ip || '',
                healthy: healthy,
                heartBeatLossCount: heartBeatLossCount,
                heartBeatInterval: interval,
                nextHeartBeatTime: Date.now() + interval * 1000,
                primaryIp: healthCheckRecord.primaryIp,
                syncState: healthCheckRecord.syncState,
                inSync: healthCheckRecord.syncState === 'in-sync',
                inevitableFailToSyncTime: inevitableFailToSyncTime,
                healthCheckTime: scriptExecutionStartTime
            };
        } catch (error) {
            logger.info(
                'called getInstanceHealthCheck with error. ' +
                    `error: ${JSON.stringify(
                        error instanceof Error
                            ? { message: error.message, stack: error.stack }
                            : error
                    )}`
            );
            return null;
        }
    }