in internal/processmetrics/processmetrics.go [275:477]
// createProcessCollectors builds the Properties value holding every process
// metrics collector that applies to the given SAP instances.
//
// Collectors that do not depend on a particular instance (sapservices, SAP
// control processes, infra migration events, network stats and volume details)
// are always registered. For each entry in sapInstances a HANA or NetWeaver
// collector set is added to p.Collectors and a matching collector is added to
// p.FastMovingCollectors. A single cluster collector is created from the first
// instance encountered. When at least one SID was seen, a maintenance mode
// collector is added, and a pacemaker collector is added as well if
// params.PCMParams.WorkloadConfig is set.
//
// The metric skip list is the union of what skipMetricsForNetweaverKernel
// records and the entries of ProcessMetricsToSkip in the collection
// configuration; the same map is shared by every collector that accepts one.
func createProcessCollectors(ctx context.Context, params Parameters, client cloudmonitoring.TimeSeriesCreator, sapInstances *sapb.SAPInstances) *Properties {
	p := &Properties{
		Config:        params.Config,
		Client:        client,
		HeartbeatSpec: params.HeartbeatSpec,
	}

	// For retries logic and backoff policy:
	// For slow moving process metrics we are going ahead with 3 retries on failures, which means 4 attempts in total.
	// Attempt - 1 Failure: wait for 30 seconds
	// Attempt - 2 Failure: wait for 60 seconds
	// Attempt - 3 Failure: wait for 120 seconds
	// For fast moving process metrics we are going ahead with 3 retries on failures, which means 4 attempts in total.
	// Attempt - 1 Failure: wait for 5 seconds
	// Attempt - 2 Failure: wait for 10 seconds
	// Attempt - 3 Failure: wait for 20 seconds
	// Note: There is also a randomization factor associated with exponential backoffs, so the intervals can
	// have a delta of 3-4 seconds, which does not affect the overall process.
	collectionConfig := p.Config.GetCollectionConfiguration()
	// The configured frequencies are plain second counts; convert them once
	// instead of at every policy construction below.
	slowInterval := time.Duration(collectionConfig.GetSlowProcessMetricsFrequency()) * time.Second
	fastInterval := time.Duration(collectionConfig.GetProcessMetricsFrequency()) * time.Second

	skippedMetrics := make(map[string]bool)
	skipMetricsForNetweaverKernel(ctx, params.Discovery, skippedMetrics)
	for _, metric := range collectionConfig.GetProcessMetricsToSkip() {
		skippedMetrics[metric] = true
	}

	// Each collector below gets its own backoff policy value, exactly one
	// LongExponentialBackOffPolicy call per collector.
	log.CtxLogger(ctx).Info("Creating SAP additional metrics collector for sapservices (active and enabled metric).")
	sapServiceCollector := &sapservice.InstanceProperties{
		Config:          p.Config,
		Client:          p.Client,
		Execute:         commandlineexecutor.ExecuteCommand,
		SkippedMetrics:  skippedMetrics,
		PMBackoffPolicy: cloudmonitoring.LongExponentialBackOffPolicy(ctx, slowInterval, 3, 3*time.Minute, 2*time.Minute),
	}

	log.CtxLogger(ctx).Info("Creating SAP control processes per process CPU, memory usage metrics collector.")
	sapStartCollector := &computeresources.SAPControlProcInstanceProperties{
		Config:          p.Config,
		Client:          p.Client,
		Executor:        commandlineexecutor.ExecuteCommand,
		SkippedMetrics:  skippedMetrics,
		PMBackoffPolicy: cloudmonitoring.LongExponentialBackOffPolicy(ctx, slowInterval, 3, 3*time.Minute, 2*time.Minute),
	}

	log.CtxLogger(ctx).Info("Creating infra migration event metrics collector.")
	migrationCollector := infra.New(p.Config, p.Client, params.GCEBetaService, skippedMetrics,
		cloudmonitoring.LongExponentialBackOffPolicy(ctx, slowInterval, 3, 3*time.Minute, 2*time.Minute))

	log.CtxLogger(ctx).Info("Creating networkstats metrics collector.")
	networkstatsCollector := &networkstats.Properties{
		Executor:        commandlineexecutor.ExecuteCommand,
		Config:          p.Config,
		Client:          p.Client,
		PMBackoffPolicy: cloudmonitoring.LongExponentialBackOffPolicy(ctx, slowInterval, 3, 3*time.Minute, 2*time.Minute),
		SkippedMetrics:  skippedMetrics,
	}

	log.CtxLogger(ctx).Info("Creating volume availability metrics collector.")
	volumeDetailsCollector := &hanavolume.Properties{
		Executor: commandlineexecutor.ExecuteCommand,
		Config:   p.Config,
		Client:   p.Client,
		CommandParams: commandlineexecutor.Params{
			Executable:  "df",
			ArgsToSplit: "-h",
		},
		PMBackoffPolicy: cloudmonitoring.LongExponentialBackOffPolicy(ctx, slowInterval, 3, 3*time.Minute, 2*time.Minute),
	}

	p.Collectors = append(
		p.Collectors,
		sapServiceCollector,
		sapStartCollector,
		migrationCollector,
		networkstatsCollector,
		volumeDetailsCollector,
	)

	sids := make(map[string]bool)
	clusterCollectorCreated := false
	for _, instance := range sapInstances.GetInstances() {
		sids[instance.GetSapsid()] = true

		// Only one cluster collector is registered, built from the first
		// instance in the list.
		if !clusterCollectorCreated {
			log.CtxLogger(ctx).Infow("Creating cluster collector for instance", "instance", instance)
			clusterCollector := &cluster.InstanceProperties{
				SAPInstance:     instance,
				Config:          p.Config,
				Client:          p.Client,
				SkippedMetrics:  skippedMetrics,
				PMBackoffPolicy: cloudmonitoring.LongExponentialBackOffPolicy(ctx, slowInterval, 3, 3*time.Minute, 2*time.Minute),
			}
			p.Collectors = append(p.Collectors, clusterCollector)
			clusterCollectorCreated = true
		}

		switch instance.GetType() {
		case sapb.InstanceType_HANA:
			log.CtxLogger(ctx).Infow("Creating HANA per process CPU, memory usage metrics collector for instance", "instance", instance)
			hanaComputeresourcesCollector := &computeresources.HANAInstanceProperties{
				Config:           p.Config,
				Client:           p.Client,
				Executor:         commandlineexecutor.ExecuteCommand,
				SAPInstance:      instance,
				SAPControlClient: sapcontrolclient.New(instance.GetInstanceNumber()),
				LastValue:        make(map[string]*process.IOCountersStat),
				SkippedMetrics:   skippedMetrics,
				PMBackoffPolicy:  cloudmonitoring.LongExponentialBackOffPolicy(ctx, slowInterval, 3, 3*time.Minute, 2*time.Minute),
			}

			log.CtxLogger(ctx).Infow("Creating HANA collector for instance.", "instance", instance)
			hanaCollector := &hana.InstanceProperties{
				SAPInstance:        instance,
				Config:             p.Config,
				Client:             p.Client,
				HANAQueryFailCount: 0,
				SkippedMetrics:     skippedMetrics,
				PMBackoffPolicy:    cloudmonitoring.LongExponentialBackOffPolicy(ctx, slowInterval, 3, 3*time.Minute, 2*time.Minute),
			}
			p.Collectors = append(p.Collectors, hanaComputeresourcesCollector, hanaCollector)

			log.CtxLogger(ctx).Infow("Creating FastMoving Collector for HANA", "instance", instance)
			fmCollector := &fastmovingmetrics.InstanceProperties{
				SAPInstance: instance,
				Config:      p.Config,
				Client:      p.Client,
				// Pass the skip list here as well so the HANA fast-moving
				// collector receives the same configuration as the NetWeaver one.
				SkippedMetrics:     skippedMetrics,
				PMBackoffPolicy:    cloudmonitoring.LongExponentialBackOffPolicy(ctx, fastInterval, 3, time.Minute, 35*time.Second),
				ReplicationConfig:  sapdiscovery.HANAReplicationConfig,
				SapSystemInterface: params.Discovery,
			}
			p.FastMovingCollectors = append(p.FastMovingCollectors, fmCollector)

		case sapb.InstanceType_NETWEAVER:
			log.CtxLogger(ctx).Infow("Creating Netweaver per process CPU, memory usage metrics collector for instance.", "instance", instance)
			netweaverComputeresourcesCollector := &computeresources.NetweaverInstanceProperties{
				Config:           p.Config,
				Client:           p.Client,
				Executor:         commandlineexecutor.ExecuteCommand,
				SAPInstance:      instance,
				SAPControlClient: sapcontrolclient.New(instance.GetInstanceNumber()),
				LastValue:        make(map[string]*process.IOCountersStat),
				SkippedMetrics:   skippedMetrics,
				PMBackoffPolicy:  cloudmonitoring.LongExponentialBackOffPolicy(ctx, slowInterval, 3, 3*time.Minute, 2*time.Minute),
			}

			log.CtxLogger(ctx).Infow("Creating Netweaver collector for instance.", "instance", instance)
			netweaverCollector := &netweaver.InstanceProperties{
				SAPInstance:     instance,
				Config:          p.Config,
				Client:          p.Client,
				SkippedMetrics:  skippedMetrics,
				PMBackoffPolicy: cloudmonitoring.LongExponentialBackOffPolicy(ctx, slowInterval, 3, 3*time.Minute, 2*time.Minute),
			}
			p.Collectors = append(p.Collectors, netweaverComputeresourcesCollector, netweaverCollector)

			log.CtxLogger(ctx).Infow("Creating FastMoving Collector for Netweaver", "instance", instance)
			fmCollector := &fastmovingmetrics.InstanceProperties{
				SAPInstance:        instance,
				Config:             p.Config,
				Client:             p.Client,
				SkippedMetrics:     skippedMetrics,
				PMBackoffPolicy:    cloudmonitoring.LongExponentialBackOffPolicy(ctx, fastInterval, 3, time.Minute, 35*time.Second),
				SapSystemInterface: params.Discovery,
			}
			p.FastMovingCollectors = append(p.FastMovingCollectors, fmCollector)
		}
	}

	// Maintenance mode and pacemaker collectors are keyed by SID, so they are
	// only created when at least one instance contributed a SID.
	if len(sids) != 0 {
		log.CtxLogger(ctx).Info("Creating maintenance mode collector.")
		maintenanceModeCollector := &maintenance.InstanceProperties{
			Config:          p.Config,
			Client:          p.Client,
			Reader:          maintenance.ModeReader{},
			Sids:            sids,
			SkippedMetrics:  skippedMetrics,
			PMBackoffPolicy: cloudmonitoring.LongExponentialBackOffPolicy(ctx, slowInterval, 3, 3*time.Minute, 2*time.Minute),
		}
		p.Collectors = append(p.Collectors, maintenanceModeCollector)

		if params.PCMParams.WorkloadConfig == nil {
			log.CtxLogger(ctx).Debug("Cannot collect pacemaker metrics, no collection definition detected.")
		} else {
			log.CtxLogger(ctx).Debug("Creating pacemaker metrics collector.")
			pacemakerCollector := &pacemaker.InstanceProperties{
				Config:          p.Config,
				Client:          p.Client,
				Sids:            sids,
				SkippedMetrics:  skippedMetrics,
				PMBackoffPolicy: cloudmonitoring.LongExponentialBackOffPolicy(ctx, slowInterval, 3, 3*time.Minute, 2*time.Minute),
				PacemakerCollector: &pacemaker.Params{
					PCMParams: params.PCMParams,
				},
			}
			p.Collectors = append(p.Collectors, pacemakerCollector)
		}
	}

	log.CtxLogger(ctx).Infow("Created process metrics collectors.", "numberofcollectors", len(p.Collectors))
	return p
}