func createProcessCollectors()

in internal/processmetrics/processmetrics.go [275:477]


// createProcessCollectors builds the Properties holding the process metrics
// collectors for this host.
//
// The following collectors are always registered: sapservices, SAP control
// process compute resources, infra migration events, network stats and HANA
// volume details. For the instances in sapInstances it additionally registers
// a single cluster collector (bound to the first instance seen) and, for each
// HANA or Netweaver instance, a compute-resources collector, a product
// collector and a fast-moving collector. When at least one instance is present
// the maintenance mode collector is registered, and the pacemaker collector is
// registered too if params.PCMParams.WorkloadConfig is set.
//
// Fast-moving collectors are appended to Properties.FastMovingCollectors; all
// other collectors are appended to Properties.Collectors. The same
// skippedMetrics map is shared by every collector that accepts one, and each
// collector is given the result of its own LongExponentialBackOffPolicy call.
func createProcessCollectors(ctx context.Context, params Parameters, client cloudmonitoring.TimeSeriesCreator, sapInstances *sapb.SAPInstances) *Properties {
	p := &Properties{
		Config:        params.Config,
		Client:        client,
		HeartbeatSpec: params.HeartbeatSpec,
	}

	// Retry logic and backoff policy:
	// Both slow and fast moving process metrics use 3 retries on failure, which
	// means 4 attempts in total. The backoff policies are seeded with the
	// configured collection frequency, so the figures below presumably describe
	// a 30 second slow frequency and a 5 second fast frequency.
	// NOTE(review): confirm against LongExponentialBackOffPolicy and the
	// configuration defaults.
	//
	// Slow moving process metrics:
	// Attempt - 1 Failure: wait for 30 seconds
	// Attempt - 2 Failure: wait for 60 seconds
	// Attempt - 3 Failure: wait for 120 seconds
	//
	// Fast moving process metrics:
	// Attempt - 1 Failure: wait for 5 seconds
	// Attempt - 2 Failure: wait for 10 seconds
	// Attempt - 3 Failure: wait for 20 seconds
	//
	// Note: Exponential backoff also applies a randomization factor, so the
	// intervals can have a delta of 3-4 seconds, which does not affect the
	// overall process.
	collectionConfig := p.Config.GetCollectionConfiguration()
	slowInterval := time.Duration(collectionConfig.GetSlowProcessMetricsFrequency()) * time.Second
	fastInterval := time.Duration(collectionConfig.GetProcessMetricsFrequency()) * time.Second

	// The skip list is seeded by skipMetricsForNetweaverKernel and then extended
	// with the metrics listed in the collection configuration.
	skippedMetrics := make(map[string]bool)
	skipMetricsForNetweaverKernel(ctx, params.Discovery, skippedMetrics)
	for _, metric := range collectionConfig.GetProcessMetricsToSkip() {
		skippedMetrics[metric] = true
	}

	log.CtxLogger(ctx).Info("Creating SAP additional metrics collector for sapservices (active and enabled metric).")
	sapServiceCollector := &sapservice.InstanceProperties{
		Config:          p.Config,
		Client:          p.Client,
		Execute:         commandlineexecutor.ExecuteCommand,
		SkippedMetrics:  skippedMetrics,
		PMBackoffPolicy: cloudmonitoring.LongExponentialBackOffPolicy(ctx, slowInterval, 3, 3*time.Minute, 2*time.Minute),
	}

	log.CtxLogger(ctx).Info("Creating SAP control processes per process CPU, memory usage metrics collector.")
	sapStartCollector := &computeresources.SAPControlProcInstanceProperties{
		Config:          p.Config,
		Client:          p.Client,
		Executor:        commandlineexecutor.ExecuteCommand,
		SkippedMetrics:  skippedMetrics,
		PMBackoffPolicy: cloudmonitoring.LongExponentialBackOffPolicy(ctx, slowInterval, 3, 3*time.Minute, 2*time.Minute),
	}

	log.CtxLogger(ctx).Info("Creating infra migration event metrics collector.")
	migrationCollector := infra.New(p.Config, p.Client, params.GCEBetaService, skippedMetrics,
		cloudmonitoring.LongExponentialBackOffPolicy(ctx, slowInterval, 3, 3*time.Minute, 2*time.Minute))

	log.CtxLogger(ctx).Info("Creating networkstats metrics collector.")
	networkstatsCollector := &networkstats.Properties{
		Executor:        commandlineexecutor.ExecuteCommand,
		Config:          p.Config,
		Client:          p.Client,
		PMBackoffPolicy: cloudmonitoring.LongExponentialBackOffPolicy(ctx, slowInterval, 3, 3*time.Minute, 2*time.Minute),
		SkippedMetrics:  skippedMetrics,
	}

	log.CtxLogger(ctx).Info("Creating volume availability metrics collector.")
	volumeDetailsCollector := &hanavolume.Properties{
		Executor: commandlineexecutor.ExecuteCommand,
		Config:   p.Config,
		Client:   p.Client,
		CommandParams: commandlineexecutor.Params{
			Executable:  "df",
			ArgsToSplit: "-h",
		},
		PMBackoffPolicy: cloudmonitoring.LongExponentialBackOffPolicy(ctx, slowInterval, 3, 3*time.Minute, 2*time.Minute),
	}

	p.Collectors = append(
		p.Collectors,
		sapServiceCollector,
		sapStartCollector,
		migrationCollector,
		networkstatsCollector,
		volumeDetailsCollector,
	)

	// sids records every SAP system ID seen; it gates and feeds the maintenance
	// mode and pacemaker collectors created after the loop.
	sids := make(map[string]bool)
	clusterCollectorCreated := false
	for _, instance := range sapInstances.GetInstances() {
		sids[instance.GetSapsid()] = true

		// Only one cluster collector is created, bound to the first instance.
		if !clusterCollectorCreated {
			log.CtxLogger(ctx).Infow("Creating cluster collector for instance", "instance", instance)
			clusterCollector := &cluster.InstanceProperties{
				SAPInstance:     instance,
				Config:          p.Config,
				Client:          p.Client,
				SkippedMetrics:  skippedMetrics,
				PMBackoffPolicy: cloudmonitoring.LongExponentialBackOffPolicy(ctx, slowInterval, 3, 3*time.Minute, 2*time.Minute),
			}
			p.Collectors = append(p.Collectors, clusterCollector)
			clusterCollectorCreated = true
		}

		switch instance.GetType() {
		case sapb.InstanceType_HANA:
			log.CtxLogger(ctx).Infow("Creating HANA per process CPU, memory usage metrics collector for instance", "instance", instance)
			hanaComputeresourcesCollector := &computeresources.HANAInstanceProperties{
				Config:           p.Config,
				Client:           p.Client,
				Executor:         commandlineexecutor.ExecuteCommand,
				SAPInstance:      instance,
				SAPControlClient: sapcontrolclient.New(instance.GetInstanceNumber()),
				LastValue:        make(map[string]*process.IOCountersStat),
				SkippedMetrics:   skippedMetrics,
				PMBackoffPolicy:  cloudmonitoring.LongExponentialBackOffPolicy(ctx, slowInterval, 3, 3*time.Minute, 2*time.Minute),
			}

			log.CtxLogger(ctx).Infow("Creating HANA collector for instance.", "instance", instance)
			hanaCollector := &hana.InstanceProperties{
				SAPInstance:        instance,
				Config:             p.Config,
				Client:             p.Client,
				HANAQueryFailCount: 0,
				SkippedMetrics:     skippedMetrics,
				PMBackoffPolicy:    cloudmonitoring.LongExponentialBackOffPolicy(ctx, slowInterval, 3, 3*time.Minute, 2*time.Minute),
			}
			p.Collectors = append(p.Collectors, hanaComputeresourcesCollector, hanaCollector)

			// SkippedMetrics is passed here just as it is for the Netweaver
			// fast-moving collector, so both receive the same skip list.
			// NOTE(review): confirm fastmovingmetrics consults it for HANA metrics.
			log.CtxLogger(ctx).Infow("Creating FastMoving Collector for HANA", "instance", instance)
			fmCollector := &fastmovingmetrics.InstanceProperties{
				SAPInstance:        instance,
				Config:             p.Config,
				Client:             p.Client,
				SkippedMetrics:     skippedMetrics,
				PMBackoffPolicy:    cloudmonitoring.LongExponentialBackOffPolicy(ctx, fastInterval, 3, time.Minute, 35*time.Second),
				ReplicationConfig:  sapdiscovery.HANAReplicationConfig,
				SapSystemInterface: params.Discovery,
			}
			p.FastMovingCollectors = append(p.FastMovingCollectors, fmCollector)

		case sapb.InstanceType_NETWEAVER:
			log.CtxLogger(ctx).Infow("Creating Netweaver per process CPU, memory usage metrics collector for instance.", "instance", instance)
			netweaverComputeresourcesCollector := &computeresources.NetweaverInstanceProperties{
				Config:           p.Config,
				Client:           p.Client,
				Executor:         commandlineexecutor.ExecuteCommand,
				SAPInstance:      instance,
				SAPControlClient: sapcontrolclient.New(instance.GetInstanceNumber()),
				LastValue:        make(map[string]*process.IOCountersStat),
				SkippedMetrics:   skippedMetrics,
				PMBackoffPolicy:  cloudmonitoring.LongExponentialBackOffPolicy(ctx, slowInterval, 3, 3*time.Minute, 2*time.Minute),
			}

			log.CtxLogger(ctx).Infow("Creating Netweaver collector for instance.", "instance", instance)
			netweaverCollector := &netweaver.InstanceProperties{
				SAPInstance:     instance,
				Config:          p.Config,
				Client:          p.Client,
				SkippedMetrics:  skippedMetrics,
				PMBackoffPolicy: cloudmonitoring.LongExponentialBackOffPolicy(ctx, slowInterval, 3, 3*time.Minute, 2*time.Minute),
			}
			p.Collectors = append(p.Collectors, netweaverComputeresourcesCollector, netweaverCollector)

			log.CtxLogger(ctx).Infow("Creating FastMoving Collector for Netweaver", "instance", instance)
			fmCollector := &fastmovingmetrics.InstanceProperties{
				SAPInstance:        instance,
				Config:             p.Config,
				Client:             p.Client,
				SkippedMetrics:     skippedMetrics,
				PMBackoffPolicy:    cloudmonitoring.LongExponentialBackOffPolicy(ctx, fastInterval, 3, time.Minute, 35*time.Second),
				SapSystemInterface: params.Discovery,
			}
			p.FastMovingCollectors = append(p.FastMovingCollectors, fmCollector)
		}
	}

	// Maintenance mode and pacemaker collectors are keyed by SID, so they are
	// only created when at least one instance was found.
	if len(sids) != 0 {
		log.CtxLogger(ctx).Info("Creating maintenance mode collector.")
		maintenanceModeCollector := &maintenance.InstanceProperties{
			Config:          p.Config,
			Client:          p.Client,
			Reader:          maintenance.ModeReader{},
			Sids:            sids,
			SkippedMetrics:  skippedMetrics,
			PMBackoffPolicy: cloudmonitoring.LongExponentialBackOffPolicy(ctx, slowInterval, 3, 3*time.Minute, 2*time.Minute),
		}
		p.Collectors = append(p.Collectors, maintenanceModeCollector)

		if params.PCMParams.WorkloadConfig == nil {
			log.CtxLogger(ctx).Debug("Cannot collect pacemaker metrics, no collection definition detected.")
		} else {
			log.CtxLogger(ctx).Debug("Creating pacemaker metrics collector.")
			pacemakerCollector := &pacemaker.InstanceProperties{
				Config:          p.Config,
				Client:          p.Client,
				Sids:            sids,
				SkippedMetrics:  skippedMetrics,
				PMBackoffPolicy: cloudmonitoring.LongExponentialBackOffPolicy(ctx, slowInterval, 3, 3*time.Minute, 2*time.Minute),
				PacemakerCollector: &pacemaker.Params{
					PCMParams: params.PCMParams,
				},
			}
			p.Collectors = append(p.Collectors, pacemakerCollector)
		}
	}

	// The reported count covers p.Collectors only; fast-moving collectors are
	// tracked separately in p.FastMovingCollectors.
	log.CtxLogger(ctx).Infow("Created process metrics collectors.", "numberofcollectors", len(p.Collectors))
	return p
}