func initDeploymentMetrics()

in metrics/metrics.go [71:341]


// initDeploymentMetrics builds every metric of the "deployment" subsystem and
// appends a handler for each one to *pluggables, in declaration order.
//
// It returns the first error reported by a metric constructor. Handlers that
// were appended before the failing constructor remain in *pluggables, so the
// caller should treat the slice as partially populated when an error is
// returned.
func initDeploymentMetrics(pluggables *[]handlers.Pluggable) error {
	subsystem := "deployment"

	// Fixed label value sets shared by several metrics below.
	deploymentTypes := []string{"coordinator_pipeline"}
	targetEnvs := []string{"gstg-ref", "gstg-cny", "gstg", "gprd-cny", "gprd"}

	durationHistogram, err := metrics.NewHistogramVec(
		metrics.WithName("duration_seconds"),
		metrics.WithSubsystem(subsystem),
		metrics.WithHelp("Duration of the coordinated deployment pipeline, from staging to production"),
		// 14 linear buckets, 30 minutes apart: upper bounds from 3.5h (12,600s) to 10h (36,000s).
		metrics.WithBuckets(prometheus.LinearBuckets(12_600, 30*60, 14)),
		metrics.WithLabel(labels.FromValues("deployment_type", deploymentTypes)),
		metrics.WithLabel(labels.SuccessOrFailed("status")),
		metrics.WithCartesianProductLabelReset(),
	)
	if err != nil {
		return err
	}

	*pluggables = append(*pluggables, handlers.NewHistogram(durationHistogram))

	durationGauge, err := metrics.NewGaugeVec(
		metrics.WithName("duration_last_seconds"),
		metrics.WithSubsystem(subsystem),
		metrics.WithHelp("Duration of the last coordinated deployment pipeline, from staging to production"),
		metrics.WithLabel(labels.FromValues("deployment_type", deploymentTypes)),
		metrics.WithLabel(labels.SuccessOrFailed("status")),
		metrics.WithLabel(labels.FullDeployVersion("deploy_version")),
	)
	if err != nil {
		return err
	}

	*pluggables = append(*pluggables, handlers.NewGauge(durationGauge))

	leadtimeGauge, err := metrics.NewGaugeVec(
		metrics.WithName("merge_request_lead_time_seconds"),
		metrics.WithSubsystem(subsystem),
		metrics.WithHelp("Time it takes from MR merge to production"),
		metrics.WithLabel(labels.Environment("target_env")),
		metrics.WithLabel(labels.Stage("target_stage")),
		metrics.WithLabel(labels.Integer("deployment_id")),
		metrics.WithLabel(labels.Integer("mr_id")),
		metrics.WithLabel(labels.FullDeployVersion("deploy_version")),
	)
	if err != nil {
		return err
	}
	*pluggables = append(*pluggables, handlers.NewGauge(leadtimeGauge))

	adjustedLeadtimeGauge, err := metrics.NewGaugeVec(
		metrics.WithName("merge_request_adjusted_lead_time_seconds"),
		metrics.WithSubsystem(subsystem),
		metrics.WithHelp("Time it takes from MR merge to production, adjusted to ignore weekends"),
		metrics.WithLabel(labels.Environment("target_env")),
		metrics.WithLabel(labels.Stage("target_stage")),
		metrics.WithLabel(labels.Integer("deployment_id")),
		metrics.WithLabel(labels.Integer("mr_id")),
		metrics.WithLabel(labels.FullDeployVersion("deploy_version")),
	)
	if err != nil {
		return err
	}
	*pluggables = append(*pluggables, handlers.NewGauge(adjustedLeadtimeGauge))

	envLabelValues := labels.Environment("").Values()
	stageLabelValues := labels.Stage("").Values()

	// Pipeline and job metrics also accept an empty environment/stage value.
	// The full slice expression caps capacity at the current length so append
	// always allocates a new backing array instead of writing into storage
	// that may still be owned by the labels package.
	envLabelValuesWithEmpty := append(envLabelValues[:len(envLabelValues):len(envLabelValues)], "")
	stageLabelValuesWithEmpty := append(stageLabelValues[:len(stageLabelValues):len(stageLabelValues)], "")

	pipelineDurationGauge, err := metrics.NewGaugeVec(
		metrics.WithName("pipeline_duration_seconds"),
		metrics.WithSubsystem(subsystem),
		metrics.WithHelp("Wall clock duration of pipelines"),
		metrics.WithLabel(labels.AutoDeployPipelineProjects("project_name")),
		metrics.WithLabel(labels.FullDeployVersion("deploy_version")),
		metrics.WithLabel(labels.PipelineStatus("pipeline_status")),
		metrics.WithLabel(labels.AnyString("pipeline_name")),
		metrics.WithLabel(labels.Integer("pipeline_id")),
		metrics.WithLabel(labels.WebURL("web_url")), // Used for the dashboard links
		metrics.WithLabel(labels.FromValues("target_env", envLabelValuesWithEmpty)),
		metrics.WithLabel(labels.FromValues("target_stage", stageLabelValuesWithEmpty)),
		metrics.WithLabel(labels.AnyString("upstream_pipeline_name")),
	)
	if err != nil {
		return err
	}
	*pluggables = append(*pluggables, handlers.NewGauge(pipelineDurationGauge))

	deployerPipelineDurationHistogram, err := metrics.NewHistogramVec(
		metrics.WithName("coordinator_pipeline_duration_seconds"),
		metrics.WithSubsystem(subsystem),
		metrics.WithHelp("Wall clock duration of deployment coordinator pipelines"),
		// Deployment pipelines usually take about 4-6 hours.
		// 14 linear buckets, 30 minutes apart: upper bounds from 3.5h (12,600s) to 10h (36,000s).
		metrics.WithBuckets(prometheus.LinearBuckets(12_600, 30*60, 14)),
		metrics.WithLabel(labels.PipelineStatus("pipeline_status")),
	)
	if err != nil {
		return err
	}
	*pluggables = append(*pluggables, handlers.NewHistogram(deployerPipelineDurationHistogram))

	packagerOmnibusPipelineDurationHistogram, err := metrics.NewHistogramVec(
		metrics.WithName("packager_omnibus_pipeline_duration_seconds"),
		metrics.WithSubsystem(subsystem),
		metrics.WithHelp("Duration of Omnibus packaging pipeline"),
		// Omnibus packaging pipelines typically take about 50 minutes - 1.5 hours.
		// 10 linear buckets, 10 minutes apart: upper bounds from 30 minutes (1,800s) to 2 hours (7,200s).
		metrics.WithBuckets(prometheus.LinearBuckets(1800, 10*60, 10)),
		metrics.WithLabel(labels.PipelineStatus("pipeline_status")),
	)
	if err != nil {
		return err
	}
	*pluggables = append(*pluggables, handlers.NewHistogram(packagerOmnibusPipelineDurationHistogram))

	packagerCngPipelineDurationHistogram, err := metrics.NewHistogramVec(
		metrics.WithName("packager_cng_pipeline_duration_seconds"),
		metrics.WithSubsystem(subsystem),
		metrics.WithHelp("Duration of CNG packaging pipeline"),
		// CNG packaging pipelines typically take about 40 minutes - 1 hour.
		// 10 linear buckets, 6 minutes apart: upper bounds from 30 minutes (1,800s) to 1h24m (5,040s).
		metrics.WithBuckets(prometheus.LinearBuckets(1800, 6*60, 10)),
		metrics.WithLabel(labels.PipelineStatus("pipeline_status")),
	)
	if err != nil {
		return err
	}
	*pluggables = append(*pluggables, handlers.NewHistogram(packagerCngPipelineDurationHistogram))

	jobDurationGauge, err := metrics.NewGaugeVec(
		metrics.WithName("job_duration_seconds"),
		metrics.WithSubsystem(subsystem),
		metrics.WithHelp("Wall clock duration of jobs"),
		metrics.WithLabel(labels.AnyString("job_name")),
		metrics.WithLabel(labels.AnyString("job_stage")),
		metrics.WithLabel(labels.SuccessOrFailed("job_status")),
		metrics.WithLabel(labels.AutoDeployPipelineProjects("project_name")),
		metrics.WithLabel(labels.FullDeployVersion("deploy_version")),
		metrics.WithLabel(labels.FromValues("target_env", envLabelValuesWithEmpty)),
		metrics.WithLabel(labels.FromValues("target_stage", stageLabelValuesWithEmpty)),
		metrics.WithLabel(labels.AnyString("short_job_name")),
		metrics.WithLabel(labels.WebURL("web_url")), // Used for the dashboard links
		metrics.WithLabel(labels.Integer("job_id")),
		metrics.WithLabel(labels.Integer("pipeline_id")),
		metrics.WithLabel(labels.AnyString("pipeline_name")),
	)
	if err != nil {
		return err
	}
	*pluggables = append(*pluggables, handlers.NewGauge(jobDurationGauge))

	deploymentStartedCounter, err := metrics.NewCounterVec(
		metrics.WithName("started_total"),
		metrics.WithSubsystem(subsystem),
		metrics.WithHelp("Number of deployments started for each environment"),
		metrics.WithLabel(labels.FromValues("target_env", targetEnvs)),
		metrics.WithCartesianProductLabelReset(),
	)
	if err != nil {
		return err
	}

	*pluggables = append(*pluggables, handlers.NewCounter(deploymentStartedCounter))

	deploymentCanRollbackCounter, err := metrics.NewCounterVec(
		metrics.WithName("can_rollback_total"),
		metrics.WithSubsystem(subsystem),
		metrics.WithHelp("Number of deployments suitable for rollback for each environment"),
		metrics.WithLabel(labels.FromValues("target_env", targetEnvs)),
		metrics.WithCartesianProductLabelReset(),
	)
	if err != nil {
		return err
	}

	*pluggables = append(*pluggables, handlers.NewCounter(deploymentCanRollbackCounter))

	rollbacksCounter, err := metrics.NewCounterVec(
		metrics.WithName("rollbacks_started_total"),
		metrics.WithSubsystem(subsystem),
		metrics.WithHelp("Number of rollbacks started for each environment"),
		metrics.WithLabel(labels.FromValues("target_env", targetEnvs)),
		metrics.WithCartesianProductLabelReset(),
	)
	if err != nil {
		return err
	}

	*pluggables = append(*pluggables, handlers.NewCounter(rollbacksCounter))

	deploymentCompletedCounter, err := metrics.NewCounterVec(
		metrics.WithName("completed_total"),
		metrics.WithSubsystem(subsystem),
		metrics.WithHelp("Number of deployments completed for each environment"),
		metrics.WithLabel(labels.FromValues("target_env", targetEnvs)),
		metrics.WithCartesianProductLabelReset(),
	)
	if err != nil {
		return err
	}

	*pluggables = append(*pluggables, handlers.NewCounter(deploymentCompletedCounter))

	deploymentStartedGauge, err := metrics.NewGaugeVec(
		metrics.WithName("started"),
		metrics.WithSubsystem(subsystem),
		metrics.WithHelp("Number of deployments started for each environment as gauge"),
		metrics.WithLabel(labels.FromValues("target_env", targetEnvs)),
		metrics.WithLabel(labels.FullDeployVersion("version")),
	)
	if err != nil {
		return err
	}

	*pluggables = append(*pluggables, handlers.NewGauge(deploymentStartedGauge))

	deploymentCompletedGauge, err := metrics.NewGaugeVec(
		metrics.WithName("completed"),
		metrics.WithSubsystem(subsystem),
		metrics.WithHelp("Number of deployments completed for each environment as gauge"),
		metrics.WithLabel(labels.FromValues("target_env", targetEnvs)),
		metrics.WithLabel(labels.FullDeployVersion("version")),
	)
	if err != nil {
		return err
	}

	*pluggables = append(*pluggables, handlers.NewGauge(deploymentCompletedGauge))

	deploymentFailedAtLeastOnceGauge, err := metrics.NewGaugeVec(
		metrics.WithName("failed_atleast_once"),
		metrics.WithSubsystem(subsystem),
		metrics.WithHelp("A deployment with at least one failed job"),
		metrics.WithLabel(labels.FullDeployVersion("deploy_version")),
	)
	if err != nil {
		return err
	}

	*pluggables = append(*pluggables, handlers.NewGauge(deploymentFailedAtLeastOnceGauge))

	deploymentBlockerCountGauge, err := metrics.NewGaugeVec(
		metrics.WithName("blocker_count"),
		metrics.WithSubsystem(subsystem),
		metrics.WithHelp("Deployment Blocker Count Per Category"),
		metrics.WithLabel(labels.RootCause("root_cause")),
		metrics.WithLabel(labels.Date("week")),
	)
	if err != nil {
		return err
	}

	*pluggables = append(*pluggables, handlers.NewGauge(deploymentBlockerCountGauge))

	deploymentHoursBlockedGauge, err := metrics.NewGaugeVec(
		metrics.WithName("hours_blocked"),
		metrics.WithSubsystem(subsystem),
		metrics.WithHelp("Deployment Hours Blocked"),
		metrics.WithLabel(labels.RootCause("root_cause")),
		metrics.WithLabel(labels.Environment("target_env")),
		metrics.WithLabel(labels.Date("week")),
	)
	if err != nil {
		return err
	}

	*pluggables = append(*pluggables, handlers.NewGauge(deploymentHoursBlockedGauge))

	return nil
}