frontend/pkg/frontend/ocm.go (380 lines of code) (raw):
// Copyright 2025 Microsoft Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package frontend
import (
"context"
"errors"
"fmt"
"net/http"
"github.com/google/uuid"
azcorearm "github.com/Azure/azure-sdk-for-go/sdk/azcore/arm"
arohcpv1alpha1 "github.com/openshift-online/ocm-sdk-go/arohcp/v1alpha1"
cmv1 "github.com/openshift-online/ocm-sdk-go/clustersmgmt/v1"
ocmerrors "github.com/openshift-online/ocm-sdk-go/errors"
"github.com/Azure/ARO-HCP/internal/api"
"github.com/Azure/ARO-HCP/internal/api/arm"
)
const (
csFlavourId string = "osd-4" // managed cluster
csCloudProvider string = "azure"
csProductId string = "aro"
csHypershifEnabled bool = true
csMultiAzEnabled bool = true
csCCSEnabled bool = true
)
func convertListeningToVisibility(listening arohcpv1alpha1.ListeningMethod) (visibility api.Visibility) {
switch listening {
case arohcpv1alpha1.ListeningMethodExternal:
visibility = api.VisibilityPublic
case arohcpv1alpha1.ListeningMethodInternal:
visibility = api.VisibilityPrivate
}
return
}
func convertVisibilityToListening(visibility api.Visibility) (listening arohcpv1alpha1.ListeningMethod) {
switch visibility {
case api.VisibilityPublic:
listening = arohcpv1alpha1.ListeningMethodExternal
case api.VisibilityPrivate:
listening = arohcpv1alpha1.ListeningMethodInternal
}
return
}
func convertOutboundTypeCSToRP(outboundTypeCS string) (outboundTypeRP api.OutboundType) {
switch outboundTypeCS {
case "load_balancer":
outboundTypeRP = api.OutboundTypeLoadBalancer
}
return
}
func convertOutboundTypeRPToCS(outboundTypeRP api.OutboundType) (outboundTypeCS string) {
switch outboundTypeRP {
case api.OutboundTypeLoadBalancer:
outboundTypeCS = "load_balancer"
}
return
}
func convertDisabledCapabilitiesToCS(in []api.OptionalClusterCapability) []string {
var out []string
for _, c := range in {
out = append(out, string(c))
}
return out
}
func convertDisableCapabilitiesToRP(in []string) []api.OptionalClusterCapability {
var out []api.OptionalClusterCapability
for _, c := range in {
out = append(out, api.OptionalClusterCapability(c))
}
return out
}
func convertClusterCapabilitiesToRP(in *arohcpv1alpha1.Cluster) api.ClusterCapabilitiesProfile {
out := api.ClusterCapabilitiesProfile{}
if in == nil {
return out
}
if in.Capabilities() != nil {
out.Disabled = convertDisableCapabilitiesToRP(in.Capabilities().Disabled())
}
return out
}
func convertClusterCapabilitiesToCSBuilder(in api.ClusterCapabilitiesProfile) *arohcpv1alpha1.ClusterCapabilitiesBuilder {
return arohcpv1alpha1.NewClusterCapabilities().
Disabled(convertDisabledCapabilitiesToCS(in.Disabled)...)
}
// ConvertCStoHCPOpenShiftCluster converts a CS Cluster object into HCPOpenShiftCluster object
func ConvertCStoHCPOpenShiftCluster(resourceID *azcorearm.ResourceID, cluster *arohcpv1alpha1.Cluster) *api.HCPOpenShiftCluster {
// A word about ProvisioningState:
// ProvisioningState is stored in Cosmos and is applied to the
// HCPOpenShiftCluster struct along with the ARM metadata that
// is also stored in Cosmos. We could convert the ClusterState
// from Cluster Service to a ProvisioningState, but instead we
// defer that to the backend pod so that the ProvisioningState
// stays consistent with the Status of any active non-terminal
// operation on the cluster.
hcpcluster := &api.HCPOpenShiftCluster{
TrackedResource: arm.TrackedResource{
Location: cluster.Region().ID(),
Resource: arm.Resource{
ID: resourceID.String(),
Name: resourceID.Name,
Type: resourceID.ResourceType.String(),
},
},
Properties: api.HCPOpenShiftClusterProperties{
Version: api.VersionProfile{
ID: cluster.Version().ID(),
ChannelGroup: cluster.Version().ChannelGroup(),
AvailableUpgrades: cluster.Version().AvailableUpgrades(),
},
DNS: api.DNSProfile{
BaseDomain: cluster.DNS().BaseDomain(),
BaseDomainPrefix: cluster.DomainPrefix(),
},
Network: api.NetworkProfile{
NetworkType: api.NetworkType(cluster.Network().Type()),
PodCIDR: cluster.Network().PodCIDR(),
ServiceCIDR: cluster.Network().ServiceCIDR(),
MachineCIDR: cluster.Network().MachineCIDR(),
HostPrefix: int32(cluster.Network().HostPrefix()),
},
Console: api.ConsoleProfile{
URL: cluster.Console().URL(),
},
API: api.APIProfile{
URL: cluster.API().URL(),
Visibility: convertListeningToVisibility(cluster.API().Listening()),
},
Platform: api.PlatformProfile{
ManagedResourceGroup: cluster.Azure().ManagedResourceGroupName(),
SubnetID: cluster.Azure().SubnetResourceID(),
OutboundType: convertOutboundTypeCSToRP(cluster.Azure().NodesOutboundConnectivity().OutboundType()),
NetworkSecurityGroupID: cluster.Azure().NetworkSecurityGroupResourceID(),
IssuerURL: "",
},
Capabilities: convertClusterCapabilitiesToRP(cluster),
},
}
// Each managed identity retrieved from Cluster Service needs to be added
// to the HCPOpenShiftCluster in two places:
// - The top-level Identity.UserAssignedIdentities map will need both the
// resourceID (as keys) and principal+client IDs (as values).
// - The operator-specific maps under OperatorsAuthentication mimics the
// Cluster Service maps but just has operator-to-resourceID pairings.
if cluster.Azure().OperatorsAuthentication() != nil {
if mi, ok := cluster.Azure().OperatorsAuthentication().GetManagedIdentities(); ok {
hcpcluster.Identity.UserAssignedIdentities = make(map[string]*arm.UserAssignedIdentity)
hcpcluster.Properties.Platform.OperatorsAuthentication.UserAssignedIdentities.ControlPlaneOperators = make(map[string]string)
hcpcluster.Properties.Platform.OperatorsAuthentication.UserAssignedIdentities.DataPlaneOperators = make(map[string]string)
for operatorName, operatorIdentity := range mi.ControlPlaneOperatorsManagedIdentities() {
clientID, _ := operatorIdentity.GetClientID()
principalID, _ := operatorIdentity.GetPrincipalID()
hcpcluster.Identity.UserAssignedIdentities[operatorIdentity.ResourceID()] = &arm.UserAssignedIdentity{ClientID: &clientID,
PrincipalID: &principalID}
hcpcluster.Properties.Platform.OperatorsAuthentication.UserAssignedIdentities.ControlPlaneOperators[operatorName] = operatorIdentity.ResourceID()
}
for operatorName, operatorIdentity := range mi.DataPlaneOperatorsManagedIdentities() {
// Skip adding to hcpcluster.Identity.UserAssignedIdentities map as it is not needed for the dataplane operator MIs.
hcpcluster.Properties.Platform.OperatorsAuthentication.UserAssignedIdentities.DataPlaneOperators[operatorName] = operatorIdentity.ResourceID()
}
clientID, _ := mi.ServiceManagedIdentity().GetClientID()
principalID, _ := mi.ServiceManagedIdentity().GetPrincipalID()
hcpcluster.Identity.UserAssignedIdentities[mi.ServiceManagedIdentity().ResourceID()] = &arm.UserAssignedIdentity{ClientID: &clientID,
PrincipalID: &principalID}
hcpcluster.Properties.Platform.OperatorsAuthentication.UserAssignedIdentities.ServiceManagedIdentity = mi.ServiceManagedIdentity().ResourceID()
}
}
return hcpcluster
}
// ensureManagedResourceGroupName makes sure the ManagedResourceGroupName field is set.
// If the field is empty a default is generated.
func ensureManagedResourceGroupName(hcpCluster *api.HCPOpenShiftCluster) string {
if hcpCluster.Properties.Platform.ManagedResourceGroup != "" {
return hcpCluster.Properties.Platform.ManagedResourceGroup
}
var clusterName string
if len(hcpCluster.Name) >= 45 {
clusterName = (hcpCluster.Name)[:45]
} else {
clusterName = hcpCluster.Name
}
return "arohcp-" + clusterName + "-" + uuid.New().String()
}
// BuildCSCluster creates a CS Cluster object from an HCPOpenShiftCluster object
func (f *Frontend) BuildCSCluster(resourceID *azcorearm.ResourceID, requestHeader http.Header, hcpCluster *api.HCPOpenShiftCluster, updating bool) (*arohcpv1alpha1.Cluster, error) {
// Ensure required headers are present.
tenantID := requestHeader.Get(arm.HeaderNameHomeTenantID)
if tenantID == "" {
return nil, fmt.Errorf("missing " + arm.HeaderNameHomeTenantID + " header")
}
clusterBuilder := arohcpv1alpha1.NewCluster()
// FIXME HcpOpenShiftCluster attributes not being passed:
// ExternalAuth (TODO, complicated)
// These attributes cannot be updated after cluster creation.
if !updating {
// Add attributes that cannot be updated after cluster creation.
clusterBuilder = withImmutableAttributes(clusterBuilder, hcpCluster,
resourceID.SubscriptionID,
resourceID.ResourceGroupName,
f.location,
tenantID,
requestHeader.Get(arm.HeaderNameIdentityURL),
)
}
clusterBuilder = f.clusterServiceClient.AddProperties(clusterBuilder)
return clusterBuilder.Build()
}
func withImmutableAttributes(clusterBuilder *arohcpv1alpha1.ClusterBuilder, hcpCluster *api.HCPOpenShiftCluster, subscriptionID, resourceGroupName, location, tenantID, identityURL string) *arohcpv1alpha1.ClusterBuilder {
clusterBuilder = clusterBuilder.
Name(hcpCluster.Name).
Flavour(cmv1.NewFlavour().
ID(csFlavourId)).
Region(cmv1.NewCloudRegion().
ID(location)).
CloudProvider(cmv1.NewCloudProvider().
ID(csCloudProvider)).
Product(cmv1.NewProduct().
ID(csProductId)).
Hypershift(arohcpv1alpha1.NewHypershift().
Enabled(csHypershifEnabled)).
MultiAZ(csMultiAzEnabled).
CCS(arohcpv1alpha1.NewCCS().Enabled(csCCSEnabled)).
Version(cmv1.NewVersion().
ID(hcpCluster.Properties.Version.ID).
ChannelGroup(hcpCluster.Properties.Version.ChannelGroup)).
Network(arohcpv1alpha1.NewNetwork().
Type(string(hcpCluster.Properties.Network.NetworkType)).
PodCIDR(hcpCluster.Properties.Network.PodCIDR).
ServiceCIDR(hcpCluster.Properties.Network.ServiceCIDR).
MachineCIDR(hcpCluster.Properties.Network.MachineCIDR).
HostPrefix(int(hcpCluster.Properties.Network.HostPrefix))).
API(arohcpv1alpha1.NewClusterAPI().
Listening(convertVisibilityToListening(hcpCluster.Properties.API.Visibility))).
Capabilities(convertClusterCapabilitiesToCSBuilder(hcpCluster.Properties.Capabilities))
azureBuilder := arohcpv1alpha1.NewAzure().
TenantID(tenantID).
SubscriptionID(subscriptionID).
ResourceGroupName(resourceGroupName).
ResourceName(hcpCluster.Name).
ManagedResourceGroupName(ensureManagedResourceGroupName(hcpCluster)).
SubnetResourceID(hcpCluster.Properties.Platform.SubnetID).
NodesOutboundConnectivity(arohcpv1alpha1.NewAzureNodesOutboundConnectivity().
OutboundType(convertOutboundTypeRPToCS(hcpCluster.Properties.Platform.OutboundType)))
// Cluster Service rejects an empty NetworkSecurityGroupResourceID string.
if hcpCluster.Properties.Platform.NetworkSecurityGroupID != "" {
azureBuilder = azureBuilder.
NetworkSecurityGroupResourceID(hcpCluster.Properties.Platform.NetworkSecurityGroupID)
}
// Only pass managed identity information if the x-ms-identity-url header is present.
if identityURL != "" {
controlPlaneOperators := make(map[string]*arohcpv1alpha1.AzureControlPlaneManagedIdentityBuilder)
for operatorName, identityResourceID := range hcpCluster.Properties.Platform.OperatorsAuthentication.UserAssignedIdentities.ControlPlaneOperators {
controlPlaneOperators[operatorName] = arohcpv1alpha1.NewAzureControlPlaneManagedIdentity().ResourceID(identityResourceID)
}
dataPlaneOperators := make(map[string]*arohcpv1alpha1.AzureDataPlaneManagedIdentityBuilder)
for operatorName, identityResourceID := range hcpCluster.Properties.Platform.OperatorsAuthentication.UserAssignedIdentities.DataPlaneOperators {
dataPlaneOperators[operatorName] = arohcpv1alpha1.NewAzureDataPlaneManagedIdentity().ResourceID(identityResourceID)
}
managedIdentitiesBuilder := arohcpv1alpha1.NewAzureOperatorsAuthenticationManagedIdentities().
ManagedIdentitiesDataPlaneIdentityUrl(identityURL).
ControlPlaneOperatorsManagedIdentities(controlPlaneOperators).
DataPlaneOperatorsManagedIdentities(dataPlaneOperators)
if hcpCluster.Properties.Platform.OperatorsAuthentication.UserAssignedIdentities.ServiceManagedIdentity != "" {
managedIdentitiesBuilder = managedIdentitiesBuilder.ServiceManagedIdentity(arohcpv1alpha1.NewAzureServiceManagedIdentity().
ResourceID(hcpCluster.Properties.Platform.OperatorsAuthentication.UserAssignedIdentities.ServiceManagedIdentity))
}
azureBuilder = azureBuilder.OperatorsAuthentication(
arohcpv1alpha1.NewAzureOperatorsAuthentication().ManagedIdentities(managedIdentitiesBuilder))
}
clusterBuilder = clusterBuilder.Azure(azureBuilder)
// Cluster Service rejects an empty DomainPrefix string.
if hcpCluster.Properties.DNS.BaseDomainPrefix != "" {
clusterBuilder = clusterBuilder.
DomainPrefix(hcpCluster.Properties.DNS.BaseDomainPrefix)
}
return clusterBuilder
}
// ConvertCStoNodePool converts a CS Node Pool object into HCPOpenShiftClusterNodePool object
func ConvertCStoNodePool(resourceID *azcorearm.ResourceID, np *arohcpv1alpha1.NodePool) *api.HCPOpenShiftClusterNodePool {
nodePool := &api.HCPOpenShiftClusterNodePool{
TrackedResource: arm.TrackedResource{
Resource: arm.Resource{
ID: resourceID.String(),
Name: resourceID.Name,
Type: resourceID.ResourceType.String(),
},
},
Properties: api.HCPOpenShiftClusterNodePoolProperties{
Version: api.NodePoolVersionProfile{
ID: np.Version().ID(),
ChannelGroup: np.Version().ChannelGroup(),
AvailableUpgrades: np.Version().AvailableUpgrades(),
},
Platform: api.NodePoolPlatformProfile{
SubnetID: np.Subnet(),
VMSize: np.AzureNodePool().VMSize(),
DiskStorageAccountType: api.DiskStorageAccountType(np.AzureNodePool().OSDiskStorageAccountType()),
AvailabilityZone: np.AvailabilityZone(),
DiskSizeGiB: int32(np.AzureNodePool().OSDiskSizeGibibytes()),
},
AutoRepair: np.AutoRepair(),
Labels: np.Labels(),
},
}
if replicas, ok := np.GetReplicas(); ok {
nodePool.Properties.Replicas = int32(replicas)
}
if autoscaling, ok := np.GetAutoscaling(); ok {
nodePool.Properties.AutoScaling = &api.NodePoolAutoScaling{
Min: int32(autoscaling.MinReplica()),
Max: int32(autoscaling.MaxReplica()),
}
}
taints := make([]api.Taint, 0, len(np.Taints()))
for _, t := range np.Taints() {
taints = append(taints, api.Taint{
Effect: api.Effect(t.Effect()),
Key: t.Key(),
Value: t.Value(),
})
}
nodePool.Properties.Taints = taints
return nodePool
}
// BuildCSNodePool creates a CS Node Pool object from an HCPOpenShiftClusterNodePool object
func (f *Frontend) BuildCSNodePool(ctx context.Context, nodePool *api.HCPOpenShiftClusterNodePool, updating bool) (*arohcpv1alpha1.NodePool, error) {
npBuilder := arohcpv1alpha1.NewNodePool()
// These attributes cannot be updated after node pool creation.
if !updating {
npBuilder = npBuilder.
ID(nodePool.Name).
Version(arohcpv1alpha1.NewVersion().
ID(nodePool.Properties.Version.ID).
ChannelGroup(nodePool.Properties.Version.ChannelGroup).
AvailableUpgrades(nodePool.Properties.Version.AvailableUpgrades...)).
Subnet(nodePool.Properties.Platform.SubnetID).
AzureNodePool(arohcpv1alpha1.NewAzureNodePool().
ResourceName(nodePool.Name).
VMSize(nodePool.Properties.Platform.VMSize).
OSDiskSizeGibibytes(int(nodePool.Properties.Platform.DiskSizeGiB)).
OSDiskStorageAccountType(string(nodePool.Properties.Platform.DiskStorageAccountType))).
AvailabilityZone(nodePool.Properties.Platform.AvailabilityZone).
AutoRepair(nodePool.Properties.AutoRepair)
}
npBuilder = npBuilder.
Labels(nodePool.Properties.Labels)
if nodePool.Properties.AutoScaling != nil {
npBuilder.Autoscaling(arohcpv1alpha1.NewNodePoolAutoscaling().
MinReplica(int(nodePool.Properties.AutoScaling.Min)).
MaxReplica(int(nodePool.Properties.AutoScaling.Max)))
} else {
npBuilder.Replicas(int(nodePool.Properties.Replicas))
}
for _, t := range nodePool.Properties.Taints {
npBuilder = npBuilder.Taints(arohcpv1alpha1.NewTaint().
Effect(string(t.Effect)).
Key(t.Key).
Value(t.Value))
}
return npBuilder.Build()
}
// ConvertCStoAdminCredential converts a CS BreakGlassCredential object into an HCPOpenShiftClusterAdminCredential.
func ConvertCStoAdminCredential(breakGlassCredential *cmv1.BreakGlassCredential) *api.HCPOpenShiftClusterAdminCredential {
return &api.HCPOpenShiftClusterAdminCredential{
ExpirationTimestamp: breakGlassCredential.ExpirationTimestamp(),
Kubeconfig: breakGlassCredential.Kubeconfig(),
}
}
// CSErrorToCloudError attempts to convert various 4xx status codes from
// Cluster Service to an ARM-compliant error structure, with 500 Internal
// Server Error as a last-ditch fallback.
func CSErrorToCloudError(err error, resourceID *azcorearm.ResourceID) *arm.CloudError {
var ocmError *ocmerrors.Error
if errors.As(err, &ocmError) {
switch statusCode := ocmError.Status(); statusCode {
case http.StatusBadRequest:
// BadRequest can be returned when an object fails validation.
//
// We try our best to mimic Cluster Service's validation for a
// couple reasons:
//
// 1) Whereas Cluster Service aborts on the first validation error,
// we try to report as many validation errors as possible at once
// for a better user experience.
//
// 2) CloudErrorBody.Target should reference the erroneous field but
// validation errors from Cluster Service cannot easily be mapped
// to a field without extensive pattern matching of the reason.
//
// That said, Cluster Service's validation is more comprehensive and
// probably always will be. So it's important we try to handle their
// errors as best we can.
return arm.NewCloudError(
statusCode,
arm.CloudErrorCodeInvalidRequestContent,
"", "%s", ocmError.Reason())
case http.StatusNotFound:
if resourceID != nil {
return arm.NewResourceNotFoundError(resourceID)
}
return arm.NewCloudError(
statusCode,
arm.CloudErrorCodeNotFound,
"", "%s", ocmError.Reason())
case http.StatusConflict:
var target string
if resourceID != nil {
target = resourceID.String()
}
return arm.NewCloudError(
statusCode,
arm.CloudErrorCodeConflict,
target, "%s", ocmError.Reason())
}
}
return arm.NewInternalServerError()
}
// transportFunc implements the http.RoundTripper interface.
type transportFunc func(*http.Request) (*http.Response, error)
var _ = http.RoundTripper(transportFunc(nil))
func (rtf transportFunc) RoundTrip(r *http.Request) (*http.Response, error) {
return rtf(r)
}
const clusterServiceRequestIDHeader = "X-Request-ID"
// RequestIDPropagator returns an http.RoundTripper interface which reads the
// request ID from the request's context and propagates it to the Clusters
// Service API via the "X-Request-ID" header.
func RequestIDPropagator(next http.RoundTripper) http.RoundTripper {
return transportFunc(func(r *http.Request) (*http.Response, error) {
correlationData, err := CorrelationDataFromContext(r.Context())
if err == nil {
r = r.Clone(r.Context())
r.Header.Set(clusterServiceRequestIDHeader, correlationData.RequestID.String())
}
return next.RoundTrip(r)
})
}