Scripts/FixExpiredCert-AEPCC.ps1 (373 lines of code) (raw):
#Requires -Version 3.0
# ------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License (MIT). See License.txt in the repo root for license information.
# Feedback : pkc@microsoft.com
# ------------------------------------------------------------
<#
.SYNOPSIS
Fix the Service Fabric Cluster with Expired Certificate (Self Signed only)
.DESCRIPTION
Script can be used to recover the Service Fabric cluster with expired certificate.
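.PARAMETER nodeIpArray
IP addresses (or FQDNs) of the nodes to fix first. When left at the default ("0") the script reads the seed node addresses from the local cluster manifest. After these nodes are processed, the script also processes every node returned by Get-ServiceFabricNode.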
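.PARAMETER clusterDataRootPath
Cluster data root path. Default 'd:\svcfab'. If the FabricDataRoot value exists under HKLM:\SOFTWARE\Microsoft\Service Fabric, that registry value is used instead.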
.PARAMETER tempPath
A temporary working folder to copy and work with Cluster Manifest and Settings files
.PARAMETER cacheCredentials
switch to optionally enable storing credentials in $global:creds variable.
to clear, execute: $global:creds=$null
.PARAMETER localOnly
switch to optionally run script only on local node.
use when there are connectivity issues between nodes by rdp'ing to each node and running with this switch.
.LINK
iwr https://raw.githubusercontent.com/Azure/Service-Fabric-Troubleshooting-Guides/master/Scripts/FixExpiredCert-AEPCC.ps1 -out $pwd/FixExpiredCert-AEPCC.ps1
#>
Param(
    # Node addresses to process. The single default entry "0" is the marker
    # the script later tests for to switch to seed-node detection.
    [ValidateNotNullOrEmpty()]
    [string[]]
    $nodeIpArray = @("0"),

    # Service Fabric data root; replaced further down by the FabricDataRoot
    # registry value when that value is present.
    [string]
    $clusterDataRootPath = 'd:\svcfab',

    # Working folder that receives copies of the manifest and settings files.
    [ValidateNotNullOrEmpty()]
    [string]
    $tempPath = 'd:\temp\certwork',

    # Store the entered credentials in $global:creds for later runs.
    [switch]
    $cacheCredentials,

    # Run the fix on this machine only, without remoting.
    [switch]
    $localOnly
)
# ---- Script prologue ---------------------------------------------------
# Initialises script-wide state, resolves the data root and the installed
# Service Fabric runtime version, and exits early when the runtime is older
# than the first version supporting AcceptExpiredPinnedClusterCertificate
# or when the data root does not exist.
$ErrorActionPreference = 'continue'
$supportedVersion = [version]"6.5.658.9590"
$currentVersion = [version]"0.0"
$startTime = get-date
$global:failNodes = @()
$global:successNodes = @()
$SFEnv = Get-ItemProperty -Path "HKLM:\SOFTWARE\Microsoft\Service Fabric"
$defaultBinary = 'C:\Program Files\Microsoft Service Fabric\bin\FabricHost.exe'
$creds = $null

# The registry value, when present, takes precedence over the parameter.
if ($SFEnv.FabricDataRoot) {
    $clusterDataRootPath = $SFEnv.FabricDataRoot
}

# Prefer the version recorded in the registry; otherwise read it from the
# FabricHost.exe file version. New-Object is used instead of ::new() so the
# script stays within the declared '#Requires -Version 3.0'.
if ($SFEnv.FabricVersion) {
    $detectedVersionText = $SFEnv.FabricVersion
}
else {
    $detectedVersionText = (New-Object System.IO.FileInfo $defaultBinary).VersionInfo.FileVersion
}

# Convert to [version] before comparing. Comparing a string on the left with
# a [version] on the right is a string comparison, where "10.x" sorts below
# "6.5...". A value that cannot be parsed leaves $currentVersion at 0.0.
$parsedVersion = $detectedVersionText -as [version]
if ($parsedVersion) {
    $currentVersion = $parsedVersion
}

#Verifying whether SF Runtime is an unsupported version for the AEPCC parameter
if ($currentVersion -lt $supportedVersion) {
    $unsupportedMessage = @(
        "This Script is supported for the Service Fabric runtime version $supportedVersion or later."
        "Please leverage external TSG"
        "'https://github.com/Azure/Service-Fabric-Troubleshooting-Guides/blob/master/Security/Fix%20Expired%20Cluster%20Certificate%20Automated%20Script.md'"
        "for fixing the issue with : $detectedVersionText"
    ) -join [Environment]::NewLine
    write-warning $unsupportedMessage
    return
}

If (!(Test-Path $clusterDataRootPath)) {
    # Write-Warning accepts a single message argument, so build one string.
    write-warning "$clusterDataRootPath not found, exiting."
    return
}

#Saving current list of Trusted Hosts so it can be restored at the end of the run
$curValue = (get-item wsman:\localhost\Client\TrustedHosts).value
$scriptBlock = {
param($clusterDataRootPath, $tempPath)
<#
.SYNOPSIS
. Writes modified_clustermanifest.xml with the AEPCC parameter set to true
.DESCRIPTION
Reads clustermanifest.current.xml from $tempPath (taken from the enclosing
scope). If a Value="false" AEPCC parameter is present, an intermediate copy
without those lines is written first. The Value="true" parameter line is then
emitted after every line matching the Security section opening tag.
#>
function updateManifest {
    Write-Host "$env:computername : Begin updating ClusterManifest.xml File"

    $aepccDisabled = '<Parameter Name="AcceptExpiredPinnedClusterCertificate" Value="false" />'
    $sourceManifest = "$tempPath\clustermanifest.current.xml"
    $newManifest = "$tempPath\modified_clustermanifest.xml"

    #Checking the AEPCC property value
    $disabledLines = Get-Content $sourceManifest | Where-Object { $_ -match $aepccDisabled }
    if ($disabledLines) {
        Write-Host "$env:computername : AEPCC is False"
        # Drop the explicit "false" entries and continue from the cleaned copy.
        $cleanedManifest = "$tempPath\intermediate_clustermanifest.xml"
        Get-Content $sourceManifest |
            Where-Object { $_.Trim() -ne $aepccDisabled } |
            Set-Content $cleanedManifest
        $sourceManifest = $cleanedManifest
    }

    $outputLines = foreach ($manifestLine in (Get-Content -Path $sourceManifest)) {
        # Every existing line is kept as-is
        $manifestLine
        if ($manifestLine -match '<Section Name="Security">') {
            ' <Parameter Name="AcceptExpiredPinnedClusterCertificate" Value="true" />'
        }
    }

    $outputLines | Out-File -FilePath $newManifest -Encoding Default -Force
    Write-Host "$env:computername : Updated the ClusterManifest.xml File : $newManifest"
}
<#
.SYNOPSIS
. Writes modified_settings.xml with the AEPCC parameter set to true
.DESCRIPTION
Reads Settings.xml from $tempPath (taken from the enclosing scope). If a
Value="false" AEPCC parameter is present, an intermediate copy without those
lines is written first. The Value="true" parameter line is then emitted after
every line matching the Security section opening tag, unless the file already
contains the Value="true" parameter. In that case the content is copied
unchanged, so running the function again never produces a duplicate entry.
#>
function updateSettings {
    Write-Host "$env:computername : Begin updating Settings.xml File"
    $settingFile = $tempPath + "\Settings.xml"
    $newSettings = $tempPath + "\modified_settings.xml"
    $aepccFalse = '<Parameter Name="AcceptExpiredPinnedClusterCertificate" Value="false" />'
    $aepccTrue = '<Parameter Name="AcceptExpiredPinnedClusterCertificate" Value="true" />'

    #Checking the AEPCC property value
    $falseLines = @(Get-Content $settingFile | Where-Object { $_ -match $aepccFalse })
    if ($falseLines.Count -gt 0) {
        Write-Host "$env:computername : AEPCC is False"
        $intermediateSettings = $tempPath + "\intermediate_Settings.xml"
        Get-Content $settingFile | Where-Object { $_.Trim() -ne $aepccFalse } | Set-Content $intermediateSettings
        $settingFile = $intermediateSettings
    }

    $settingLines = @(Get-Content -Path $settingFile)

    # Settings.xml may already carry the parameter, for example after a
    # previous partial run. Do not insert it a second time.
    $alreadyEnabled = @($settingLines | Where-Object { $_ -match $aepccTrue }).Count -gt 0

    $ModContent = foreach ($line in $settingLines) {
        $line
        if (-not $alreadyEnabled -and $line -match '<Section Name="Security">') {
            ' ' + $aepccTrue
        }
    }
    $ModContent | Out-File -FilePath $newSettings -Encoding Default -Force
    Write-Host "$env:computername : Updated Settings.xml $newSettings"
}
<#
.SYNOPSIS
. Stops ServiceFabricNodeBootstrapAgent, then FabricHostSvc
.DESCRIPTION
For each service in turn: requests a stop if it is currently running, then
polls once per second until the service reports Stopped or can no longer be
retrieved. A running FabricInstaller process triggers a warning and a
15 second pause before anything is stopped.
#>
function StopServiceFabricServices {
    $installerProcesses = @(Get-Process | Where-Object { $_.ProcessName -like "*FabricInstaller*" })
    if ($installerProcesses.Count -gt 0) {
        Write-Warning "$env:computername : Found FabricInstaller running, may cause issues if not stopped, consult manual guide..."
        Write-Host "$env:computername : Pausing (15s)..."
        Start-Sleep -Seconds 15
    }

    # Order matters: the bootstrap agent is handled before the fabric host.
    foreach ($serviceName in @("ServiceFabricNodeBootstrapAgent", "FabricHostSvc")) {
        $service = Get-Service -Name $serviceName
        if ($service.Status -eq "Running") {
            Stop-Service $serviceName -ErrorAction SilentlyContinue
            Write-Host "$env:computername : Stopping $serviceName service"
        }

        while ($true) {
            Start-Sleep -Seconds 1
            $service = Get-Service -Name $serviceName
            if (-not $service) {
                # Service could not be retrieved; nothing to wait for.
                break
            }
            if ($service.Status -eq "Stopped") {
                Write-Host "$env:computername : $serviceName now stopped"
                break
            }
            Write-Host "$env:computername : $serviceName current status: $($service.Status)"
        }
    }
}
<#
.SYNOPSIS
. Starts FabricHostSvc, then ServiceFabricNodeBootstrapAgent
.DESCRIPTION
For each service in turn: requests a start if it is currently stopped, then
polls once per second until the service reports Running or can no longer be
retrieved.
#>
function StartServiceFabricServices {
    # Order matters: the fabric host is handled before the bootstrap agent.
    foreach ($serviceName in @("FabricHostSvc", "ServiceFabricNodeBootstrapAgent")) {
        $service = Get-Service -Name $serviceName
        if ($service.Status -eq "Stopped") {
            Start-Service $serviceName -ErrorAction SilentlyContinue
            Write-Host "$env:computername : Starting $serviceName service"
        }

        while ($true) {
            Start-Sleep -Seconds 1
            $service = Get-Service -Name $serviceName
            if (-not $service) {
                # Service could not be retrieved; nothing to wait for.
                break
            }
            if ($service.Status -eq "Running") {
                Write-Host "$env:computername : $serviceName now running"
                break
            }
            Write-Host "$env:computername : $serviceName current status: $($service.Status)"
        }
    }
}
# ---- Per-node fix ------------------------------------------------------
# Locates the node folder under the data root, and unless the current
# cluster manifest already enables AcceptExpiredPinnedClusterCertificate,
# rewrites Settings.xml and the manifest with the parameter set to true,
# then stops the services, applies the node configuration and restarts.
#
#config files we need
#"D:\SvcFab\<node name>\Fabric\ClusterManifest.current.xml"
#"D:\SvcFab\<node name>\Fabric\Fabric.Config.<current version>\Settings.xml"

# The node folder is the grandparent of the Fabric.Data directory. Only the
# first match is used so $hostPath is always a single name.
$result = Get-ChildItem -Path $clusterDataRootPath -Filter "Fabric.Data" -Directory -Recurse | Select-Object -First 1
if (!$result) {
    # Without the node folder every path below would be wrong; do not touch services.
    Write-Error "$env:computername : Fabric.Data folder not found under $clusterDataRootPath, node not modified"
    return
}
$hostPath = $result.Parent.Parent.Name
Write-Host "---- Node Name : " $hostPath
Write-Host "---------------------------------------------------------------------------------------------------------"
$manifestPath = $clusterDataRootPath + "\" + $hostPath + "\Fabric\ClusterManifest.current.xml"
$infrastructureManifest = $clusterDataRootPath + "\" + $hostPath + "\Fabric\Fabric.Data\InfrastructureManifest.xml"
if (!(Test-Path -LiteralPath $manifestPath)) {
    # An unreadable manifest would look like "AEPCC not set" to the check
    # below and lead to stopping services with nothing valid to apply.
    Write-Error "$env:computername : Cluster manifest not found: $manifestPath, node not modified"
    return
}
# Validating whether Manifest file already contain AEPCC parameter with true
[object]$tempAEPCC = get-content $manifestPath | select-string -pattern '<Parameter Name="AcceptExpiredPinnedClusterCertificate" Value="true" />' -AllMatches
If (!$tempAEPCC) {
    #to get the settings.xml we need to determine the current version
    #"D:\SvcFab\<node name>\Fabric\Fabric.Package.current.xml" --> Read to determine version# <ConfigPackage Name="Fabric.Config" Version="1.131523081591497214" />
    $currentPackage = $clusterDataRootPath + "\" + $hostPath + "\Fabric\Fabric.Package.current.xml"
    $currentPackageXml = [xml](Get-Content $currentPackage)
    $packageName = $currentPackageXml.ServicePackage.DigestedConfigPackage.ConfigPackage | Select-Object -ExpandProperty Name
    $packageVersion = $currentPackageXml.ServicePackage.DigestedConfigPackage.ConfigPackage | Select-Object -ExpandProperty Version
    $SettingsFile = $clusterDataRootPath + "\" + $hostPath + "\Fabric\" + $packageName + "." + $packageVersion + "\settings.xml"
    $SettingsPath = $clusterDataRootPath + "\" + $hostPath + "\Fabric\" + $packageName + "." + $packageVersion
    Write-Host "$env:computername : Settings file: " $SettingsFile
    Write-Host "$env:computername : Settings path: " $SettingsPath

    # create a temp folder
    $tempFolder = New-Item -ItemType Directory -Force -Path $tempPath
    Write-Host "$env:computername : Created the temp Work folder :" $tempFolder

    #copy current config to the temp folder
    Copy-Item -Path $manifestPath -Destination $tempPath -Force -Verbose
    $newManifest = $tempPath + "\modified_clustermanifest.xml"
    Copy-Item -Path $SettingsFile -Destination $tempPath -Force -Verbose
    $newSettings = $tempPath + "\modified_settings.xml"

    # Appending cluster manifest File with AcceptExpiredPinnedClusterCertificate with value true
    updateManifest
    # Appending cluster Settings File with AcceptExpiredPinnedClusterCertificate with value true
    updateSettings

    ### Backup the live Settings.xml next to it, then replace it with the modified copy
    $backupSettingsFile = $SettingsPath + "\settings_backup.xml"
    Copy-Item -Path $SettingsFile -Destination $backupSettingsFile -Force -Verbose
    Copy-Item -Path $newSettings -Destination $SettingsFile -Force -Verbose

    #stop these services
    Write-Host "$env:computername : Stopping services"
    StopServiceFabricServices

    #update the node configuration
    $logRoot = $clusterDataRootPath + "\Log"
    Write-Host "$env:computername : Updating Node configuration with new setting AcceptExpiredPinnedClusterCertificate "
    #For Debugging
    Write-Host "$env:computername : Cluster Manifest $newManifest"
    Write-Host "$env:computername : Log Root $logRoot"
    Write-Host "$env:computername : Cluster Data Path : $clusterDataRootPath"
    Write-Host "$env:computername : Infra : $infrastructureManifest"
    New-ServiceFabricNodeConfiguration -FabricDataRoot $clusterDataRootPath -FabricLogRoot $logRoot -ClusterManifestPath $newManifest -InfrastructureManifestPath $infrastructureManifest
    Write-Host "$env:computername : Updating Node configuration complete"

    #restart these services
    Write-Host "$env:computername : Starting services "
    StartServiceFabricServices
}
else {
    # Only the two script block parameters exist in this scope, so the
    # message identifies the node by computer name alone.
    Write-Host "$env:computername : Manifest File already contains the AEPCC parameter"
}
}
# -localOnly: run the node fix in this session and end the script here,
# skipping credential prompts and all remoting.
if ($localOnly) {
    write-host "executing on local node only"
    $localArguments = @($clusterDataRootPath, $tempPath)
    Invoke-Command -ScriptBlock $scriptBlock -ArgumentList $localArguments
    return
}
# Reuse credentials cached in $global:creds when available; otherwise prompt,
# and cache the answer only when -cacheCredentials was given.
if ($global:creds) {
    $creds = $global:creds
}
else {
    Write-Host "Enter your RDP Credentials"
    $creds = Get-Credential
    if ($cacheCredentials) {
        $global:creds = $creds
    }
}
<#
.SYNOPSIS
Runs the node fix script block on each address in a list over PowerShell remoting
.DESCRIPTION
For every address: checks reachability with Test-Connection, sets the local
WinRM TrustedHosts value to that address, enables the 'File and Printer
Sharing' firewall rule group on the node, then invokes $scriptBlock there.
Addresses are appended to $global:successNodes or $global:failNodes depending
on whether $error is empty after each remote call.
Reads $creds, $startTime, $clusterDataRootPath and $tempPath from the calling scope.
.PARAMETER title
Label shown in the progress activity text.
.PARAMETER scriptBlock
Script block invoked on each node with $clusterDataRootPath and $tempPath as arguments.
.PARAMETER nodeIpArray
Addresses of the nodes to process.
#>
function fixNodes($title, $scriptBlock, $nodeIpArray) {
    $count = 0
    ForEach ($nodeIpAddress in $nodeIpArray) {
        $count++

        #Verifying whether corresponding VM is up and running
        if (!(Test-Connection -ComputerName $nodeIpAddress -Quiet)) {
            Write-Warning "$env:computername : unable to connect to node: $nodeIpAddress"
            $global:failNodes += $nodeIpAddress
            continue
        }

        $activity = "$title : total minutes: $(((get-date) - $startTime).TotalMinutes.tostring("0.0")). connecting to: $nodeIpAddress ($count of $($nodeIpArray.Count))"
        $status = "success: $($global:successNodes | sort -Unique) fail: $($global:failNodes | sort -Unique)"
        Write-Progress -Activity $activity `
            -Status $status `
            -PercentComplete (($count / $nodeIpArray.Count) * 100)

        # TrustedHosts is overwritten with the current node only; the caller
        # saved the original value and restores it at the end of the run.
        write-host "updating trustedhosts list" -foregroundcolor green
        set-item wsman:\localhost\Client\TrustedHosts -value $nodeIpAddress -Force
        Write-Host "---------------------------------------------------------------------------------------------------------"
        Write-Host "---- Node IP :" $nodeIpAddress
        Start-Sleep(1)

        # First remote call doubles as a connectivity/credential check.
        $error.clear()
        Invoke-Command -Authentication Negotiate -ComputerName $nodeIpAddress {
            # Assigned to a variable only to keep the rule list out of the output.
            $temp = Set-NetFirewallRule -DisplayGroup 'File and Printer Sharing' -Enabled True -PassThru |
                Select-Object DisplayName, Enabled
        } -Credential ($creds)
        if ($error) {
            $global:failNodes += $nodeIpAddress
            continue
        }

        #******************************************************************************
        # Script body
        # Execution begins here
        #******************************************************************************
        # Use the same explicit credentials as the first call; the targets are
        # addressed by IP, where implicit credentials are not reliably accepted.
        $error.clear()
        Invoke-Command -Authentication Negotiate -Computername $nodeIpAddress -Scriptblock $scriptBlock -ArgumentList $clusterDataRootPath, $tempPath -Credential ($creds)
        if ($error) {
            $global:failNodes += $nodeIpAddress
        }
        else {
            $global:successNodes += $nodeIpAddress
        }
    }
    Write-Progress -Completed -Activity "complete"
}
<#
.SYNOPSIS
Fix the first set of nodes: the seed nodes read from the local cluster
manifest when -nodeIpArray was left at its "0" default, otherwise the
addresses supplied by the caller.
#>
$nodeSetTitle = "Custom Nodes"
if ($nodeIpArray[0] -eq 0 ) {
    Write-Host "Getting Seed node details" -foregroundcolor green
    $result = Get-ChildItem -Path $clusterDataRootPath -Filter "Fabric.Data" -Directory -Recurse
    $hostPath = $result.Parent.Parent.Name
    $manifestPath = "$clusterDataRootPath\$hostPath\Fabric\ClusterManifest.current.xml"
    $manConfig = [xml](Get-Content $manifestPath)
    # Each Vote element under Infrastructure/PaaS/Votes carries one seed node address.
    $seedNode = $manConfig.ClusterManifest.Infrastructure.PaaS.Votes.Vote
    $nodeIpArray = $seedNode.IPAddressOrFQDN
    $nodeSetTitle = "Seed Nodes"
}
fixNodes -title $nodeSetTitle -scriptBlock $scriptBlock -nodeIpArray $nodeIpArray
# ---- Second pass over all nodes, cleanup and summary -------------------
# Waits 60 seconds after the first pass, asks the cluster for its node list
# and runs the fix on every node, then restores the saved TrustedHosts value
# and prints the unique success/failure lists.
Write-host "Getting the list of all nodes" -foregroundcolor green
start-sleep -Seconds 60
Connect-ServiceFabricCluster
$node = Get-ServiceFabricNode
# Collect only non-empty addresses so a failed query yields an empty list.
$allNodeIps = @($node | ForEach-Object { $_.IpAddressOrFQDN } | Where-Object { $_ })
if ($allNodeIps.Count -gt 0) {
    Write-host "Fixing All Nodes" -foregroundcolor green
    fixNodes -title "All Nodes" -scriptBlock $scriptBlock -nodeIpArray $allNodeIps
}
else {
    write-warning "unable to get the list of all nodes from the cluster, only the nodes processed above were handled"
}

write-host "reset trusted hosts to original values" -foregroundcolor green
set-item wsman:\localhost\Client\TrustedHosts -value $curValue -Force
Write-Progress -Completed -Activity "complete"

if ($global:successNodes) {
    $successUnique = $global:successNodes | sort-object -Unique
    write-host "total node success: $(@($successUnique).Count)" -ForegroundColor green
    # Strings are listed directly; Format-List on a string shows only its Length.
    write-host ($successUnique | out-string)
}
if ($global:failNodes) {
    $failUnique = $global:failNodes | sort-object -Unique
    write-warning "`r`ntotal node connection errors: $(@($failUnique).Count). review output"
    write-host ($failUnique | out-string)
    write-warning "for any failed nodes, rdp to node and run this script with '-localOnly' switch"
}
write-host "finished. total minutes: $(((get-date) - $startTime).TotalMinutes.ToString("0.0"))" -foregroundcolor green