Scripts/FixExpiredCert.ps1 (304 lines of code) (raw):

<# .SYNOPSIS Script to attempt to replace expired certificate on service fabric cluster .DESCRIPTION Usage Instructions: Refer to https://github.com/Azure/Service-Fabric-Troubleshooting-Guides/blob/master/Security/Fix%20Expired%20Cluster%20Certificate%20-%20Automated%20Script .PARAMETER clusterDataRootPath service fabric data installation path. default d:\svcfab in azure .PARAMETER oldThumbPrint [required][string] old / expired existing certificate thumbprint that needs to be replaced .PARAMETER newThumbPrint [required][string] new / valid certificate thumbprint for cluster .PARAMETER certStoreLocation [string]default certificate store location 'Cert:\LocalMachine\My\', .PARAMETER nodeIpArray [string[]] string array of ip addresses of nodes in cluster .PARAMETER cacheCredentials [switch] enable storing credentials in $global:creds variable. to clear, execute: $global:creds=$null .PARAMETER localOnly switch to optionally run script only on local node. use when there are connectivity issues between nodes by rdp'ing to each node and running with this switch. .LINK iwr https://raw.githubusercontent.com/Azure/Service-Fabric-Troubleshooting-Guides/master/Scripts/FixExpiredCert.ps1 -out $pwd/FixExpiredCert.ps1 #> Param( [ValidateNotNullOrEmpty()] [string] $clusterDataRootPath = "D:\SvcFab", [ValidateNotNullOrEmpty()] [string]$oldThumbprint = "replace with expired thumbprint", [ValidateNotNullOrEmpty()] [string]$newThumbprint = "replace with new thumbprint", [ValidateNotNullOrEmpty()] [string]$certStoreLocation = 'Cert:\LocalMachine\My\', [ValidateNotNullOrEmpty()] [string[]]$nodeIpArray = @("10.0.0.4", "10.0.0.5", "10.0.0.6" ), [switch]$cacheCredentials, [switch]$localOnly ) $error.Clear() $ErrorActionPreference = 'continue' $startTime = get-date $global:failNodes = @() $global:successNodes = @() $count = 0 $creds = $null If (!(Test-Path $clusterDataRootPath)) { Write-Host $clusterDataRootPath " not found, exiting." return } $curValue = (get-item wsman:\localhost\Client\TrustedHosts).value if (![regex]::IsMatch($oldThumbprint, '^[0-9A-Fa-f]{24,}$') -or ![regex]::IsMatch($newThumbprint, '^[0-9A-Fa-f]{24,}$')) { $errMessage = "verify oldthumbprint:($oldthumbprint) and newthumbprint:($newthumbprint) are specified and are correct." if($oldThumbprint.Contains(' ') -or $oldThumbprint.Contains(' ')){ write-error $errMessage return } write-warning $errMessage } $scriptBlock = { param($clusterDataRootPath, $oldThumbprint, $newThumbprint, $certStoreLocation) Write-Host "$env:computername : Running on $((Get-WmiObject win32_computersystem).DNSHostName)" -ForegroundColor Green function StopServiceFabricServices { if ($(Get-Process | ? ProcessName -like "*FabricInstaller*" | measure).Count -gt 0) { Write-Warning "$env:computername : Found FabricInstaller running, may cause issues if not stopped, consult manual guide..." Write-Host "$env:computername : Pausing (15s)..." -ForegroundColor Green Start-Sleep -Seconds 15 } $bootstrapAgent = "ServiceFabricNodeBootstrapAgent" $fabricHost = "FabricHostSvc" $bootstrapService = Get-Service -Name $bootstrapAgent if ($bootstrapService.Status -eq "Running") { Stop-Service $bootstrapAgent -ErrorAction SilentlyContinue Write-Host "$env:computername : Stopping $bootstrapAgent service" -ForegroundColor Green } Do { Start-Sleep -Seconds 1 $bootstrapService = Get-Service -Name $bootstrapAgent if(!$bootstrapService) { break } if ($bootstrapService.Status -eq "Stopped") { Write-Host "$env:computername : $bootstrapAgent now stopped" -ForegroundColor Green } else { Write-Host "$env:computername : $bootstrapAgent current status: $($bootstrapService.Status)" -ForegroundColor Green } } While ($bootstrapService.Status -ne "Stopped") $fabricHostService = Get-Service -Name $fabricHost if ($fabricHostService.Status -eq "Running") { Stop-Service $fabricHost -ErrorAction SilentlyContinue Write-Host "$env:computername : Stopping $fabricHost service" -ForegroundColor Green } Do { Start-Sleep -Seconds 1 $fabricHostService = Get-Service -Name $fabricHost if(!$fabricHostService) { break } if ($fabricHostService.Status -eq "Stopped") { Write-Host "$env:computername : $fabricHost now stopped" -ForegroundColor Green } else { Write-Host "$env:computername : $fabricHost current status: $($fabricHostService.Status)" -ForegroundColor Green } } While ($fabricHostService.Status -ne "Stopped") } function StartServiceFabricServices { $bootstrapAgent = "ServiceFabricNodeBootstrapAgent" $fabricHost = "FabricHostSvc" $fabricHostService = Get-Service -Name $fabricHost if ($fabricHostService.Status -eq "Stopped") { Start-Service $fabricHost -ErrorAction SilentlyContinue Write-Host "$env:computername : Starting $fabricHost service" -ForegroundColor Green } Do { Start-Sleep -Seconds 1 $fabricHostService = Get-Service -Name $fabricHost if(!$fabricHostService) { break } if ($fabricHostService.Status -eq "Running") { Write-Host "$env:computername : $fabricHost now running" -ForegroundColor Green } else { Write-Host "$env:computername : $fabricHost current status: $($fabricHostService.Status)" -ForegroundColor Green } } While ($fabricHostService.Status -ne "Running") $bootstrapService = Get-Service -Name $bootstrapAgent if ($bootstrapService.Status -eq "Stopped") { Start-Service $bootstrapAgent -ErrorAction SilentlyContinue Write-Host "$env:computername : Starting $bootstrapAgent service" -ForegroundColor Green } Do { Start-Sleep -Seconds 1 $bootstrapService = Get-Service -Name $bootstrapAgent if(!$bootstrapService) { break } if ($bootstrapService.Status -eq "Running") { Write-Host "$env:computername : $bootstrapAgent now running" -ForegroundColor Green } else { Write-Host "$env:computername : $bootstrapAgent current status: $($bootstrapService.Status)" -ForegroundColor Green } } While ($bootstrapService.Status -ne "Running") } #config files we need #"D:\SvcFab\clusterManifest.xml" #"D:\SvcFab\_sys_0\Fabric\Fabric.Data\InfrastructureManifest.xml" #"D:\SvcFab\_sys_0\Fabric\Fabric.Config.1.131523081591497214\Settings.xml" $result = Get-ChildItem -Path $clusterDataRootPath -Filter "Fabric.Data" -Directory -Recurse $hostPath = $result.Parent.Parent.Name Write-Host "---------------------------------------------------------------------------------------------------------" Write-Host "---- Working on ip:" $hostPath Write-Host "---------------------------------------------------------------------------------------------------------" $manifestPath = $clusterDataRootPath + "\" + $hostPath + "\Fabric\ClusterManifest.current.xml" $currentPackage = $clusterDataRootPath + "\" + $hostPath + "\Fabric\Fabric.Package.current.xml" $infrastructureManifest = $clusterDataRootPath + "\" + $hostPath + "\Fabric\Fabric.Data\InfrastructureManifest.xml" #to get the settings.xml we need to determine the current version #"D:\SvcFab\_sys_0\Fabric\Fabric.Package.current.xml" --> Read to determine verion# <ConfigPackage Name="Fabric.Config" Version="1.131523081591497214" /> $currentPackageXml = [xml](Get-Content $currentPackage) $packageName = $currentPackageXml.ServicePackage.DigestedConfigPackage.ConfigPackage | Select-Object -ExpandProperty Name $packageVersion = $currentPackageXml.ServicePackage.DigestedConfigPackage.ConfigPackage | Select-Object -ExpandProperty Version $SettingsFile = $clusterDataRootPath + "\" + $hostPath + "\Fabric\" + $packageName + "." + $packageVersion + "\settings.xml" $SettingsPath = $clusterDataRootPath + "\" + $hostPath + "\Fabric\" + $packageName + "." + $packageVersion Write-Host "$env:computername : settings file: " $SettingsFile Write-Host "$env:computername : Settings path: " $SettingsPath $settings = [xml](Get-Content $SettingsFile) #TODO: validate newThumbprint is installed $thumbprintPath = $certStoreLocation + $newThumbprint If (!(Test-Path $thumbprintPath)) { Write-Host "$env:computername : $newThumbprint not installed" return } #TODO: validate newThumbprint is ACL'd for NETWORK_SERVICE #------------------------------------------------------- start ACL #Change to the location of the local machine certificates $currentLocation = Get-Location Set-Location $certStoreLocation #display list of installed certificates in this store Get-ChildItem | Format-Table Subject, Thumbprint, SerialNumber -AutoSize Set-Location $currentLocation $thumbprint = $certStoreLocation + $newThumbprint Write-Host "$env:computername : Setting ACL for $thumbprint" -ForegroundColor Green #get the container name $cert = get-item $thumbprint # Specify the user, the permissions and the permission type $permission = "$("NETWORK SERVICE")", "FullControl", "Allow" $accessRule = New-Object -TypeName System.Security.AccessControl.FileSystemAccessRule -ArgumentList $permission # Location of the machine related keys $keyPath = Join-Path -Path $env:ProgramData -ChildPath "\Microsoft\Crypto\RSA\MachineKeys" $keyName = $cert.PrivateKey.CspKeyContainerInfo.UniqueKeyContainerName $keyFullPath = Join-Path -Path $keyPath -ChildPath $keyName # Get the current acl of the private key $acl = (Get-Item $keyFullPath).GetAccessControl('Access') # Add the new ace to the acl of the private key $acl.SetAccessRule($accessRule) # Write back the new acl Set-Acl -Path $keyFullPath -AclObject $acl -ErrorAction Stop # Observe the access rights currently assigned to this certificate. get-acl $keyFullPath | Format-List #------------------------------------------------------- done ACL # create a temp folder New-Item -ItemType Directory -Force -Path 'd:\temp\certwork' | out-null #copy current config to the temp folder Copy-Item -Path $manifestPath -Destination 'd:\temp\certwork' -Force -Verbose $newManifest = "D:\temp\certwork\modified_clustermanifest.xml" Copy-Item -Path $infrastructureManifest -Destination 'd:\temp\certwork' -Force -Verbose $newInfraManifest = "D:\temp\certwork\modified_InfrastructureManifest.xml" Copy-Item -Path $SettingsFile -Destination 'd:\temp\certwork' -Force -Verbose $newSettingsManifest = "D:\temp\certwork\modified_settings.xml" # find and replace old thumbprint with the new one (Get-Content "d:\temp\certwork\clustermanifest.current.xml" | Foreach-Object { $_ -replace $oldThumbprint, $newThumbprint } | Set-Content $newManifest) # find and replace old thumbprint with the new one (Get-Content "d:\temp\certwork\InfrastructureManifest.xml" | Foreach-Object { $_ -replace $oldThumbprint, $newThumbprint } | Set-Content $newInfraManifest) # find and replace old thumbprint with the new one (Get-Content "d:\temp\certwork\settings.xml" | Foreach-Object { $_ -replace $oldThumbprint, $newThumbprint } | Set-Content $newSettingsManifest) $backupSettingsFile = $SettingsPath + "\settings_backup.xml" Copy-Item -Path $SettingsFile -Destination $backupSettingsFile -Force -Verbose Copy-Item -Path $newSettingsManifest -Destination $SettingsFile -Force -Verbose #stop these services Write-Host "$env:computername : Stopping services " -ForegroundColor Green StopServiceFabricServices #update the node configuration $logRoot = $clusterDataRootPath + "\Log" Write-Host "$env:computername : Updating Node configuration with new cert: $newThumbprint" -ForegroundColor Green New-ServiceFabricNodeConfiguration -FabricDataRoot $clusterDataRootPath -FabricLogRoot $logRoot -ClusterManifestPath $newManifest -InfrastructureManifestPath $newInfraManifest Write-Host "$env:computername : Updating Node configuration with new cert: complete" -ForegroundColor Green #restart these services Write-Host "$env:computername : Starting services " -ForegroundColor Green StartServiceFabricServices } if ($localOnly) { write-host "executing on local node only" invoke-command -ScriptBlock $scriptBlock -ArgumentList $clusterDataRootPath, $oldThumbprint, $newThumbprint, $certStoreLocation return } if (!$global:creds) { Write-Host "Enter your RDP Credentials" #Get the RDP User Name and Password $creds = Get-Credential if ($cacheCredentials) { $global:creds = $creds } } else{ $creds = $global:creds } ForEach ($nodeIpAddress in $nodeIpArray) { $count++ #Verifying whether corresponding VM is up and running if (Test-Connection -ComputerName $nodeIpAddress -Quiet) { $activity = "total minutes: $(((get-date) - $startTime).TotalMinutes.tostring("0.0")). connecting to: $nodeIpAddress ($count of $($nodeIpArray.Count))" $status = "success: $($global:successNodes | sort -Unique) fail: $($global:failNodes | sort -Unique)" Write-Progress -Activity $activity ` -Status $status ` -PercentComplete (($count / $nodeIpArray.Count) * 100) write-host "$env:computername : updating trustedhosts list" -foregroundcolor green set-item wsman:\localhost\Client\TrustedHosts -value $nodeIpAddress -Force Start-Sleep(1) $error.clear() Invoke-Command -Authentication Negotiate -ComputerName $nodeIpAddress { Set-NetFirewallRule -DisplayGroup 'File and Printer Sharing' -Enabled True -PassThru | Select-Object DisplayName, Enabled } -Credential ($creds) if ($error) { $global:failNodes += $nodeIpAddress continue } $error.clear() Invoke-Command -Authentication Negotiate -Computername $nodeIpAddress -Scriptblock $scriptBlock ` -ArgumentList $clusterDataRootPath, $oldThumbprint, $newThumbprint, $certStoreLocation if ($error) { $global:failNodes += $nodeIpAddress } else { $global:successNodes += $nodeIpAddress } } else { Write-Warning "$env:computername : unable to connect to node: $nodeIpAddress" $global:failNodes += $nodeIpAddress } } write-host "reset trusted hosts to original values" -foregroundcolor green set-item wsman:\localhost\Client\TrustedHosts -value $curValue -Force Write-Progress -Completed -Activity "complete" if ($global:successNodes) { $successUnique = $global:successNodes | sort-object -Unique write-host "total node success: $(@($successUnique).Count)" -ForegroundColor green write-host ($successUnique | fl * | out-string) } if ($global:failNodes) { $failUnique = $global:failNodes | sort-object -Unique write-warning "`r`ntotal node connection errors: $(@($failUnique).Count). review output" write-host ($failUnique | fl * | out-string) write-warning "for any failed nodes, rdp to node and run this script with '-localOnly' switch" } write-host "finished. total minutes: $(((get-date) - $startTime).TotalMinutes.ToString("0.0"))" -foregroundcolor green