Scripts/FixMissingSeednode.ps1 (150 lines of code) (raw):

# # For usage information, please refer to https://github.com/Azure/Service-Fabric-Troubleshooting-Guides/blob/master/Cluster/How%20to%20fix%20missing%20seednodes%20with%20Automated%20script.md # Param( [Parameter(Mandatory=$false)] [ValidateNotNullOrEmpty()] [string] $ClusterDataRootPath="D:\SvcFab", [Parameter(Mandatory=$true)] [ValidateNotNullOrEmpty()] [string]$NodeToFake="_sys_4", [Parameter(Mandatory=$true)] [ValidateNotNullOrEmpty()] [string]$TemporaryNodeIpAddress="10.0.0.10" # ip address for this node which we will fake into looking like $NodeToFake - missing seed node _sys_4 ) Set-StrictMode -Version 3 $ErrorActionPreference = "Stop" If(!(Test-Path $ClusterDataRootPath)) { Write-Host $ClusterDataRootPath " not found, exiting." Exit-PSSession } function StopServiceFabricServices { $bootstrapAgent = "ServiceFabricNodeBootstrapAgent" $fabricHost = "FabricHostSvc" $bootstrapService = Get-Service -Name $bootstrapAgent if ($bootstrapService.Status -eq "Running"){ Stop-Service $bootstrapAgent Write-Host "Stopping " $bootstrapAgent " service" } else { Write-Host $fabricHost " not Running" } Do { Start-Sleep -Seconds 1 $bootstrapService = Get-Service -Name $bootstrapAgent if ($bootstrapService.Status -eq "Stopped"){ Write-Host $bootstrapAgent " now stopped" } else { Write-Host $bootstrapAgent " current status:" $bootstrapService.Status } } While ($bootstrapService.Status -ne "Stopped") $fabricHostService = Get-Service -Name $fabricHost if ($fabricHostService.Status -eq "Running"){ Stop-Service $fabricHost Write-Host "Stopping " $fabricHost " service" } else { Write-Host $fabricHost " not Running" } Do { Start-Sleep -Seconds 1 $fabricHostService = Get-Service -Name $fabricHost if ($fabricHostService.Status -eq "Stopped"){ Write-Host $fabricHost " now stopped" } else { Write-Host $fabricHost " current status:" $fabricHostService.Status } } While ($fabricHostService.Status -ne "Stopped") } function StartServiceFabricServices { $bootstrapAgent = "ServiceFabricNodeBootstrapAgent" $fabricHost = "FabricHostSvc" $fabricHostService = Get-Service -Name $fabricHost if ($fabricHostService.Status -eq "Stopped"){ Start-Service $fabricHost -ErrorAction SilentlyContinue -ErrorVariable FabricHostProcessError Write-Host "Starting" $fabricHost " service" } else { Write-Host $fabricHost " not Stopped" } Do { Start-Sleep -Seconds 1 $fabricHostService = Get-Service -Name $fabricHost if ($fabricHostService.Status -eq "Running"){ Write-Host $fabricHost " now running" } else { Write-Host $fabricHost " current status:" $fabricHostService.Status } } While ($fabricHostService.Status -ne "Running") $bootstrapService = Get-Service -Name $bootstrapAgent if ($bootstrapService.Status -eq "Stopped"){ Start-Service $bootstrapAgent -ErrorAction SilentlyContinue -ErrorVariable BootstrapProcessError Write-Host "Starting" $bootstrapAgent " service" } else { Write-Host $bootstrapAgent " not Stopped" } Do { Start-Sleep -Seconds 1 $bootstrapService = Get-Service -Name $bootstrapAgent if ($bootstrapService.Status -eq "Running"){ Write-Host $bootstrapAgent " now running" } else { Write-Host $bootstrapAgent " current status:" $bootstrapService.Status } } While ($bootstrapService.Status -ne "Running") } # Stop the Service Fabric services Write-Host "Stopping services " StopServiceFabricServices # Parse and locate important configuration files $result = Get-ChildItem -Path $ClusterDataRootPath -Filter "Fabric.Data" -Directory -Recurse $hostPath = $result.Parent.Parent.Name Write-Host "---------------------------------------------------------------------------------------------------------" Write-Host "---- Working on ip:" $hostPath Write-Host "---------------------------------------------------------------------------------------------------------" $manifestPath = $ClusterDataRootPath + "\" + $hostPath + "\Fabric\ClusterManifest.current.xml" $currentPackage = $ClusterDataRootPath + "\" + $hostPath + "\Fabric\Fabric.Package.current.xml" $infrastructureManifest = $ClusterDataRootPath + "\" + $hostPath + "\Fabric\Fabric.Data\InfrastructureManifest.xml" # Create the temp folder $tempFolder = 'd:\temp\seednodework' New-Item -ItemType Directory -Force -Path $tempFolder # Read and update current configs, save to the temp folder with new names $newManifest = Join-Path $tempFolder 'modified_clustermanifest.xml' $newInfraManifest = Join-Path $tempFolder 'modified_InfrastructureManifest.xml' # Parse seednodes $clusterManifest = [xml](Get-Content $manifestPath) $seednodes = $clusterManifest.ClusterManifest.Infrastructure.PaaS.Votes.Vote $oldIp = '0.0.0.0' foreach($vote in $seednodes) { Echo $vote.NodeName if($vote.NodeName -eq $NodeToFake) { $oldIp = $vote.IPAddressOrFQDN break } } if($oldIp -eq '0.0.0.0') { Write-Host Write-Host "Error: Cannot find Vote entry for " $($NodeToFake) " in " $($manifestPath) Write-Host Stop } # Find and replace old ip with the new one (Get-Content $manifestPath | Foreach-Object { $_ -replace $oldIp, $TemporaryNodeIpAddress } | Set-Content $newManifest) # Find and replace old ip with the new one, and current nodename with nodetofake (Get-Content $infrastructureManifest | Foreach-Object { $_ -replace $oldIp, $TemporaryNodeIpAddress } | Foreach-Object { $_ -replace $hostPath, $NodeToFake } | Set-Content $newInfraManifest) # Create new node configuration New-ServiceFabricNodeConfiguration -ClusterManifestPath $newManifest -InfrastructureManifestPath $newInfraManifest # Rename the old node configuration folder $fullHostPath = Join-Path $ClusterDataRootPath $hostPath $oldHostPath = Join-Path $tempFolder "oldNode" New-Item -ItemType Directory -Force -Path $oldHostPath Get-ChildItem -Path $fullHostPath -Recurse | Move-Item -destination $oldHostPath Remove-Item $fullHostPath -Force # Restart the Service Fabric services Write-Host "Starting services " StartServiceFabricServices