staging/provisioning/windows/windowsnodereset.ps1 (130 lines of code) (raw):

<#
.DESCRIPTION
    This script is intended to be run each time a Windows node is restarted and performs
    cleanup actions to help ensure the node comes up cleanly.
#>

# Log file that Write-Log appends to.
$global:LogPath = "c:\k\windowsnodereset.log"
# HNS helper module imported at script entry.
$global:HNSModule = "c:\k\hns.v2.psm1"

# Cluster configuration; -ErrorAction Stop aborts the script when the file cannot be read.
$Global:ClusterConfiguration = ConvertFrom-Json ((Get-Content "c:\k\kubeclusterconfig.json" -ErrorAction Stop) | out-string)

$global:CsiProxyEnabled = [System.Convert]::ToBoolean($Global:ClusterConfiguration.Csi.EnableProxy)
$global:MasterSubnet = $Global:ClusterConfiguration.Kubernetes.ControlPlane.MasterSubnet
$global:NetworkMode = "L2Bridge"
$global:NetworkPlugin = $Global:ClusterConfiguration.Cni.Name

# If dual-stack is enabled, the clusterCidr will have an IPv6 CIDR in the comma separated list.
# We can split the entire string by ":" to get a count of how many ":" there are. If there are
# at least 3 groups (which means there are at least 2 ":") then we know there is an IPv6 CIDR
# in the list. We cannot just rely on `ClusterCidr -like "*::*"` because there are IPv6 CIDRs that
# don't have "::", e.g. fe80:0:0:0:0:0:0:0/64
$IsDualStackEnabled = ($Global:ClusterConfiguration.Kubernetes.Kubeproxy.FeatureGates -contains "IPv6DualStack=true") -Or
    (($Global:ClusterConfiguration.Kubernetes.Network.ClusterCidr -split ":").Count -ge 3)

# Prefixes each pipeline item with the current time in round-trip ("o") format.
filter Timestamp { "$(Get-Date -Format o): $_" }

# Timestamps $message, appends it to $global:LogPath and also passes it down the pipeline
# (Tee-Object), so it shows up in the script's output as well.
function Write-Log ($message) {
    $message | Timestamp | Tee-Object -FilePath $global:LogPath -Append
}

# Registers the scheduled task "hns-remediator-task", which runs c:\k\hnsremediator.ps1
# as SYSTEM with highest run level.
function Register-HNSRemediatorScriptTask {
    # Hardcoding RepetitionInterval to 1 Minute
    # Making it variable would need a new parameter to be added under windowsProfile
    Write-Log "Creating a scheduled task to run hnsremediator.ps1"

    $action = New-ScheduledTaskAction -Execute "powershell.exe" -Argument "-File `"c:\k\hnsremediator.ps1`""
    $principal = New-ScheduledTaskPrincipal -UserId SYSTEM -LogonType ServiceAccount -RunLevel Highest
    # Trigger anchored at today's midnight, repeating every minute with no end.
    $trigger = New-JobTrigger -Once -At (Get-Date).Date -RepeatIndefinitely -RepetitionInterval (New-TimeSpan -Minutes 1)
    $definition = New-ScheduledTask -Action $action -Principal $principal -Trigger $trigger -Description "hns-remediator-task"
    Register-ScheduledTask -TaskName "hns-remediator-task" -InputObject $definition
}

# Removes the scheduled task "hns-remediator-task" (if present) and then deletes the
# C:\k\hns.pid file (if present), retrying while the file still exists.
function Unregister-HNSRemediatorScriptTask {
    if (Get-ScheduledTask -TaskName "hns-remediator-task" -ErrorAction Ignore) {
        Write-Log "Deleting the scheduled task hns-remediator-task"
        Unregister-ScheduledTask -TaskName "hns-remediator-task" -Confirm:$false
    }

    $hnsPIDFile = "C:\k\hns.pid"
    if (Test-Path $hnsPIDFile) {
        # Remove this file since PID of HNS service may have been changed after node crashes or is rebooted.
        # It should not always fail since hns-remediator-task is unregistered.
        # We set the max retry count to 20 to avoid a dead loop for unknown issues.
        $maxRetries = 20
        $deleted = $false

        # Logged once (not per attempt) to keep the log small.
        Write-Log "Deleting $hnsPIDFile"
        for ($retryCount = 0; $retryCount -lt $maxRetries; $retryCount++) {
            Remove-Item -Path $hnsPIDFile -Force -Confirm:$false -ErrorAction Ignore

            # The file may not be deleted successfully because hnsremediator.ps1 is still writing the logs
            if (-not (Test-Path $hnsPIDFile)) {
                $deleted = $true
                break
            }

            # Do not log each individual failure to reduce log volume
            Start-Sleep -Milliseconds 500
        }

        if ($deleted) {
            Write-Log "$hnsPIDFile is deleted"
        }
        else {
            # Surface the exhausted-retries case instead of returning silently with a stale PID file.
            Write-Log "Failed to delete $hnsPIDFile after $maxRetries attempts"
        }
    }
}

Write-Log "Entering windowsnodereset.ps1"
Import-Module $global:HNSModule

Unregister-HNSRemediatorScriptTask

#
# Stop services
#
Write-Log "Stopping kubeproxy service"
Stop-Service kubeproxy

Write-Log "Stopping kubelet service"
Stop-Service kubelet

if ($global:CsiProxyEnabled) {
    Write-Log "Stopping csi-proxy service"
    Stop-Service csi-proxy
}

# NOTE(review): $global:EnableHostsConfigAgent is not assigned in the visible part of this script;
# unless it is set elsewhere, this branch is skipped - confirm.
if ($global:EnableHostsConfigAgent) {
    Write-Log "Stopping hosts-config-agent service"
    Stop-Service hosts-config-agent
}

# Due to a bug in hns there is a race where it picks up the incorrect IPv6 address from the node in some cases.
# Hns service has to be restarted after the node internal IPv6 address is available when dual-stack is enabled.
# TODO Remove this once the bug is fixed in hns.
# Waits until the node has a DHCP-assigned IPv6 address, then restarts the hns service
# when the ManagementIPv6 reported by the IPv6-enabled HNS network differs from it.
function Restart-HnsService {
    # Poll once per second; the loop has no upper bound and only ends once an address is found.
    do {
        Start-Sleep -Seconds 1
        $nodeInternalIPv6Address = (Get-NetIPAddress | Where-Object {$_.PrefixOrigin -eq "Dhcp" -and $_.AddressFamily -eq "IPv6"}).IPAddress
    } while ($null -eq $nodeInternalIPv6Address) # $null on the left: yields a boolean even if the value is a collection
    Write-Log "Got node internal IPv6 address: $nodeInternalIPv6Address"

    $hnsManagementIPv6Address = (Get-HnsNetwork | Where-Object {$_.IPv6 -eq $true}).ManagementIPv6
    Write-Log "Got hns ManagementIPv6: $hnsManagementIPv6Address"

    if ($hnsManagementIPv6Address -ne $nodeInternalIPv6Address) {
        Restart-Service hns
        Write-Log "Restarted hns service"

        # Re-read the address purely for logging, so the log shows what hns picked up after the restart.
        $hnsManagementIPv6Address = (Get-HnsNetwork | Where-Object {$_.IPv6 -eq $true}).ManagementIPv6
        Write-Log "Got hns ManagementIPv6: $hnsManagementIPv6Address after restart"
    }
    else {
        Write-Log "Hns network has correct IPv6 address, not restarting"
    }
}

if ($IsDualStackEnabled) {
    Restart-HnsService
}

#
# Perform cleanup
#
& "c:\k\cleanupnetwork.ps1"

#
# Create required networks
#

# If using kubenet create the HNS network here.
# (The kubelet creates the HNS network when using azure-cni + azure cloud provider)
if ($global:NetworkPlugin -eq 'kubenet') {
    Write-Log "Creating new hns network: $($global:NetworkMode.ToLower())"
    # Get-PodCIDR / Get-DefaultGateway are not defined in this script; presumably provided by an
    # imported module or helper script - verify.
    $podCIDR = Get-PodCIDR
    $masterSubnetGW = Get-DefaultGateway $global:MasterSubnet
    New-HNSNetwork -Type $global:NetworkMode -AddressPrefix $podCIDR -Gateway $masterSubnetGW -Name $global:NetworkMode.ToLower() -Verbose
    Start-Sleep -Seconds 10
}

#
# Start Services
#
if ($global:CsiProxyEnabled) {
    Write-Log "Starting csi-proxy service"
    Start-Service csi-proxy
}

# Counterpart of the conditional "Stopping hosts-config-agent service" step earlier in this script,
# so the agent is not left stopped after a reset.
# NOTE(review): $global:EnableHostsConfigAgent is not assigned in the visible part of this script - confirm where it is set.
if ($global:EnableHostsConfigAgent) {
    Write-Log "Starting hosts-config-agent service"
    Start-Service hosts-config-agent
}

Write-Log "Starting kubelet service"
Start-Service kubelet

Write-Log "Do not start kubeproxy service since kubelet will restart kubeproxy"

Register-HNSRemediatorScriptTask

Write-Log "Exiting windowsnodereset.ps1"