Documents/Add-HPCIaasNode-Scripts/HPCIaaSNodeMgmtUtil.ps1 (629 lines of code) (raw):
# Registry key that holds the HPC IaaS settings (read by GetIaaSInfo).
$script:HpcIaaSRegistryPath = "HKLM:\Software\Microsoft\HPC\IaaSInfo"

# Hardware description of each supported Azure VM size, keyed by size name.
# NOTE(review): Memory is presumably expressed in MB, and the 1/0 values for
# A5-A9 look like placeholders - confirm before relying on them.
$script:AzureSizeInfo = @{
    "Small"      = @{ "Core" = 1;  "Socket" = 1; "Memory" = 1750 }
    "Medium"     = @{ "Core" = 2;  "Socket" = 1; "Memory" = 3500 }
    "Large"      = @{ "Core" = 4;  "Socket" = 1; "Memory" = 7000 }
    "ExtraLarge" = @{ "Core" = 8;  "Socket" = 1; "Memory" = 14000 }
    "A5"         = @{ "Core" = 2;  "Socket" = 1; "Memory" = 1 }
    "A6"         = @{ "Core" = 4;  "Socket" = 1; "Memory" = 0 }
    "A7"         = @{ "Core" = 8;  "Socket" = 1; "Memory" = 0 }
    "A8"         = @{ "Core" = 8;  "Socket" = 1; "Memory" = 0 }
    "A9"         = @{ "Core" = 16; "Socket" = 1; "Memory" = 0 }
}
# Prints $log on the host console (Write-Host) and appends it to the log file.
function TraceVerbose($log)
{
    Write-Host -Object $log
    TraceToLogFile -log $log
}
# Emits $log on the verbose stream (forced on with -Verbose so it is always
# shown) and appends it to the log file.
function TraceInfo($log)
{
    Write-Verbose -Message $log -Verbose
    TraceToLogFile -log $log
}
# Emits $log on the warning stream and appends it to the log file.
function TraceWarning($log)
{
    Write-Warning -Message $log
    TraceToLogFile -log $log
}
# Appends $log to the log file and then reports it on the error stream.
# The file is written first on purpose: when $ErrorActionPreference is 'Stop',
# Write-Error raises a terminating error and nothing after it would run, so the
# message would otherwise never reach the log file.
function TraceError($log)
{
    TraceToLogFile $log
    Write-Error $log
}
# Appends a timestamped line containing $log to $script:LogFile.
# Does nothing when no log file has been configured ($script:LogFile is $null).
function TraceToLogFile($log)
{
    if ($null -eq $script:LogFile)
    {
        return
    }

    $timestamp = Get-Date -format 'MM/dd/yyyy HH:mm:ss'
    "$timestamp $log" | Out-File -Confirm:$false -FilePath $script:LogFile -Append
}
# Prepares the session for the HPC/Azure helper functions in this file:
# optionally starts a new log file, verifies that a recent enough Azure
# PowerShell module is installed, imports it and adds the Microsoft.HPC snap-in.
#
# Parameters:
#   $enableLog - when true (default) a new log file is created under $env:temp
#                and its path stored in $script:LogFile; when false logging to
#                file is switched off ($script:LogFile = $null).
# Throws when the Azure module is missing or older than 0.8.2.
function LoadAzureAndHpcModules($enableLog = $true)
{
    # The script has been tested on Powershell 3.0
    Set-StrictMode -Version 3

    # Keep implicit verbose output of the cmdlets called from here quiet;
    # messages written with an explicit -Verbose switch are still shown.
    $VerbosePreference = "SilentlyContinue"

    if ($enableLog)
    {
        $StartTime = (Get-Date).ToUniversalTime()
        # "HH" (24-hour clock): with "hh" a morning and an afternoon run could
        # produce the same file name.
        $datetimestr = $StartTime.ToString("yyyyMMddHHmmss")
        $script:LogFile = "$env:temp\HPCNodeLog-$datetimestr.txt"
        Write-Verbose "Log will be written to $script:LogFile" -Verbose
    }
    else
    {
        $script:LogFile = $null
    }

    TraceVerbose "Initializing HPC and Azure environment"

    # Check if Windows Azure Powershell is available. Several versions may be
    # installed side by side, so judge by the newest one; comparing the whole
    # list against the minimum version would reject a valid installation as
    # soon as one outdated copy is present.
    $azureModule = Get-Module -ListAvailable -Name Azure |
        Sort-Object -Property Version -Descending |
        Select-Object -First 1
    if ($azureModule -eq $null)
    {
        throw "Azure Powershell not found. Install the latest version from http://www.windowsazure.com/en-us/downloads/#cmd-line-tools"
    }

    if ($azureModule.Version -lt [Version]"0.8.2")
    {
        $ver = $azureModule.Version
        throw "The Azure Powershell version $ver is too old. Install the latest version from http://www.windowsazure.com/en-us/downloads/#cmd-line-tools"
    }

    Import-Module -Name Azure
    Add-PSSnapIn Microsoft.HPC
}
# Makes the Azure subscription with the given ID the current one.
# Returns the name of the selected subscription; throws when no imported
# subscription profile has that ID.
function SelectAzureSubscription($subscriptionId)
{
    $subscription = Get-AzureSubscription | Where-Object { $_.SubscriptionId -eq $subscriptionId }
    if ($subscription -eq $null)
    {
        throw "Cannot find profile for Azure subscription $subscriptionId. Ensure that the subscription ID is correct and first import the Azure profile."
    }

    $subscriptionName = $subscription.SubscriptionName
    Select-AzureSubscription -SubscriptionName $subscriptionName | Out-Null
    return $subscriptionName
}
<#
.Synopsis
Checks that the VNet exists,
that it is a regional VNet,
that it is located in $location,
and that it contains the subnet $subNet.
#>
# Validates a virtual network of the current subscription against the exported
# network configuration. Returns nothing on success and throws a descriptive
# message when:
#   - the network configuration cannot be exported,
#   - $vNet is not defined,
#   - $vNet has no Location attribute (i.e. it is not a regional VNet),
#   - $vNet is in a region other than $location,
#   - $vNet does not contain a subnet named $subNet.
function ValidateAzureVNet($vNet, $subNet, $location)
{
    $uniqueID = [Guid]::NewGuid().ToString()
    $vnetCfgFile = "$env:temp\VNetCfg_$uniqueID.xml"
    # The command is handed over as text, so the path is single-quoted (with
    # embedded quotes doubled) to survive a temp directory containing spaces.
    $quotedCfgFile = "'" + $vnetCfgFile.Replace("'", "''") + "'"

    Try
    {
        InvokeAzureCmdWithRetry -Command "Get-AzureVNetConfig -ExportToFile $quotedCfgFile" | Out-Null

        # Without an exported file nothing can be checked; report that instead
        # of silently treating the virtual network as valid.
        if (-not (Test-Path -Path $vnetCfgFile))
        {
            throw "Failed to export the virtual network configuration. Ensure that virtual network $vNet is configured properly."
        }

        $xmlDoc = [xml](Get-Content $vnetCfgFile)
        $vnetSite = $xmlDoc.GetElementsByTagName("VirtualNetworkSite") | Where-Object {$_.name -eq $vNet}
        if ($vnetSite -eq $null)
        {
            throw "Virtual network $vNet doesn't exist. Ensure that the virtual network is configured properly."
        }

        # Only sites that carry a Location attribute are accepted as regional.
        if (-not $vnetSite.HasAttribute("Location"))
        {
            throw "Virtual network $vNet is not a regional Virtual network. Ensure that the virtual network is configured properly."
        }

        if ($vnetSite.Location -ne $location)
        {
            throw "Virtual network $vNet isn't in region $location. Ensure that the virtual network and region are configured properly."
        }

        if (($vnetSite.Subnets -ne $null) -and ($vnetSite.Subnets.Subnet -ne $null))
        {
            foreach ($sub in $vnetSite.Subnets.Subnet)
            {
                if ($sub.name -eq $subNet)
                {
                    return
                }
            }
        }

        throw "Subnet $subNet doesn't exist in virtual network $vNet. Ensure that the subnet is configured properly."
    }
    Finally
    {
        # The export is a scratch file; remove it on every exit path.
        Remove-Item $vnetCfgFile -Force -Confirm:$false -ErrorAction SilentlyContinue
    }
}
# Checks whether cloud service $ServiceName can host $Quantity more HPC nodes.
#
# Returns:
#   "NotCreated" - the cloud service does not exist yet
#   "Created"    - the service exists but has no production deployment
#   "Deployed"   - the service has a compatible production deployment
# Throws when the service is in another region, or when its production
# deployment uses another virtual network, carries another deployment label,
# or would exceed 50 virtual machines.
function ValidateCloudService($ServiceName, $VNetName, $Location, $AffinityGroupName, $DeploymentLabel, $Quantity)
{
    $service = InvokeAzureCmdWithRetry -Command "Get-AzureService -ServiceName $ServiceName -ErrorAction SilentlyContinue"
    if ($service -eq $null)
    {
        return "NotCreated"
    }

    # A service without a location of its own takes it from its affinity group.
    $serviceLocation = $service.Location
    if ($serviceLocation -eq $null)
    {
        $affinity = InvokeAzureCmdWithRetry -Command "Get-AzureAffinityGroup -Name $($service.AffinityGroup) -ErrorAction SilentlyContinue"
        $serviceLocation = $affinity.Location
    }

    if ($serviceLocation -ne $Location)
    {
        throw "The cloud service $ServiceName must be in region $location"
    }

    # A differing affinity group is only worth a warning, not a failure.
    $affinityRequested = -not [String]::IsNullOrEmpty($AffinityGroupName)
    if ($affinityRequested -and ($service.AffinityGroup -ne $AffinityGroupName))
    {
        TraceWarning "It is better to put the cloud service in the same AffinityGroup $AffinityGroupName as Headnode, it may get better performance!"
    }

    $deployment = InvokeAzureCmdWithRetry -Command "Get-AzureDeployment -ServiceName $ServiceName -Slot Production -ErrorAction SilentlyContinue -WarningAction SilentlyContinue"
    if ($deployment -eq $null)
    {
        return "Created"
    }

    # Check VNet
    if ($deployment.VNetName -ne $VNetName)
    {
        throw ("The existing production deployment of cloud service $ServiceName must be in virtual network $VNetName")
    }

    $existingLabel = DecodeLabelIfBase64Encoded $deployment.Label
    if ($existingLabel -ne $DeploymentLabel)
    {
        throw "The cloud service $ServiceName is already in use by another production deployment."
    }

    $totalAfterDeploy = $Quantity + $deployment.RoleInstanceList.Count
    if ($totalAfterDeploy -gt 50)
    {
        throw "You cann't deploy $Quantity virtual machines to cloud service $ServiceName which already deployed with $($deployment.RoleInstanceList.Count) virutal machines. Cannot deploy more than 50 virtual machines under one cloud service."
    }

    return "Deployed"
}
# Loads the HPC IaaS settings from the registry key $script:HpcIaaSRegistryPath
# and returns them as an HpcIaasInfo object (SubscriptionId, Location,
# AffinityGroup, VNet, SubNet).
# Throws when the registry key is missing or when SubscriptionId, Location,
# VNet or SubNet is empty; AffinityGroup is not checked.
function GetIaaSInfo()
{
    # C# definition of the returned type; kept unindented because it is a here-string.
    $source = @"
public class HpcIaasInfo
{
public string SubscriptionId { get; set; }
public string Location { get; set; }
public string AffinityGroup { get; set; }
public string VNet { get; set; }
public string SubNet { get; set; }
public HpcIaasInfo(string subscriptionId, string location, string affinityGroup, string vNet, string subNet)
{
this.SubscriptionId = subscriptionId;
this.Location = location;
this.AffinityGroup = affinityGroup;
this.VNet = vNet;
this.SubNet = subNet;
}
}
"@
    Add-Type -TypeDefinition $source

    if (-not (Test-Path $script:HpcIaaSRegistryPath))
    {
        throw "Cannot load subscription id related info from the Registry. You must first configure subscription related information."
    }

    $item = Get-Item -LiteralPath $script:HpcIaaSRegistryPath | Get-ItemProperty

    # Required values, checked in this order, each paired with its error message.
    $requiredValues = @(
        @("SubscriptionId", "SubscriptionId does not exist in the Registry. You must first configure subscription related information."),
        @("Location", "Location does not exist in the Registry. You must first configure subscription related information."),
        @("VNet", "VNet does not exist in the Registry. You must first configure subscription related information."),
        @("SubNet", "SubNet does not exist in the Registry. You must first configure subscription related information.")
    )
    foreach ($entry in $requiredValues)
    {
        $valueName = $entry[0]
        if ([String]::IsNullOrEmpty($item.$valueName))
        {
            throw $entry[1]
        }
    }

    return New-Object HpcIaasInfo -argumentList $item.SubscriptionId, $item.Location, $item.AffinityGroup, $item.VNet, $item.SubNet
}
# Throws unless $VMName is a valid generated node name: 3 to 15 characters,
# starting with a letter, ending with a letter or digit, and containing only
# letters, digits and hyphens in between.
function ValidateVMNaming($VMName)
{
    $validNamePattern = "^[a-zA-Z][a-zA-Z0-9-]{1,13}[a-zA-Z0-9]$"
    $isInvalid = $VMName -cnotmatch $validNamePattern
    if ($isInvalid)
    {
        throw "The generated node naming pattern $VMName is invalid. It must contain between 3 and 15 characters."
    }
}
# Throws unless $VMNamePattern has the form <root_name>%<start_number>%:
# the root name starts with a letter and holds only letters, digits and
# hyphens, the start number holds only digits, and the whole pattern
# (percent signs included) is 5 to 17 characters long.
function ValidateNodeNameSeries($VMNamePattern)
{
    $seriesPattern = "^[a-zA-Z][a-zA-Z0-9-]{0,13}%[0-9]{1,14}%$"

    # The length is only inspected once the shape has been confirmed.
    $isValid = ($VMNamePattern -cmatch $seriesPattern) -and
               ($VMNamePattern.Length -le 17) -and
               ($VMNamePattern.Length -ge 5)
    if ($isValid)
    {
        return
    }

    throw "The node naming pattern $VMNamePattern is invalid. It should be <root_name>%<start_number>% and meet the following criterions:`n 1. It must contain between 5 and 17 characters including the percent signs;`n 2. The <root_name> can contain only letters, numbers, and hyphens, and it must start with a letter `n 3. The start_number can contain only numbers.Example: AzureVM%1000%."
}
# Generates $quantity new node names from a naming series such as "Node%1000%".
#
# The number between the percent signs is the starting counter and its digit
# count is the minimum width of the zero-padded number in each name. The last
# used counter is persisted so later calls continue the series: in the HPC
# cluster property NodeNamingSequenceCount when $usingHpcNamingSeries is true,
# otherwise in the tab-separated file "$PSScriptRoot\nodeseries"
# (one "<series><TAB><counter>" record per line).
# That file is opened exclusively for the duration of the call so concurrent
# callers serialize on it; opening is retried 10 times at 5-second intervals.
#
# Parameters:
#   $nodeSeries           - naming pattern containing %<number>%
#   $quantity             - number of names to generate
#   $usingHpcNamingSeries - selects where the counter is persisted (see above)
#   $existedAzureNodes    - names that are already taken and must be skipped (may be $null)
# Returns the list of generated names.
# Throws when the pattern has no %<number>% part, when the file lock cannot be
# obtained, or when a generated name is rejected by ValidateVMNaming.
function GenerateHpcNodeName($nodeSeries, $quantity, $usingHpcNamingSeries, $existedAzureNodes)
{
    $Regex = [System.Text.RegularExpressions.Regex]
    $match = $Regex::Match($nodeSeries, "%[0-9]+%")
    if (($match -eq $null) -or
        (-not $match.Success) -or
        ($match.ToString() -eq $null))
    {
        throw "The node naming pattern format is not correct. Use a pattern similar to NodeName%1000%"
    }

    $baseNum = $match.ToString().Replace("%", "")
    $digits = $baseNum.Length
    $nodeList = @()
    $num = $baseNum -as [int]

    TraceToLogFile "Trying to get file lock to generate node names"
    $maxRetries = 10
    $retry = 0
    $lockFile = $null
    while ($true)
    {
        try
        {
            # FileShare 'None' makes the open file double as an inter-process lock.
            $lockFile = [System.io.File]::Open("$PSScriptRoot\nodeseries", 'OpenOrCreate', 'ReadWrite', 'None')
            break
        }
        catch
        {
            if ($retry -lt $maxRetries)
            {
                TraceToLogFile "Failed to get node naming series file lock. There may be other process generating node names at same time, retry after 5 seconds"
                Start-Sleep -Seconds 5
                $retry++
            }
            else
            {
                throw "Failed to get node naming series file lock to generate node names after $maxRetries retries"
            }
        }
    }

    try
    {
        $sequence = @{}
        if ($usingHpcNamingSeries)
        {
            $step = (Get-HpcClusterProperty -NodeNamingSequenceCount).Value -as [int]
        }
        else
        {
            $reader = New-Object System.IO.StreamReader($lockFile)
            foreach ($record in ($reader.ReadToEnd() -split '[\r\n]+'))
            {
                $fields = $record.Split("`t")
                # Skip blank or malformed lines so they do not turn into a
                # bogus empty-named series that is written back later.
                if (($fields.Count -lt 2) -or [String]::IsNullOrEmpty($fields[0]))
                {
                    continue
                }
                $sequence[$fields[0]] = $fields[1] -as [int]
            }
            $step = $sequence[$nodeSeries]
        }

        # Continue from the persisted counter when it is ahead of the pattern's start number.
        if ($step -gt $num)
        {
            $num = $step
        }

        while ($nodeList.count -lt $quantity)
        {
            $num ++
            if ($num -ge [int]::MaxValue)
            {
                $num = 0
            }

            $nodeName = $Regex::Replace($nodeSeries, "%[0-9]+%", $num.ToString().PadLeft($digits, '0'))

            # -contains compares case-insensitively, matching the -eq name
            # comparisons used elsewhere in this file.
            if (($existedAzureNodes -ne $null) -and ($existedAzureNodes -contains $nodeName))
            {
                TraceToLogFile "Node with name $nodeName has beed already existed in Azure, will continue to generate node name"
                continue
            }

            ValidateVMNaming $nodeName
            $nodeList += $nodeName
        }

        if ($usingHpcNamingSeries)
        {
            TraceToLogFile "Update node sequence count to $num"
            Set-HpcClusterProperty -NodeNamingSequenceCount $num
        }
        else
        {
            $sequence[$nodeSeries] = $num
            $null = $lockFile.Seek(0, 0)
            $writer = New-Object System.IO.StreamWriter($lockFile)
            foreach ($key in $sequence.keys)
            {
                $writer.WriteLine($key + "`t" + $sequence[$key])
            }
            $writer.Flush()
            # Truncate at the real end of the written data. Summing the line
            # lengths ignores the line terminators and would cut off the tail
            # of the last record as soon as more than one series is stored.
            $lockFile.SetLength($lockFile.Position)
        }
    }
    finally
    {
        if ($lockFile -ne $null)
        {
            # Closing the stream releases the lock.
            $lockFile.Close()
        }
    }

    return $nodeList
}
# Builds the deployment label used for HPC deployments in virtual network
# $vNet: "HpcDeploy-<vNet>-<value of the CCP_SCHEDULER environment variable>".
function GetHpcDeploymentLabel($vNet)
{
    $scheduler = $env:CCP_SCHEDULER
    return "HpcDeploy-${vNet}-${scheduler}"
}
# Maps HPC nodes to the Azure VM role instances that back them.
#
# Scans the production deployment of every cloud service that has no location
# or is in $iaasInfo.Location, keeps the deployments in virtual network
# $iaasInfo.VNet, and matches role instances to $hpcNodes by comparing
# InstanceName with NetBiosName.
#
# Parameters:
#   $hpcNodes     - HPC node objects to look up
#   $iaasInfo     - object providing Location and VNet
#   $filterStatus - instances whose InstanceStatus equals this value are
#                   reported under "FilterdNodes" instead of being listed
#                   for batch operations
# Returns a hashtable with:
#   "MappingNodes" - node -> @{ "VM"; "ServiceName" }
#   "Services"     - service name -> @{ "DeploymentName"; "VM"; "WholeDeployment" },
#                    only for services with at least one unfiltered match;
#                    WholeDeployment is true when every instance of the deployment matched
#   "FilterdNodes" - instance names excluded because of $filterStatus
# Throws when one HPC node matches more than one role instance.
function FindAzureNodes($hpcNodes, $iaasInfo, $filterStatus)
{
    $batchServices = @{}
    $nodeToVm = @{}
    $excludedNodes = @()

    $services = InvokeAzureCmdWithRetry -Command "Get-AzureService -ErrorAction SilentlyContinue"
    $services = $services | Where-Object { ($_.Location -eq $null) -or ($_.Location -eq $iaasInfo.Location)}

    foreach ($service in $services.ServiceName)
    {
        $deployment = InvokeAzureCmdWithRetry -Command "Get-AzureDeployment -ServiceName $service -Slot Production -ErrorAction SilentlyContinue"
        if (($deployment -ne $null) -and ($deployment.VNetName -eq $iaasInfo.VNet))
        {
            $instances = $deployment.RoleInstanceList
            $matchedNames = @()

            foreach ($instance in $instances)
            {
                $node = $hpcNodes | Where-Object {$_.NetBiosName -eq $instance.InstanceName}
                if ($node -eq $null)
                {
                    continue
                }

                if ($nodeToVm.ContainsKey($node))
                {
                    throw "More than one Azure node is named $($node.NetBiosName) in virtual network $($iaasInfo.VNet)"
                }
                $nodeToVm[$node] = @{"VM"=$instance; "ServiceName"=$service}

                if ($filterStatus -ne $null -and $filterStatus -eq $instance.InstanceStatus)
                {
                    $excludedNodes += $instance.InstanceName
                }
                else
                {
                    $matchedNames += $instance.InstanceName
                }
            }

            # Services without any usable match are left out of the batch list.
            if ($matchedNames.Count -eq 0)
            {
                continue
            }

            $coversWholeDeployment = ($matchedNames.Count -eq $instances.Count)
            $batchServices[$service] = @{"DeploymentName"=$deployment.DeploymentName; "VM"=$matchedNames; "WholeDeployment"=$coversWholeDeployment}
        }
    }

    return @{"MappingNodes"=$nodeToVm; "Services"=$batchServices; "FilterdNodes"=$excludedNodes}
}
# Returns the names of all Azure VMs in cloud services whose production
# deployment is in virtual network $iaasInfo.VNet. Only services that have no
# location or are in $iaasInfo.Location are inspected.
function GetAzureVMs($iaasInfo)
{
    $vmNames = @()

    $services = InvokeAzureCmdWithRetry -Command "Get-AzureService -ErrorAction SilentlyContinue"
    $services = $services | Where-Object { ($_.Location -eq $null) -or ($_.Location -eq $iaasInfo.Location) }

    foreach ($service in $services.ServiceName)
    {
        $deployment = InvokeAzureCmdWithRetry -Command "Get-AzureDeployment -ServiceName $service -Slot Production -ErrorAction SilentlyContinue"
        if (($deployment -ne $null) -and ($deployment.VNetName -eq $iaasInfo.VNet))
        {
            $serviceVms = InvokeAzureCmdWithRetry -Command "Get-AzureVM -ServiceName $service -ErrorAction SilentlyContinue"
            foreach ($vm in $serviceVms)
            {
                $vmNames += $vm.Name
            }
        }
    }

    return $vmNames
}
# Validates a domain user name and password against the domain $userDomain.
#
# Returns the part of the connected domain controller's name that follows the
# first dot (used by callers as the domain FQDN); when the server name
# contains no dot the whole name is returned.
# Throws when the domain cannot be contacted or the credentials are rejected.
function ValidateCredentials($userDomain, $userName, $password)
{
    Add-Type -AssemblyName System.DirectoryServices.AccountManagement

    # Create a domain principal context for the given domain.
    $ct = [System.DirectoryServices.AccountManagement.ContextType]::Domain
    try
    {
        $pc = New-Object System.DirectoryServices.AccountManagement.PrincipalContext($ct,$userDomain)
    }
    catch
    {
        throw "Error occurred trying to connect to the domain controler. Ensure that the domain user name is correct. Exception: $_"
    }

    try
    {
        # Validate the user's password against the domain.
        if ($pc.ValidateCredentials($userName,$password) -eq $false)
        {
            throw "Invalid domain user name and password"
        }

        # Return the domain FQDN: everything after the first '.' of the server
        # name. IndexOf yields -1 when there is no dot, so Substring(0) then
        # returns the complete name.
        $sepidx = $pc.ConnectedServer.IndexOf('.')
        return $pc.ConnectedServer.SubString($sepidx + 1)
    }
    finally
    {
        # PrincipalContext is disposable; release it on every exit path.
        $pc.Dispose()
    }
}
# Creates a domain-joined Azure VM in cloud service $ServiceName and attaches
# the custom script extension that runs UpdateHPCClusterName.ps1 with
# "-ClusterName $ClusterName".
#
# The VM is provisioned with the local administrator from $LocalAdminCred,
# joined to $DomainFQDN with $DomainUserCred, gets the BGInfo extension and is
# placed in subnet $SubnetName. Sizes A8 and A9 additionally get the
# "HpcVmDrivers" extension. With -Wait the function blocks in WaitForVMReady
# after the VM has been created.
# Any failure during configuration or creation is traced with TraceError and
# rethrown.
#
# NOTE(review): the custom script is fetched over plain http from a fixed
# storage account - confirm that this source is still intended and trusted.
function CreateDomainJoinedVMWithCustomScript
{
    param
    (
        [Parameter(Mandatory=$true)]
        [String] $VMName,

        [Parameter(Mandatory=$true)]
        [String] $ServiceName,

        [Parameter(Mandatory=$true)]
        [String] $VMSize,

        [Parameter(Mandatory=$true)]
        [String] $ImageName,

        [Parameter(Mandatory=$true)]
        [String] $VNetName,

        [Parameter(Mandatory=$true)]
        [String] $SubnetName,

        [Parameter(Mandatory=$true)]
        [PSCredential] $LocalAdminCred,

        [Parameter(Mandatory=$true)]
        [PSCredential] $DomainUserCred,

        [Parameter(Mandatory=$true)]
        [String] $DomainFQDN,

        [Parameter(Mandatory=$true)]
        [String] $DeploymentLabel,

        [Parameter(Mandatory=$true)]
        [String] $ClusterName,

        [Parameter(Mandatory=$false)]
        [ValidateSet("ReadOnly","ReadWrite")]
        [String] $HostCaching = "",

        [Parameter(Mandatory=$false)]
        [Switch] $Wait
    )

    TraceInfo "Creating a domain joined VM $VMName in Service $ServiceName `n Size: `t$VMSize`n Image: `t$ImageName`n VNet: `t$VNetName`n Subnet:`t$SubnetName`n Domain:`t$DomainFQDN"

    # NetBIOS domain name: first label of the FQDN, upper-cased.
    $netbios = $DomainFQDN.Split(".")[0].ToUpper()

    # Strip a leading "DOMAIN\" from the user name when present.
    $domainUserName = $DomainUserCred.UserName
    if ($domainUserName.Contains("\"))
    {
        $domainUserName = $domainUserName.Split("\")[1]
    }

    try
    {
        # -HostCaching is only passed when the caller specified a value.
        $vmConfigArgs = @{
            Name         = $VMName
            InstanceSize = $VMSize
            ImageName    = $ImageName
        }
        if (-not [String]::IsNullOrEmpty($HostCaching))
        {
            $vmConfigArgs["HostCaching"] = $HostCaching
        }
        $vm = New-AzureVMConfig @vmConfigArgs

        $provisioningArgs = @{
            AdminUsername  = $LocalAdminCred.UserName
            Password       = $LocalAdminCred.GetNetworkCredential().Password
            Domain         = $netbios
            DomainUserName = $domainUserName
            DomainPassword = $DomainUserCred.GetNetworkCredential().Password
            JoinDomain     = $DomainFQDN
            WindowsDomain  = $true
        }
        $vm = $vm |
            Add-AzureProvisioningConfig @provisioningArgs |
            Set-AzureVMBGInfoExtension |
            Set-AzureSubnet -SubnetNames $SubnetName |
            Set-AzureVMCustomScriptExtension -FileUri http://yongjun.blob.core.windows.net/scripts/UpdateHPCClusterName.ps1 -Run UpdateHPCClusterName.ps1 -Argument "-ClusterName $ClusterName"

        if (($VMSize -eq "A8") -or ($VMSize -eq "A9"))
        {
            $vm = $vm | Set-AzureVMExtension -ExtensionName "HpcVmDrivers" -Publisher "Microsoft.HpcCompute" -Version "1.*"
        }

        New-AzureVMWithRetry -VM $vm -ServiceName $ServiceName -VNetName $VNetName -DeploymentLabel $DeploymentLabel
    }
    catch
    {
        TraceError "Failed to create a VM $VMName joined to domain $DomainFQDN in cloud service $ServiceName"
        throw
    }

    if ($Wait.IsPresent)
    {
        WaitForVMReady -VMName $VMName -ServiceName $ServiceName
    }
}
# Ensures that the HPC node group "LinuxNodes" exists, creating it when it is
# missing. A terminating error from the HPC cmdlets is retried every 5 seconds,
# up to 120 times, before the function gives up and throws.
# NOTE(review): both cmdlets run with -ErrorAction SilentlyContinue, so their
# non-terminating errors are ignored and do not trigger a retry - confirm that
# this best-effort behavior is intended.
function AddHpcLinuxGroup()
{
    $retry = 0
    $maxRetry = 120
    while ($true)
    {
        try
        {
            $group = Get-HpcGroup -Name LinuxNodes -ErrorAction SilentlyContinue
            if ($group -eq $null)
            {
                New-HpcGroup -Name LinuxNodes -ErrorAction SilentlyContinue
            }
            break
        }
        catch
        {
            if ($retry -lt $maxRetry)
            {
                Start-Sleep -Seconds 5
                $retry ++
            }
            else
            {
                # Report what actually failed; this function has no $nodeName.
                throw "Failed to create the HPC node group LinuxNodes: $_"
            }
        }
    }
}
# Adds $nodeName to the HPC cluster as an unmanaged node in group "LinuxNodes"
# unless a node with that name already exists. A terminating error from the
# HPC cmdlets is retried every 5 seconds, up to 120 times, before the function
# gives up and throws.
#
# Parameters:
#   $nodeName - name of the node to add
#   $core     - value passed as -SubscribedCores
#   $socket   - value passed as -SubscribedSockets
#   $memory   - value passed as -Memory
function AddUnManagedNodeWithRetry($nodeName, $core, $socket, $memory)
{
    $retry = 0
    $maxRetry = 120
    while ($true)
    {
        try
        {
            # Look up the node that is about to be added (this used to query a
            # fixed name, so the existence check never applied to $nodeName).
            $node = Get-HpcNode -Name $nodeName -ErrorAction SilentlyContinue
            if ($node -eq $null)
            {
                Add-HpcUnManagedNode -Name $nodeName -SubscribedCores $core -SubscribedSockets $socket -Memory $memory -GroupName "LinuxNodes"
            }
            break
        }
        catch
        {
            if ($retry -lt $maxRetry)
            {
                Start-Sleep -Seconds 5
                $retry ++
            }
            else
            {
                throw "Failed to add unmanaged node $nodeName to the HPC cluster: $_"
            }
        }
    }
}
# Shared initialization script block for the node management operations.
# It loads the IaaS settings, selects the Azure subscription, resolves the HPC
# nodes named by $Name (head nodes excluded) and maps them to Azure VMs.
# Results are published in script scope: iaasInfo, subscriptionName, hpcNodes,
# mapping, nodeMapping, servicesForBatchOp and filteredNodes.
# Throws when no matching HPC node is found.
$NodeManageInitScript =
{
param
(
# Names of the HPC nodes to operate on (passed to Get-HpcNode -Name).
[Parameter(Mandatory=$true)]
[String[]] $Name,
# Optional instance status; passed on to FindAzureNodes as its filter status.
[Parameter(Mandatory=$false)]
[String] $FilterStatus = $null
)
Add-Type -Path "$env:CCP_HOME/bin/ccppsh.dll"
# Load and validate the pre-configured subscription information from the registry
$script:iaasInfo = GetIaaSInfo
TraceVerbose "Selecting Azure subscription $($script:iaasInfo.SubscriptionId)"
$script:subscriptionName = SelectAzureSubscription $script:iaasInfo.SubscriptionId
# Get the HPC nodes matching $Name; head nodes are excluded from the result
TraceVerbose "Retrieving HPC node information"
$script:hpcNodes = Get-HpcNode -Name $Name -ErrorAction SilentlyContinue | Where-Object {$_.IsHeadNode -eq $false}
if (($script:hpcNodes -eq $null) -or ($script:hpcNodes.Count -eq 0))
{
throw "Cannot find HPC node with name matching $($Name -join ',')"
}
# Go through the production deployment of each cloud service and map its VMs
# to the HPC nodes (FindAzureNodes matches by virtual network and instance name)
TraceVerbose "Scanning Azure virtual machine mapping to HPC nodes ......"
$script:mapping = FindAzureNodes $script:hpcNodes $script:iaasInfo $FilterStatus
# Unpack the lookup result; "FilterdNodes" is the key spelling used by FindAzureNodes
$script:nodeMapping = $mapping["MappingNodes"]
$script:servicesForBatchOp = $mapping["Services"]
$script:filteredNodes = $mapping["FilterdNodes"]
}