cosmos/scripts/chaos/chaos_script.ps1 (154 lines of code) (raw):
<#
.SYNOPSIS
This script introduces network faults into a Cosmos DB environment for testing purposes.
.DESCRIPTION
The script takes several parameters to configure the fault injection, including the endpoint, master key, database ID, container ID, duration of fault, drop percentage, delay in milliseconds, fault region, and wait time for fault to start. It then uses these parameters to introduce network faults using the Clumsy utility.
.PARAMETER endpoint
The endpoint for the Cosmos DB instance.
.PARAMETER masterKey
The master key for the Cosmos DB instance.
.PARAMETER databaseId
The ID of the database in the Cosmos DB instance.
.PARAMETER containerId
The ID of the container in the database.
.PARAMETER durationOfFaultInSec
The duration of the network fault in seconds.
.PARAMETER dropPercentage
The percentage of packets to drop (optional, but at least one of dropPercentage and delayInMs must be supplied).
.PARAMETER delayInMs
The delay to introduce in milliseconds (optional, but at least one of dropPercentage and delayInMs must be supplied).
.PARAMETER faultRegion
The region where the fault should be introduced.
.PARAMETER waitForFaultToStartInSec
The time to wait before starting the fault in seconds (optional).
.EXAMPLE
.\chaos_script.ps1 -endpoint "https://my-cosmos-db.documents.azure.com:443/" -masterKey "my-master-key" -databaseId "my-database" -containerId "my-container" -durationOfFaultInSec 60 -dropPercentage 50 -delayInMs 200 -faultRegion "West US" -waitForFaultToStartInSec 10
This example introduces a network fault that drops 50% of packets and introduces a 200ms delay for 60 seconds in the "West US" region of the specified Cosmos DB instance. It waits 10 seconds before starting the fault.
#>
param (
    # Cosmos DB account endpoint; passed as -Endpoint to the helper scripts and
    # later replaced by the regional endpoint when faultRegion matches.
    [Parameter(Mandatory = $true)] [ValidateNotNull()] [string] $endpoint,

    # Account master key; passed as -MasterKey to the helper scripts.
    [Parameter(Mandatory = $true)] [ValidateNotNull()] [string] $masterKey,

    # Database id; passed as -DatabaseID to GetPKRange.ps1 and GetAddresses.ps1.
    [Parameter(Mandatory = $true)] [ValidateNotNull()] [string] $databaseId,

    # Container id; passed as -ContainerId to GetPKRange.ps1 and GetAddresses.ps1.
    [Parameter(Mandatory = $true)] [ValidateNotNull()] [string] $containerId,

    # Number of seconds the script sleeps while the fault is active.
    [Parameter(Mandatory = $true)] [ValidateNotNull()] [string] $durationOfFaultInSec,

    # Value handed to clumsy --drop-chance; replaced by 0 when not supplied.
    [string] $dropPercentage,

    # Value handed to clumsy --lag-time; replaced by 0 when not supplied.
    [string] $delayInMs,

    # Name compared against the account's readable location names.
    [Parameter(Mandatory = $true)] [string] $faultRegion,

    # Number of seconds to sleep before any fault-injection work starts.
    [string] $waitForFaultToStartInSec
)
# A run must request at least one fault type: packet drop, added delay, or both.
$anyFaultRequested = $dropPercentage -or $delayInMs
if (-not $anyFaultRequested)
{
    throw "Both dropPercentage and delayInMs cannot be null together"
}

# Optional delay before any fault-injection work starts.
if ($waitForFaultToStartInSec) { Start-Sleep -Seconds $waitForFaultToStartInSec }
# Bootstrap Chocolatey, then use it to install the Clumsy fault-injection tool.
# NOTE(review): this downloads a script at run time and executes it via
# Invoke-Expression, and it does so on every run.
Set-ExecutionPolicy Bypass -Scope Process -Force

# 3072 is OR-ed into the enabled security protocols before the download
# (presumably SecurityProtocolType.Tls12 - confirm against the .NET docs).
[System.Net.ServicePointManager]::SecurityProtocol = [System.Net.ServicePointManager]::SecurityProtocol -bor 3072

$chocolateyWebClient = New-Object System.Net.WebClient
Invoke-Expression ($chocolateyWebClient.DownloadString('https://chocolatey.org/install.ps1'))

# Force-(re)install the compatibility extension and Clumsy without prompting.
choco install chocolatey-compatibility.extension -y --force
choco install clumsy -y --force

Write-Host "To remove WinDivertXX.sys, please remove/uninstall all WinDivert client application(s) and reboot." -ForegroundColor Cyan
# Look up the database account and switch $endpoint to the regional endpoint of
# the readable location whose name equals $faultRegion (-eq on strings is
# case-insensitive in PowerShell).
# NOTE(review): the helper script is resolved relative to the current directory.
$databaseAccountResponseJson = & .\GetDatabaseAccount.ps1 -Endpoint $endpoint -MasterKey $masterKey
$databaseAccountResponseObject = $databaseAccountResponseJson | ConvertFrom-Json
$readableLocations = $databaseAccountResponseObject.readableLocations
$faultRegionFound = $false
foreach ($readableLocation in $readableLocations)
{
    if ($faultRegion -eq $readableLocation.name)
    {
        $endpoint = $readableLocation.databaseAccountEndpoint
        $faultRegionFound = $true
    }
}
# Without a match the original endpoint is still used, so the fault would target
# a different endpoint than the caller asked for; make that visible.
if (-not $faultRegionFound)
{
    Write-Warning "Fault region '$faultRegion' was not found among the account's readable locations; continuing with endpoint '$endpoint'."
}
# Fetch the container's partition key ranges and flatten their ids into the
# comma-separated string that GetAddresses.ps1 receives as -PartitionKeyIds.
# NOTE(review): the helper script is resolved relative to the current directory.
$pkRangeResponseJson = & .\GetPKRange.ps1 -Endpoint $endpoint -MasterKey $masterKey -DatabaseID $databaseId -ContainerId $containerId
$pkRangeResponse = $pkRangeResponseJson | ConvertFrom-Json
$partitionKeyRanges = $pkRangeResponse.PartitionKeyRanges

# -join yields an empty string when there are no ranges, matching the previous
# hand-rolled concatenation.
$partitionKeyRangeIds = @(foreach ($partitionKeyRange in $partitionKeyRanges) { $partitionKeyRange.id })
$commaSeparatedPkid = $partitionKeyRangeIds -join ","
# Fetch the replica addresses for the collected partition key ranges and reduce
# them to the distinct set of backend URIs.
# NOTE(review): the helper script is resolved relative to the current directory.
$addressesResponseJson = & .\GetAddresses.ps1 -Endpoint $endpoint -MasterKey $masterKey -PartitionKeyIds $commaSeparatedPkid -DatabaseID $databaseId -ContainerId $containerId
$addressesResponse = $addressesResponseJson | ConvertFrom-Json
# 'Addresss' and 'physcialUri' are the property names read from the response;
# they are intentionally left as-is.
$addresses = $addressesResponse.Addresss
$backendUriList = New-Object System.Collections.Generic.List[uri]
foreach ($address in $addresses)
{
    # .Add() keeps the generic list; '+=' would rebuild it as an array on every
    # iteration.
    $backendUriList.Add([uri]$address.physcialUri)
}
$backendUriList = $backendUriList | Select-Object -Unique
# Builds $filterStringList: the filter expressions handed to Clumsy, covering
# the gateway endpoint plus every backend node. One expression holds the clauses
# of at most 30 backend URIs because Clumsy limits the filter length; the gateway
# clause is part of the first expression.

# Emits one "(ip.DstAddr == <ip> and tcp.DstPort == <port>)" clause per IPv4
# address the URI's host resolves to. Emits nothing when the lookup yields no
# usable address, so a failed lookup cannot produce a malformed clause.
function Get-FaultFilterClause
{
    param ([uri] $targetUri)

    if (-not $targetUri) { return }
    $targetPort = $targetUri.Port
    # Resolve-DnsName may return several records; only some carry an IPAddress.
    foreach ($resolvedIp in @((Resolve-DnsName $targetUri.Host).IPAddress))
    {
        if (-not $resolvedIp) { continue }
        # The clause uses the IPv4 field 'ip.DstAddr', so skip non-IPv4 answers.
        if (([System.Net.IPAddress]$resolvedIp).AddressFamily -ne [System.Net.Sockets.AddressFamily]::InterNetwork) { continue }
        "(ip.DstAddr == $resolvedIp and tcp.DstPort == $targetPort)"
    }
}

$endpointUri = [uri]$endpoint
$filterStringList = New-Object System.Collections.ArrayList
$pendingClauses = New-Object System.Collections.Generic.List[string]

# Adding gateway endpoint for filtering
foreach ($clause in (Get-FaultFilterClause $endpointUri))
{
    $pendingClauses.Add($clause)
}

# Adding backend nodes for filtering
$counterForUri = 0
foreach ($backendUri in $backendUriList)
{
    $counterForUri++
    foreach ($clause in (Get-FaultFilterClause $backendUri))
    {
        $pendingClauses.Add($clause)
    }
    # There is a filter length limit on Clumsy, therefore limiting uris in filter and adding them in a list
    if ($counterForUri -ge 30)
    {
        if ($pendingClauses.Count -gt 0)
        {
            # ArrayList.Add returns the new index; discard it so it does not
            # end up in the script's output.
            [void]$filterStringList.Add($pendingClauses.ToArray() -join " or ")
        }
        $pendingClauses.Clear()
        $counterForUri = 0
    }
}

# Flush the last, partially filled expression.
if ($pendingClauses.Count -gt 0)
{
    [void]$filterStringList.Add($pendingClauses.ToArray() -join " or ")
}
# Clumsy is always given both a drop chance and a lag time, so a fault type
# that was not requested falls back to 0.
if (-not $dropPercentage) { $dropPercentage = 0 }
if (-not $delayInMs) { $delayInMs = 0 }
# Runs the fault for the requested duration and then removes it. The cleanup
# lives in 'finally' so that the Clumsy processes are stopped and the package
# is uninstalled even when the wait is interrupted or a terminating error
# occurs, instead of leaving the network fault active.
try
{
    # There is a filter length limit on Clumsy, therefore new process for each filter in a list
    foreach ($filter in $filterStringList)
    {
        # Start the fault
        clumsy.exe --filter $filter --drop on --drop-outbound on --drop-chance $dropPercentage --lag on --lag-outbound on --lag-chance 100.0 --lag-time $delayInMs
    }
    if ($durationOfFaultInSec)
    {
        Start-Sleep -Seconds $durationOfFaultInSec
    }
}
finally
{
    # Clearing the fault
    Stop-Process -Name clumsy
    # Uninstall Clumsy
    choco uninstall clumsy -y
    Write-Host "To remove WinDivertXX.sys, please remove/uninstall all WinDivert client application(s) and reboot." -ForegroundColor Cyan
}