eng/scripts/Update-Azure-Package-Data.ps1 (326 lines of code) (raw):

[CmdletBinding()] param ( [string] $github_pat = $env:GITHUB_PAT, # Use long names for languages so data explorer cached string searching is more performant [array] $languages = @("java", "dotnet", "python", "javascript", "golang", "cpp"), [int] $daysAgo = 730, [string] $outPath = "package-data.csv", [switch] $clearTable, [switch] $updateDatabase ) . "$PSScriptRoot\PackageVersion-Helpers.ps1" $ErrorActionPreference = 'Stop' $PSNativeCommandUseErrorActionPreference = $true $CachedKustoToken = $null function Get-java-Packages($daysAgo) { # Rest API docs https://search.maven.org/classic/#api $baseMavenQueryUrl = "https://search.maven.org/solrsearch/select?q=g:com.microsoft.azure*%20OR%20g:com.azure*&rows=100&wt=json" $mavenQuery = Invoke-RestMethod $baseMavenQueryUrl -MaximumRetryCount 3 Write-Host "Found $($mavenQuery.response.numFound) java packages on maven packages" $packages = @() $count = 0 while ($count -lt $mavenQuery.response.numFound) { $responsePackages = $mavenQuery.response.docs foreach ($pkg in $responsePackages) { if ($pkg.g -ne "com.azure.android") { $packages += @{ Name = $pkg.a; GroupId = $pkg.g } } } $count += $mavenQuery.response.docs.count $mavenQuery = Invoke-RestMethod ($baseMavenQueryUrl + "&start=$count") -MaximumRetryCount 3 } $allPackageVersionList = @() $pkgNum = 0 foreach ($pkg in $packages) { $pkgNum++ $pkgName = $pkg.Name $versionsMavenQueryUrl = "https://search.maven.org/solrsearch/select?q=a:${pkgName}&core=gav&rows=1000&wt=json" $versionsQuery = Invoke-RestMethod $versionsMavenQueryUrl -MaximumRetryCount 3 $count = 0 while ($count -lt $versionsQuery.response.numFound) { Write-Host "$pkgNum - $count - Getting versions for $($pkg.GroupId):$($pkg.Name)" $versions = $versionsQuery.response.docs foreach ($ver in $versions) { $verDate = [datetimeoffset]::FromUnixTimeMilliseconds($ver.timestamp).DateTime $allPackageVersionList += ,(@($pkgName, $ver.v, $verDate)) } $count += $versionsQuery.response.docs.count $versionsQuery = Invoke-RestMethod ($versionsMavenQueryUrl + "&start=$count") -MaximumRetryCount 3 } } return $allPackageVersionList } function Get-dotnet-Packages($daysAgo) { # Rest API docs # https://docs.microsoft.com/nuget/api/search-query-service-resource # https://docs.microsoft.com/nuget/consume-packages/finding-and-choosing-packages#search-syntax $nugetQuery = Invoke-RestMethod "https://azuresearch-usnc.nuget.org/query?q=owner:azure-sdk&prerelease=true&semVerLevel=2.0.0&take=1000" -MaximumRetryCount 3 Write-Host "Found $($nugetQuery.totalHits) nuget packages" $packages = $nugetQuery.data $allPackageVersionList = @() $pkgNum = 0 foreach ($pkg in $packages) { if ($pkg.title -notlike 'Azure.*' -and $pkg.title -notlike 'Microsoft.Azure.*') { Write-Host "Skipping $($pkg.title)" continue } Write-Host "$pkgNum - Getting versions for $($pkg.title)" $versionsQuery = Invoke-RestMethod $pkg.registration -MaximumRetryCount 3 $versions = $versionsQuery.items foreach ($versionGroup in $versions) { foreach ($versionData in $versionGroup.items) { $version = ($versionData.packageContent -split '/')[5] $time = $versionData.catalogEntry.published $allPackageVersionList += ,(@($pkg.title, $version, $time)) } } $pkgNum++ } return $allPackageVersionList } function Get-javascript-Packages($daysAgo) { $from = 0 $npmPackages = @() do { # Rest API docs https://github.com/npm/registry/blob/master/docs/REGISTRY-API.md # max size returned is 250 so we have to do some basic paging. $npmQuery = Invoke-RestMethod "https://registry.npmjs.com/-/v1/search?text=maintainer:azure-sdk&size=250&from=$from" -MaximumRetryCount 3 if ($npmQuery.objects.Count -ne 0) { $npmPackages += $npmQuery.objects.package } $from += $npmQuery.objects.Count } while ($npmQuery.objects.Count -ne 0); $publishedPackages = $npmPackages | Where-Object { $_.publisher.username -eq "azure-sdk" } Write-Host "Found $($publishedPackages.Count) npm packages" $allPackageVersionList = @() $pkgNum = 0 foreach ($pkg in $publishedPackages) { Write-Host "$pkgNum - Getting versions for $($pkg.name)" $versions = npm show $pkg.name time --json | ConvertFrom-Json $releases = $versions.PSObject.Properties | Where-Object { $_ -notlike "*created*" -and $_ -notlike "*modified*" -and $_ -notlike '*-dev*' -and $_ -notlike '*-alpha*' } foreach ($release in $releases) { $allPackageVersionList += ,(@($pkg.name, $release.Name, $release.Value)) } $pkgNum++ } return $allPackageVersionList } function Get-python-Packages($daysAgo) { $pythonQuery = "import xmlrpc.client; [print(pkg[1]) for pkg in xmlrpc.client.ServerProxy('https://pypi.org/pypi').user_packages('azure-sdk')]" $pythonPackagesNames = (python -c "$pythonQuery") $pythonPackages = $pythonPackagesNames | Foreach-Object { try { (Invoke-RestMethod "https://pypi.org/pypi/$_/json" -MaximumRetryCount 3) } catch { } } Write-Host "Found $($pythonPackages.Count) python packages" $releasesWithDate = @() $pkgNum = 0 foreach ($package in $pythonPackages) { if ($package.info.name -notlike "azure-*") { Write-Host "Skipping $($package.info.name)"; continue } $packageReleases = @() foreach ($prop in $package.releases.PSObject.Properties) { $packageReleases += ,(@($package.info.name, $prop.Name, $prop.Value.upload_time?[0])) } Write-Host "$pkgNum - $($package.info.name)" $pkgNum++ foreach ($pr in $packageReleases) { $releasesWithDate += ,($pr) } } return $releasesWithDate } function Get-cpp-Packages($daysAgo) { $offset = [DateTimeOffset]::UtcNow.AddDays(-$daysAgo) $repoTags = GetPackageVersions -lang "cpp" -afterDate $offset Write-Host "Found $($repoTags.Count) recent tags in cpp repo" foreach ($tag in $repoTags.Keys) { foreach ($versionData in $repoTags[$tag].Versions) { $allPackageVersionList += ,(@($tag, $versionData.RawVersion, (Get-Date $versionData.Date))) } } return $allPackageVersionList } function Get-golang-Packages($daysAgo) { $offset = [DateTimeOffset]::UtcNow.AddDays(-$daysAgo) $repoTags = GetPackageVersions -lang "go" -afterDate $offset Write-Host "Found $($repoTags.Count) recent tags in go repo" $allPackageVersionList = @() foreach ($tag in $repoTags.Keys) { # We should keep this regex in sync with what is in the go repo at https://github.com/Azure/azure-sdk-for-go/blob/main/eng/scripts/Language-Settings.ps1#L40 if ($tag -match "(?<modPath>(sdk|profile)/(?<serviceDir>(.*?(?<serviceName>[^/]+)/)?(?<modName>[^/]+$)))") { foreach ($versionData in $repoTags[$tag].Versions) { $allPackageVersionList += ,(@($tag, $versionData.RawVersion, (Get-Date $versionData.Date))) } } } return $allPackageVersionList } function Set-Package-Data($languages, $daysAgo, $outPath) { $allPackages = @() foreach ($lang in $languages) { $supportedLanguages = @("dotnet", "java", "javascript", "python", "golang", "cpp") if ($lang -notin $supportedLanguages) { throw "Unknown language $lang. Supported languages are $supportedLanguages" } $packages = Invoke-Expression "Get-$lang-Packages $daysAgo" foreach ($pkg in $packages) { if ($null -eq $pkg[2]) { Write-Warning "No package date for $($pkg[0]) - $($pkg[1])" continue } $pkg += $lang if ((Get-Date $pkg[2]) -ge ((Get-Date).AddDays(-$daysAgo))) { $allPackages += ,@($pkg) } } } $allPackages ` | Sort-Object { $_[2] } ` # Sort by date | ForEach-Object { [PSCustomObject]@{ "Date" = $_[2]; "Package" = $_[0]; "Version" = $_[1]; "Language" = $_[3] } } ` | ConvertTo-Csv -UseQuotes Never ` | Out-File $outPath } # Helper function to view quick package counts for a time period in csv format function Get-Package-Buckets($languages, $daysAgo) { $today = Get-Date $dayHash = @{} $datePos = 0 while ($datePos -ge -$daysAgo) { # Zero value all dates so data explorer queries and charts are easier to normalize $day = Get-Date $today.AddDays($datePos) -Format "yyyy-MM-dd" $dayHash[$day] = @{} foreach ($lang in $languages) { $dayHash[$day][$lang] = 0 } $datePos-- } foreach ($lang in $languages) { $supportedLanguages = @("dotnet", "java", "javascript", "python", "golang", "cpp") if ($lang -notin $supportedLanguages) { throw "Unknown language $lang. Supported languages are $supportedLanguages" } $total = 0 $packages = Invoke-Expression "Get-$lang-Packages $daysAgo" $recentPackages = $packages | Where-Object { $_[2] -ge $today.AddDays(-$daysAgo) } foreach ($pkg in $recentPackages) { $day = Get-Date $pkg[2] -Format "yyyy-MM-dd" $dayHash[$day][$lang] = $dayHash[$day][$lang] + 1 $total++ } Write-Host "Total packages for $lang - $total" } $header = @("DATE") foreach($lang in $languages) { $header += $lang.ToUpper() } Write-Host ($header -join ",") foreach ($day in $dayHash.Keys) { $line = @($day) foreach($lang in $languages) { $line += $dayHash[$day][$lang] } Write-Host ($line -join ",") } } function sendDataExplorerCommand([switch]$mgmt, [string]$query) { $cluster = 'https://azsdkengsys.westus2.kusto.windows.net' if (!$CachedKustoToken) { $CachedKustoToken = az account get-access-token --resource $cluster --query accessToken --output tsv } $secureToken = ConvertTo-SecureString -String $CachedKustoToken -AsPlainText -Force $endpoint = if ($mgmt) { "$cluster/v1/rest/mgmt" } else { "$cluster/v2/rest/query" } $body = @{ db = 'Pipelines'; csl = $query } | ConvertTo-Json $resp = Invoke-RestMethod ` -Method Post $endpoint ` -Body $body ` -Authentication Bearer ` -Token $secureToken ` -ContentType 'application/json' ` -Headers @{ accept = 'application/json' } return $resp } function Set-DataExplorer([switch]$clearTable) { $ErrorActionPreference = "Stop" $table = 'BebroderTest' $packageBlob = 'https://azsdkpackagereleasedata.blob.core.windows.net/data/package-data.csv' if ($clearTable) { Write-Host "Clearing table $table" $csl = ".clear table $table data" $resp = sendDataExplorerCommand -mgmt -query $csl } Write-Host "Sleeping 5 seconds to avoid throttling" Start-Sleep -Seconds 5 Write-Host "Ingesting into table $table from $packageBlob" $csl = ".ingest into table $table '$packageBlob' with (ignoreFirstRecord=true)" try { $resp = sendDataExplorerCommand -mgmt -query $csl } catch { # Data Explorer ingest throttling seems to be 1+ minutes, so add a long wait here for pipeline usage Write-Warning "Sleeping 5 minutes before retrying ingest" Start-Sleep -Seconds 300 $resp = sendDataExplorerCommand -mgmt -query $csl } Write-Host "Sleeping 5 seconds to avoid throttling" Start-Sleep -Seconds 5 Write-Host "Table $table count" $csl = "$table | count" $resp = sendDataExplorerCommand -query $csl $columns = $resp[2].Columns $rows = $resp[2].Rows Write-Host $columns.ColumnName foreach ($row in $rows) { Write-Host $row } } # Get-Package-Buckets $Languages $daysAgo Set-Package-Data $Languages $daysAgo $outPath if ($updateDatabase) { az storage blob upload -f $outPath --account-name azsdkpackagereleasedata --container-name data --overwrite Set-DataExplorer $clearTable }