tools/assets-automation/assets-maintenance-tool/Azure.Sdk.Tools.Assets.MaintenanceTool/Scan/AssetsScanner.cs (229 lines of code) (raw):
using System.Text;
using System.Text.Json;
using Azure.Sdk.Tools.Assets.MaintenanceTool.Model;
using Azure.Sdk.Tools.TestProxy.Common.Exceptions;
using Azure.Sdk.Tools.TestProxy.Store;
using Microsoft.Extensions.FileSystemGlobbing;
namespace Azure.Sdk.Tools.Assets.MaintenanceTool.Scan;
/// <summary>
/// Used to walk through repo configurations and locate all assets.
/// </summary>
public class AssetsScanner
{
/// <summary>
/// Directory the scanner works in. Per-repo folders are created beneath it, and the
/// "output.json" results file is read from and written to it.
/// </summary>
public string WorkingDirectory { get; set; }
/// <summary>
/// Name of the environment variable that is read for a git token. When that variable is set,
/// its value is embedded in the clone URI.
/// </summary>
public static readonly string GitTokenEnvVar = "GIT_TOKEN";
/// <summary>
/// Full path of the "output.json" results file inside <see cref="WorkingDirectory"/>.
/// </summary>
private string ResultsFile
=> Path.Combine(WorkingDirectory, "output.json");
/// <summary>
/// Runs every git command issued by this scanner. Settable, so a different handler instance can be supplied.
/// </summary>
public GitProcessHandler handler { get; set; } = new GitProcessHandler();
/// <summary>
/// Creates a scanner rooted at <paramref name="workingDirectory"/>.
/// </summary>
/// <param name="workingDirectory">Directory to work in. When null, the process's current directory is used.</param>
public AssetsScanner(string? workingDirectory = null)
    => WorkingDirectory = workingDirectory ?? Directory.GetCurrentDirectory();
/// <summary>
/// Walk a run configuration and create a resultSet of all found assets.json references.
///
/// This function automatically takes previous output into account by checking in the current
/// working directory for an "output.json" file that contains the output of a previously run Scan.
/// </summary>
/// <param name="config">The run configuration; each entry of its LanguageRepos is scanned in parallel.</param>
/// <returns>A set of results which combines any previous output with a new scan.</returns>
public AssetsResultSet Scan(RunConfiguration config)
{
    var resultSet = new List<AssetsResult>();
    // List<T> does not support concurrent writers, so every merge into resultSet is serialized on this gate.
    var resultSetGate = new object();
    AssetsResultSet? existingResults = ParseExistingResults();

    Parallel.ForEach(config.LanguageRepos, repoConfig =>
    {
        // Do the expensive scan outside the lock so the repos still run in parallel;
        // only the cheap AddRange is serialized.
        var repoResults = ScanRepo(repoConfig, existingResults);

        lock (resultSetGate)
        {
            resultSet.AddRange(repoResults);
        }
    });

    return new AssetsResultSet(resultSet);
}
/// <summary>
/// Loads the results of a previous run, if there are any. The "output.json" file in the working
/// directory is expected to hold a serialized List of AssetsResult.
/// </summary>
/// <returns>
/// The previous results wrapped in an AssetsResultSet, or null when the file does not exist
/// or deserializes to null.
/// </returns>
public AssetsResultSet? ParseExistingResults()
{
    // No previous run in this directory: nothing to merge.
    if (!File.Exists(ResultsFile))
    {
        return null;
    }

    using var resultsStream = System.IO.File.OpenRead(ResultsFile);
    using var parsedDocument = JsonDocument.Parse(resultsStream);
    var previousResults = JsonSerializer.Deserialize<List<AssetsResult>>(parsedDocument);

    return previousResults == null ? null : new AssetsResultSet(previousResults);
}
/// <summary>
/// Given a repo configuration, scan the repo and return an AssetsResult list from all targeted branches.
/// </summary>
/// <param name="config">
/// Repo to scan. Its LanguageRepo, Branches, ScanStartDate and ScanFolders are read here.
/// </param>
/// <param name="previousOutput">
/// Results of an earlier run, or null. Commits already present in it are not re-scanned;
/// their stored results are copied into the returned list instead.
/// </param>
/// <returns>Newly scanned results plus the reused previous results for every configured branch.</returns>
private List<AssetsResult> ScanRepo(RepoConfiguration config, AssetsResultSet? previousOutput)
{
    // When a token is present in the environment it is placed in front of the host in the clone URI.
    string? envOverride = Environment.GetEnvironmentVariable(GitTokenEnvVar);
    var authString = string.Empty;
    if (!string.IsNullOrWhiteSpace(envOverride))
    {
        authString = $"{envOverride}@";
    }

    var targetRepoUri = $"https://{authString}github.com/{config.LanguageRepo}.git";
    // One folder per repo beneath the scanner's working directory ("owner/name" becomes "owner_name").
    var workingDirectory = Path.Combine(WorkingDirectory, config.LanguageRepo.Replace("/", "_"));
    var results = new List<AssetsResult>();

    if (!Directory.Exists(workingDirectory))
    {
        Directory.CreateDirectory(workingDirectory);
    }

    foreach (var branch in config.Branches)
    {
        var commitsOnBranch = GetBranchCommits(targetRepoUri, branch, config.ScanStartDate, workingDirectory);
        var unretrievedCommits = ResolveUnhandledCommits(commitsOnBranch, previousOutput);

        results.AddRange(GetAssetsResults(config.LanguageRepo, unretrievedCommits, workingDirectory, config.ScanFolders));

        if (previousOutput != null)
        {
            // Set lookup keeps this pass linear in the number of commits instead of a
            // List.Contains scan for every commit on the branch.
            var unretrievedLookup = new HashSet<string>(unretrievedCommits);

            // Every commit that was not freshly scanned is known to the previous output; reuse its results.
            foreach (var commit in commitsOnBranch.Where(sha => !unretrievedLookup.Contains(sha)))
            {
                results.AddRange(previousOutput.ByOriginSHA[commit]);
            }
        }
    }

    return results;
}
/// <summary>
/// Clones a specific branch, then returns all commit shas newer than our targeted date.
/// </summary>
/// <param name="uri">Clone URI of the repo.</param>
/// <param name="branch">Branch to clone (first run) or fetch and check out (later runs).</param>
/// <param name="since">
/// Either the literal "latest", which selects only the newest commit, or a value handed to
/// <c>git log --since=</c>.
/// </param>
/// <param name="workingDirectory">Folder holding (or about to hold) the clone.</param>
/// <returns>A list of commits (limited to after a startdate) from the targeted branch.</returns>
/// <remarks>Any failure is written to the console and the process exits with code 1.</remarks>
private List<string> GetBranchCommits(string uri, string branch, string since, string workingDirectory)
{
    var commitSHAs = new List<string>();
    try
    {
        // if git is already initialized, we just need to checkout a specific branch
        if (!Directory.Exists(Path.Combine(workingDirectory, ".git")))
        {
            handler.Run($"clone {uri} --branch {branch} --single-branch .", workingDirectory);
        }
        else
        {
            handler.Run($"fetch origin {branch}", workingDirectory);
            handler.Run($"branch {branch} FETCH_HEAD", workingDirectory);
            handler.Run($"checkout {branch}", workingDirectory);
            Cleanup(workingDirectory);
        }

        CommandResult tagResult;
        if (since == "latest")
        {
            tagResult = handler.Run($"log -n 1 --format=format:%H", workingDirectory);
        }
        else
        {
            tagResult = handler.Run($"log --since={since} --format=format:%H", workingDirectory);
        }

        // git separates lines with LF, while Environment.NewLine is CRLF on Windows. Splitting on
        // either character keeps one SHA per entry on every platform.
        commitSHAs.AddRange(
            tagResult.StdOut
                .Split(new[] { '\r', '\n' }, StringSplitOptions.RemoveEmptyEntries)
                .Select(x => x.Trim())
                .Where(x => !string.IsNullOrWhiteSpace(x)));
    }
    catch (GitProcessException gitException)
    {
        // special case handling here?
        Console.WriteLine(gitException.ToString());
        Environment.Exit(1);
    }
    catch (Exception e)
    {
        Console.WriteLine(e.ToString());
        Environment.Exit(1);
    }

    return commitSHAs;
}
/// <summary>
/// Filters a branch's commits down to the ones that still need scanning. A commit SHA never changes,
/// so a SHA that is already a key of the previous results' ByOriginSHA lookup is skipped to avoid
/// emitting duplicate assetsResults.
/// </summary>
/// <param name="commits">Commit SHAs found on the targeted branch.</param>
/// <param name="previousResults">Results of an earlier run, or null when there are none.</param>
/// <returns>
/// The set of unprocessed commit SHAs. When there are no previous results this is the
/// <paramref name="commits"/> list itself.
/// </returns>
private List<string> ResolveUnhandledCommits(List<string> commits, AssetsResultSet? previousResults)
    => previousResults == null
        ? commits
        : commits.Where(sha => !previousResults.ByOriginSHA.ContainsKey(sha)).ToList();
/// <summary>
/// Minimal shape of an assets.json: only the two properties this tool reads.
/// Both default to an empty string.
/// </summary>
private class Assets
{
    /// <summary>The assets repository named by the assets.json.</summary>
    public string AssetsRepo { get; set; } = string.Empty;

    /// <summary>The tag named by the assets.json.</summary>
    public string Tag { get; set; } = string.Empty;
}
/// <summary>
/// Reads an assets.json from disk and deserializes it to get its Tag and AssetsRepo.
/// </summary>
/// <param name="assetsJson">Path of the assets.json file to read.</param>
/// <returns>A class instance containing the assets.json details, or null when the content deserializes to null.</returns>
private Assets? ExtractAssetsData(string assetsJson)
{
    string rawJson = File.ReadAllText(assetsJson);
    Assets? parsed = JsonSerializer.Deserialize<Assets>(rawJson);
    return parsed;
}
/// <summary>
/// Globs for every assets.json beneath the working directory (optionally limited to specific folders)
/// and turns each one into an AssetsResult.
/// </summary>
/// <param name="repo">Repo identifier recorded on each result.</param>
/// <param name="commit">Commit SHA recorded on each result.</param>
/// <param name="workingDirectory">Root of the checked-out repo; result paths are made relative to it.</param>
/// <param name="scanFolders">Folders to restrict the search to. When empty, the whole working directory is searched.</param>
/// <returns>AssetsResults for each discovered assets.json, populating other metadata as necessary.</returns>
private List<AssetsResult> ScanDirectory(string repo, string commit, string workingDirectory, List<string> scanFolders)
{
    var globber = new Matcher();

    if (scanFolders.Count == 0)
    {
        globber.AddIncludePatterns(new[] { "**/assets.json" });
    }
    else
    {
        // One include pattern per requested folder, added in the order the folders were given.
        globber.AddIncludePatterns(scanFolders.Select(folder => Path.Combine(folder, "**/assets.json")));
    }

    var found = new List<AssetsResult>();

    foreach (string fullPath in globber.GetResultsInFullPath(workingDirectory))
    {
        // Stored paths always use forward slashes, whatever the platform separator is.
        string relativePath = Path.GetRelativePath(workingDirectory, fullPath).Replace("\\", "/");
        var parsed = ExtractAssetsData(fullPath);

        if (parsed == null)
        {
            continue;
        }

        found.Add(new AssetsResult(repo, commit, relativePath, parsed.Tag, parsed.AssetsRepo, null));
    }

    return found;
}
/// <summary>
/// Checks out each targeted commit in turn and collects the assets.jsons found at that commit.
/// </summary>
/// <param name="repo">Repo identifier recorded on each result.</param>
/// <param name="commits">Commit SHAs to check out and scan, in order.</param>
/// <param name="workingDirectory">Folder holding the clone.</param>
/// <param name="folderGlobs">Folders to restrict each scan to; passed through to the directory scan.</param>
/// <returns>A list of AssetsResults reflecting all discovered assets.jsons from each targeted commit.</returns>
private List<AssetsResult> GetAssetsResults(string repo, List<string> commits, string workingDirectory, List<string> folderGlobs)
{
    var collected = new List<AssetsResult>();

    foreach (string sha in commits)
    {
        // Move the working tree to this commit, then clear leftovers before scanning it.
        handler.Run($"checkout {sha}", workingDirectory);
        Cleanup(workingDirectory);

        List<AssetsResult> foundAtCommit = ScanDirectory(repo, sha, workingDirectory, folderGlobs);
        collected.AddRange(foundAtCommit);
    }

    return collected;
}
/// <summary>
/// Runs <c>git clean -xdf</c> in the repo. After switching commits, folders that git did not remove
/// on its own could otherwise leave behind assets.jsons that do not belong to the current commit.
/// </summary>
/// <param name="workingDirectory">Folder holding the clone to clean.</param>
/// <remarks>Any failure is written to the console and the process exits with code 1.</remarks>
private void Cleanup(string workingDirectory)
{
    try
    {
        handler.Run("clean -xdf", workingDirectory);
    }
    catch (Exception failure)
    {
        // Git process failures and every other exception are handled the same way: report, then stop.
        Console.WriteLine(failure.ToString());
        Environment.Exit(1);
    }
}
/// <summary>
/// Recursively deletes a folder (intended for a .git folder), resetting the attributes of the folder
/// and of every file to Normal first so that restrictive attributes cannot block the delete.
///
/// This is necessary because certain `.pack` files created by git cannot be deleted without
/// adjusting these permissions.
/// </summary>
/// <param name="gitfolder">Folder to remove together with everything beneath it.</param>
public static void SetPermissionsAndDelete(string gitfolder)
{
    File.SetAttributes(gitfolder, FileAttributes.Normal);

    // Snapshot both listings up front, before anything is removed.
    string[] childFiles = Directory.GetFiles(gitfolder);
    string[] childFolders = Directory.GetDirectories(gitfolder);

    foreach (string childFile in childFiles)
    {
        File.SetAttributes(childFile, FileAttributes.Normal);
        File.Delete(childFile);
    }

    foreach (string childFolder in childFolders)
    {
        SetPermissionsAndDelete(childFolder);
    }

    // Everything beneath has been removed by now, so a non-recursive delete is enough.
    Directory.Delete(gitfolder, recursive: false);
}
/// <summary>
/// Removes a cloned repo folder. The .git folder's .pack files can be finicky to delete from code,
/// so that folder is removed first through the attribute-resetting delete, and then the rest of the
/// working directory is deleted recursively.
/// </summary>
/// <param name="workingDirectory">Folder holding the clone to delete.</param>
public static void CleanupGitDirectory(string workingDirectory)
{
    string dotGitPath = Path.Combine(workingDirectory, ".git");
    bool hasGitFolder = Directory.Exists(dotGitPath);

    if (hasGitFolder)
    {
        SetPermissionsAndDelete(dotGitPath);
    }

    Directory.Delete(workingDirectory, recursive: true);
}
/// <summary>
/// Writes a resultSet to disk as indented JSON, replacing any existing "output.json" in the working directory.
/// </summary>
/// <param name="newResults">The result set whose Results are serialized.</param>
public void Save(AssetsResultSet newResults)
{
    var options = new JsonSerializerOptions
    {
        WriteIndented = true
    };
    string serialized = JsonSerializer.Serialize(newResults.Results, options: options);

    // File.Create truncates an existing file. File.OpenWrite does not, so a payload shorter than the
    // previous output.json would leave stale trailing bytes behind and produce invalid JSON.
    using var stream = File.Create(ResultsFile);
    stream.Write(Encoding.UTF8.GetBytes(serialized));
}
}