packages/github-actions-usage/src/transform.ts (111 lines of code) (raw):
import type { WorkflowTemplate } from '@actions/workflow-parser';
import {
convertWorkflowTemplate,
NoOperationTraceWriter,
parseWorkflow,
} from '@actions/workflow-parser';
import type {
ActionStep,
Step,
WorkflowJob,
} from '@actions/workflow-parser/model/workflow-template';
import type { RawGithubWorkflow } from './db-read';
import type { UnsavedGithubActionUsage } from './db-write';
export function removeUndefined<T>(array: Array<T | undefined>): T[] {
return array.filter((item): item is T => item !== undefined);
}
export interface GithubWorkflow {
repository: string;
path: string;
template: WorkflowTemplate;
}
/**
* Transform a GitHub Workflow as read from the `github_workflows` table,
* into a row for the `github_action_usage` table.
*/
export async function extractGithubUsesStrings(
rawWorkflows: RawGithubWorkflow[],
): Promise<UnsavedGithubActionUsage[]> {
const workflows: GithubWorkflow[] = removeUndefined(
await Promise.all(
rawWorkflows.map((workflow) => getWorkflowTemplate(workflow)),
),
);
console.log(
`GitHub Workflow summary: total ${rawWorkflows.length}, valid ${workflows.length}, invalid ${rawWorkflows.length - workflows.length}`,
);
return workflows.map<UnsavedGithubActionUsage>(
({ repository, path, template }) => {
const uses = getUsesInWorkflowTemplate(template);
console.log(
`The workflow ${path} in repository ${repository} has ${uses.length} 'uses'`,
);
return {
full_name: repository,
workflow_path: path,
workflow_uses: uses,
};
},
);
}
/**
* Validate the contents of the `github_workflows` table against the YAML schema for GitHub Workflows.
*
* This is necessary as CloudQuery does not guarantee its validity
* because it saves the result from the get repository content API.
*
* @see https://github.com/cloudquery/cloudquery/blob/main/plugins/source/github/resources/services/actions/workflows.go
* @see https://docs.github.com/en/rest/repos/contents?apiVersion=2022-11-28#get-repository-content
*/
export async function getWorkflowTemplate(
rawWorkflow: RawGithubWorkflow,
): Promise<GithubWorkflow | undefined> {
const { path, contents, full_name } = rawWorkflow;
const result = parseWorkflow(
{
name: path,
content: contents,
},
new NoOperationTraceWriter(),
);
const errors = result.context.errors.getErrors();
if (errors.length > 0) {
console.error(
`Failed to parse workflow - path:${path} repository:${full_name} errors:${errors.length}`,
errors.map(({ message }) => message),
);
return undefined;
}
if (!result.value) {
console.error(
`Failed to parse workflow - path:${path} repository:${full_name} value is null`,
);
return undefined;
}
return {
repository: rawWorkflow.full_name,
path: rawWorkflow.path,
template: await convertWorkflowTemplate(result.context, result.value),
};
}
export function getUsesInWorkflowTemplate(workflowTemplate: WorkflowTemplate) {
return removeUndefined(
workflowTemplate.jobs.flatMap((job) => {
switch (job.type) {
case 'job': {
return getUsesInJob(job);
}
case 'reusableWorkflowJob': {
if (!job.jobs) {
return [job.ref.value];
}
return job.jobs.flatMap((job) => getUsesInJob(job));
}
default: {
const _exhaustiveCheck: never = job;
return _exhaustiveCheck;
}
}
}),
);
}
function getUsesInJob(job: WorkflowJob): string[] {
const actionSteps = stepsFromWorkflowJob(job).filter(
(step): step is ActionStep => 'uses' in step,
);
return actionSteps.map((step) => step.uses.value);
}
function stepsFromWorkflowJob(workflowJob: WorkflowJob): Step[] {
if (workflowJob.type === 'job') {
return workflowJob.steps;
}
const childJobs = workflowJob.jobs ?? [];
return childJobs.flatMap(stepsFromWorkflowJob);
}