scripts/deno/thrasher-tracker.ts (144 lines of code) (raw):

import { array, object, string } from 'npm:zod@3'; import { fetchJSON } from './json.ts'; import prettyBytes from 'npm:pretty-bytes@6'; import { octokit } from './github.ts'; // -- Constants -- // const gu = 'https://www.theguardian.com/'; const fronts = ['uk', 'us', 'international', 'au'] as const; const regex = /(http|https):\/\/([\w_-]+(?:(?:\.[\w_-]+)+))([\w.,@?^=%&:\/~+#-]*[\w@?^=%&\/~+#-])/g; const frontSchema = object({ pressedPage: object({ collections: array( object({ id: string(), collectionType: string(), displayName: string(), curated: array( object({ enriched: object({ embedHtml: string().optional(), embedCss: string().optional(), embedJs: string().optional(), }), }), ), }), ), }), }); /** * We ignore all fonts extensions because browsers will only load * `woff2`one of these resources at a time. */ const _fontsExtensions = ['woff', 'ttf']; const supportedResourceExtensions = [ 'js', 'png', 'woff2', 'gif', 'jpg', 'mp4', 'css', ]; // -- Methods -- // const getExtension = (url: URL) => url.pathname.split('.').slice(-1)[0]; const isSupportedResourceType = (url: URL): boolean => supportedResourceExtensions.includes(getExtension(url)); const getResourceSize = async (url: URL): Promise<number> => { const response = await fetch(url); return (await response.blob()).size; }; const getThrasherResources = (urls: URL[]) => { return Promise.all( urls.map(async (url) => { const size = await getResourceSize(url); return { url, size }; }), ); }; const getFrontThrashers = async (path: string) => { const url = new URL(`${path}.json?dcr`, gu); const { pressedPage: { collections }, } = await fetchJSON(url, { parser: frontSchema.parse }); const thrashers = collections.filter( (collection) => collection.collectionType === 'fixed/thrasher', ); const thrashersWithResources = thrashers.flatMap( async ({ displayName, curated: [{ enriched }] }) => { const resourceUrls = Object.values(enriched) .flatMap((embed) => [...embed.matchAll(regex)].map(([url]) => new URL(url)) ) .filter(isSupportedResourceType); const resources = await getThrasherResources(resourceUrls); const embedSize = new Blob([...Object.values(enriched)]).size; const resourceSize = resources.reduce((map, { url, size }) => { const ext = getExtension(url); const acc = map.get(ext) ?? 0; map.set(ext, acc + size); return map; }, new Map<string, number>()); return { displayName, resources, embedSize, resourceSize, totalSize: embedSize + [...resourceSize.values()].reduce( (acc, next) => acc + next, 0, ), }; }, ); return Promise.all(thrashersWithResources); }; const getTable = (data: Awaited<ReturnType<typeof getFrontThrashers>>) => { const rows = data .slice() .sort((a, b) => b.totalSize - a.totalSize) .map( ({ displayName, embedSize, resourceSize, totalSize }) => '| ' + [ displayName, prettyBytes(totalSize), prettyBytes(embedSize), [...resourceSize.entries()] .sort(([, a], [, b]) => b - a) .map( ([resourceType, size]) => `\`${resourceType}\`: ${prettyBytes(size)}`, ) .join(', '), ].join(' | ') + ' |', ); return [ '| Name | Total size | Embed size | Resources |', '| ---- | ---------- | ---------- | --------- |', ...rows, ]; }; // -- Script -- // const lines = ['# Largest thrashers on network fronts']; for (const front of fronts) { lines.push( '', '', `## [${front.toUpperCase()} Front](${new URL(front, gu).toString()}) `, '', ); const data = await getFrontThrashers(front); lines.push(...getTable(data)); } const body = lines.join('\n'); if (!octokit) { console.log(body); Deno.exit(); } const { data: { html_url }, } = await octokit.rest.issues.update({ owner: 'guardian', repo: 'dotcom-rendering', issue_number: 5856, body, }); console.log('Updated issue:', html_url); Deno.exit();