site/firefox-models/models.mjs (497 lines of code) (raw):

// @ts-check
import { changeLocation, exposeAsGlobal, getElement } from "../utils.mjs";

main().catch((error) => {
  console.error(error);
  getElement("error").style.display = "block";
});

// Comparator results, derived from localeCompare so that versionCompare
// returns the same values a string comparison would.
const aLessThanB = "a".localeCompare("b");
const aGreaterThanB = aLessThanB * -1;
const aEqualToB = 0;

/**
 * Fetch a URL and parse the response body as JSON.
 *
 * @param {string} url
 * @returns {Promise<any>}
 * @throws {Error} When the response status is not OK.
 */
async function fetchJSON(url) {
  const response = await fetch(url);
  if (!response.ok) {
    console.error(response);
    throw new Error("Response failed.");
  }
  return await response.json();
}

/**
 * Page entry point: wires up the preview checkbox, fetches the model records
 * and the COMET evaluation results, and renders one table row per
 * language/version combination.
 */
async function main() {
  getElement("counts").style.display = "table";

  const remoteSettingsPreviewCheckbox = /** @type {HTMLInputElement} */ (
    getElement("remoteSettingsPreview")
  );
  const urlParams = new URLSearchParams(window.location.search);
  // Only the literal "true" enables preview, so that the checkbox state and
  // the fetched bucket always agree (e.g. for "?preview=false").
  const isPreview = urlParams.get("preview") === "true";
  remoteSettingsPreviewCheckbox.checked = isPreview;
  remoteSettingsPreviewCheckbox.addEventListener("change", () => {
    const nextParams = new URLSearchParams(window.location.search);
    if (remoteSettingsPreviewCheckbox.checked) {
      nextParams.set("preview", "true");
    } else {
      nextParams.delete("preview");
    }
    changeLocation(nextParams);
  });

  const bucket = isPreview ? "main-preview" : "main";

  /** @type {{ data: ModelRecord[] }} */
  const records = await fetchJSON(
    `https://firefox.settings.services.mozilla.com/v1/buckets/${bucket}/collections/translations-models/records`
  );
  exposeAsGlobal("records", records.data);
  const attachmentsByKey = getAttachmentsByKey(records.data);
  countModels(records.data);

  /** @type {EvalResults} */
  const cometResults = await fetchJSON(
    "https://raw.githubusercontent.com/mozilla/firefox-translations-models/main/evaluation/comet-results.json"
  );
  logCometResults(cometResults);

  /**
   * @typedef {Object} ModelEntry
   * @property {string} lang
   * @property {string} version
   * @property {string} display
   * @property {ModelRecord[]} fromEn
   * @property {ModelRecord[]} toEn
   */

  /** @type {Map<string, ModelEntry>} */
  const modelsMap = new Map();

  const models = records.data.filter((record) => record.fileType === "model");
  exposeAsGlobal("models", models);

  const dn = new Intl.DisplayNames("en", {
    type: "language",
    fallback: "code",
    languageDisplay: "standard",
  });

  // Group the model records by the non-English language and the version.
  for (const model of models) {
    const isFromEn = model.fromLang === "en";
    const lang = isFromEn ? model.toLang : model.fromLang;
    const key = `${lang} ${model.version}`;

    let entry = modelsMap.get(key);
    if (!entry) {
      entry = {
        lang,
        version: model.version,
        display: dn.of(lang) ?? lang,
        toEn: [],
        fromEn: [],
      };
      modelsMap.set(key, entry);
    }

    if (isFromEn) {
      entry.fromEn.push(model);
    } else {
      entry.toEn.push(model);
    }
  }

  const tbody = getElement("tbody");

  // Sort by language code first, then by display name. The sort is stable, so
  // entries sharing a display name stay ordered by language code.
  const modelEntries = [...modelsMap.values()].sort((a, b) =>
    `${a.lang}`.localeCompare(b.lang)
  );
  modelEntries.sort((a, b) => a.display.localeCompare(b.display));

  // Newest version first within each entry.
  for (const entry of modelEntries) {
    entry.fromEn.sort((a, b) => -versionCompare(a.version, b.version));
    entry.toEn.sort((a, b) => -versionCompare(a.version, b.version));
  }

  for (const { lang, toEn, fromEn } of modelEntries) {
    const tr = document.createElement("tr");

    /**
     * Append a new cell to the current row.
     *
     * @param {string | HTML} [text]
     */
    const td = (text = "") => {
      const el = document.createElement("td");
      el.innerText = text;
      tr.appendChild(el);
      return el;
    };

    td(dn.of(lang));

    addToRow(
      td,
      `${lang}-en`,
      records.data,
      cometResults,
      attachmentsByKey,
      toEn[0]
    );
    addToRow(
      td,
      `en-${lang}`,
      records.data,
      cometResults,
      attachmentsByKey,
      fromEn[0]
    );

    tbody.append(tr);
  }

  getElement("loading").style.display = "none";
  getElement("table").style.display = "table";
}

/**
 * Append the cells describing one translation direction to the current row:
 * the pair name (with an attachments popup when attachments are known), the
 * version, the total size, the release channel, and the COMET score.
 *
 * @param {(text?: string) => HTMLTableCellElement} td
 * @param {string} pair
 * @param {ModelRecord[]} records
 * @param {EvalResults} cometResults
 * @param {Map<string, Array<[string, string]>>} attachmentsByKey
 * @param {ModelRecord} [model]
 */
function addToRow(td, pair, records, cometResults, attachmentsByKey, model) {
  const modelNameTD = td();
  if (model) {
    // Add the attachments.
    const attachments = attachmentsByKey.get(getAttachmentKey(model));
    if (attachments) {
      const div = document.createElement("div");
      div.className = "attachments";
      for (const [name, url] of attachments) {
        const a = document.createElement("a");
        a.innerText = name;
        a.href = url;
        div.appendChild(a);
      }

      const button = document.createElement("button");
      button.innerText = pair;

      // Hide when clicking outside of the button and popup.
      document.body.addEventListener("click", (event) => {
        const target = /** @type {Node | null} */ (event.target);
        if (target && !div.contains(target) && target !== button) {
          div.style.display = "none";
        }
      });

      // Toggle the popup when the button itself is clicked.
      button.addEventListener("click", () => {
        div.style.display = div.style.display === "block" ? "none" : "block";
      });

      modelNameTD.appendChild(button);
      modelNameTD.appendChild(div);
    } else {
      modelNameTD.innerText = pair;
    }
  }

  td(model?.version);
  td(getModelSize(records, model));

  const releaseEl = td(getReleaseChannel(model));
  // Assigning undefined to `title` would render the tooltip "undefined".
  releaseEl.title = model?.filter_expression ?? "";

  const googleComet = cometResults[pair]?.["flores-test"]?.["google"];
  const bergamotComet = cometResults[pair]?.["flores-test"]?.["bergamot"];
  const googleCometAvg = getAverageScore(pair, cometResults, "google");
  const bergamotCometAvg = getAverageScore(pair, cometResults, "bergamot");

  const hasEvals = bergamotComet && googleComet;

  // Relative difference to Google; negative means Bergamot scores lower.
  const bergamotCometDisplay = (100 * bergamotComet).toFixed(2);
  const percentage = 100 * (1 - googleComet / bergamotComet);
  const sign = percentage >= 0 ? "+" : "";

  let scoreDisplay = "";
  if (hasEvals) {
    // Pad with non-breaking spaces so the percentages line up.
    const percentDisplay = `${sign}${percentage.toFixed(2)}%`.padStart(
      7,
      "\u00A0"
    );
    scoreDisplay = `${bergamotCometDisplay} ${percentDisplay}`;
  }

  const avgPercentage = 100 * (1 - googleCometAvg / bergamotCometAvg);
  const avgSign = avgPercentage >= 0 ? "+" : "";
  const avgPercentageDisplay = hasEvals
    ? `${avgSign}${avgPercentage.toFixed(2)}%`
    : "";

  const el = td(scoreDisplay);
  if (hasEvals) {
    let shippable = "Shippable";
    if (percentage < -5) {
      // Does not meet release criteria.
      el.style.background = "#ffa537";
      shippable = "Not shippable";
    }
    el.title =
      `${shippable} - COMET ${(100 * bergamotComet).toFixed(2)} ` +
      `vs Google Comet ${(100 * googleComet).toFixed(2)} ` +
      `(${scoreDisplay})` +
      "\n\n" +
      `avg COMET ${(100 * bergamotCometAvg).toFixed(2)} ` +
      `vs Google avg Comet ${(100 * googleCometAvg).toFixed(2)} ` +
      `(${avgPercentageDisplay})`;
  }
}

/**
 * Average a translator's score over every dataset listed for a language pair.
 * Missing or zero scores are skipped. Returns 0 when the pair is unknown or
 * has no usable score.
 *
 * @param {string} pair
 * @param {EvalResults} cometResults
 * @param {string} translator
 * @returns {number}
 */
function getAverageScore(pair, cometResults, translator) {
  const datasets = cometResults[pair];
  if (!datasets) {
    return 0;
  }

  let count = 0;
  let total = 0;
  for (const obj of Object.values(datasets)) {
    const score = obj[translator];
    if (score) {
      count++;
      total += score;
    }
  }

  return count === 0 ? 0 : total / count;
}

/**
 * Sum the attachment sizes of every record that shares the model's language
 * pair, version, and filter expression, formatted in megabytes.
 *
 * @param {ModelRecord[]} records
 * @param {ModelRecord} [model]
 * @returns {string} The size such as "12.3 MB", or "" when there is no model.
 */
function getModelSize(records, model) {
  if (!model) {
    return "";
  }
  let size = 0;
  for (const record of records) {
    if (
      record.fromLang === model.fromLang &&
      record.toLang === model.toLang &&
      record.version === model.version &&
      record.filter_expression === model.filter_expression
    ) {
      size += Number(record.attachment.size);
    }
  }
  return (size / 1000 / 1000).toFixed(1) + " MB";
}

/**
 * Compare two versions quickly. Versions with fewer than three dot-separated
 * parts are left-padded with "0" (so "1.0" is treated as "0.1.0"). The last
 * part may carry a pre-release suffix such as "a" or "a1"; a version with a
 * suffix sorts before the same version without one.
 *
 * @param {string} a
 * @param {string} b
 * @return {number}
 * @throws {Error} When a numeric part of either version is not a number.
 */
export default function versionCompare(a, b) {
  /** @type {any[]} */
  const aParts = a.split(".");
  /** @type {any[]} */
  const bParts = b.split(".");
  while (aParts.length < 3) {
    aParts.unshift("0");
  }
  while (bParts.length < 3) {
    bParts.unshift("0");
  }

  // Split the last part into its number and the optional pre-release suffix.
  const [, aEnd, aBeta] = aParts[2].match(/(\d+)([a-z]\d?)?/) ?? [
    undefined,
    "0",
    "",
  ];
  const [, bEnd, bBeta] = bParts[2].match(/(\d+)([a-z]\d?)?/) ?? [
    undefined,
    "0",
    "",
  ];
  aParts.pop();
  bParts.pop();
  aParts.push(aEnd);
  bParts.push(bEnd);

  for (let i = 0; i < 3; i++) {
    aParts[i] = Number(aParts[i]);
    bParts[i] = Number(bParts[i]);
  }

  for (const part of aParts) {
    if (Number.isNaN(part)) {
      console.error(aParts);
      throw new Error(a + " had an NaN.");
    }
  }
  for (const part of bParts) {
    if (Number.isNaN(part)) {
      console.error(bParts);
      // Report the version that actually failed to parse.
      throw new Error(b + " had an NaN.");
    }
  }

  for (let i = 0; i < 3; i++) {
    const aPart = aParts[i];
    const bPart = bParts[i];
    if (aPart > bPart) return aGreaterThanB;
    if (aPart < bPart) return aLessThanB;
  }

  // The numeric parts are equal, so the pre-release suffix decides.
  if (!aBeta && !bBeta) return aEqualToB;
  if (!aBeta) return aGreaterThanB;
  if (!bBeta) return aLessThanB;

  return aBeta.localeCompare(bBeta);
}

/**
 * Turn a record's filter expression into a short release channel label.
 *
 * @param {ModelRecord} [model]
 * @returns {string} "" without a model, "Released" without a filter
 *   expression, "Nightly" for nightly-only models, otherwise "Custom".
 */
function getReleaseChannel(model) {
  if (!model) {
    return "";
  }
  let filterExpression = model.filter_expression ?? "";
  filterExpression = filterExpression.replace(
    "env.channel == 'default'",
    "Local Build"
  );
  filterExpression = filterExpression.replace(
    "env.channel == 'nightly'",
    "Nightly"
  );
  filterExpression = filterExpression.replace("env.channel == 'beta'", "Beta");
  filterExpression = filterExpression.replace(
    "env.channel == 'release'",
    "Release"
  );
  filterExpression = filterExpression.replace(
    "env.channel == 'aurora'",
    "Aurora"
  );
  filterExpression = filterExpression.replace("||", "or");
  filterExpression = filterExpression.replace("&&", "and");

  if (!filterExpression) {
    return "Released";
  }

  // Versions ending in "a1" are labelled Nightly whatever the expression says.
  if (model.version?.endsWith("a1")) {
    filterExpression = "Local Build or Nightly";
  }

  if (filterExpression === "Local Build or Nightly") {
    // Simplify this to just nightly.
    return "Nightly";
  }

  return "Custom";
}

/**
 * Load-time self-check for versionCompare.
 *
 * @param {string} a
 * @param {string} b
 * @param {number} direction
 * @throws {Error} When the comparison does not yield the expected direction.
 */
function assertComparison(a, b, direction) {
  if (versionCompare(a, b) !== direction) {
    throw new Error(`Expected ${a} ${b} to compare to ${direction}`);
  }
}

assertComparison("1.0a", "1.0", aLessThanB);
assertComparison("1.0a1", "1.0", aLessThanB);
assertComparison("1.0a", "1.0a", aEqualToB);
assertComparison("0.1.0a", "1.0a", aEqualToB);
assertComparison("1.0", "1.0a", aGreaterThanB);
assertComparison("1.0", "1.0a1", aGreaterThanB);
assertComparison("1.0", "2.0", aLessThanB);
assertComparison("1.0", "1.1", aLessThanB);
assertComparison("1.0a", "1.1", aLessThanB);

/**
 * Log the "flores-dev" scores of every language pair to the console as TSV,
 * with the en-xx pairs listed before the xx-en pairs.
 *
 * @param {EvalResults} cometResults
 */
function logCometResults(cometResults) {
  /** @type {Array<unknown[]>} */
  const xx_en = [];
  const en_xx = [];

  for (const [langPair, evaluation] of Object.entries(cometResults)) {
    // A pair may lack the "flores-dev" dataset; log empty cells rather than
    // throwing and aborting the page load.
    const flores = evaluation["flores-dev"];
    const [fromLang, toLang] = langPair.split("-");
    const row = [
      langPair,
      fromLang,
      toLang,
      flores?.google || "",
      flores?.bergamot || "",
    ];
    if (fromLang === "en") {
      en_xx.push(row);
    } else {
      xx_en.push(row);
    }
  }

  /**
   * @param {any} a
   * @param {any} b
   */
  function sortRow(a, b) {
    return (a[1] + "-" + a[2]).localeCompare(b[1] + "-" + b[2]);
  }
  xx_en.sort(sortRow);
  en_xx.sort(sortRow);

  const rows = [
    ["Lang Pair", "From", "To", "Google", "Bergamot"],
    ...en_xx,
    ...xx_en,
  ];

  let tsv = "";
  for (const row of rows) {
    tsv += row.join("\t") + "\n";
  }
  console.log(tsv);
}

/**
 * Build the key used to group a record's attachments: language pair plus
 * version.
 *
 * @param {ModelRecord} record
 * @returns {string}
 */
function getAttachmentKey(record) {
  const { fromLang, toLang, version } = record;
  return `${fromLang}-${toLang} ${version}`;
}

/**
 * Group the [name, download URL] of every record by its attachment key.
 *
 * @param {ModelRecord[]} records
 * @returns {Map<string, Array<[string, string]>>}
 */
function getAttachmentsByKey(records) {
  /** @type {Map<string, Array<[string, string]>>} */
  const attachmentsByKey = new Map();
  for (const record of records) {
    const key = getAttachmentKey(record);
    let attachments = attachmentsByKey.get(key);
    if (!attachments) {
      attachments = [];
      attachmentsByKey.set(key, attachments);
    }
    attachments.push([
      record.name,
      `https://firefox-settings-attachments.cdn.mozilla.net/${record.attachment.location}`,
    ]);
  }
  return attachmentsByKey;
}

/**
 * Count the distinct languages per direction that are available in release,
 * and those that only have non-release records, then write the counts into
 * the page.
 *
 * @param {ModelRecord[]} records
 */
function countModels(records) {
  const fromProd = new Set();
  const fromNightly = new Set();
  const toProd = new Set();
  const toNightly = new Set();

  for (const record of records) {
    // A record without a filter expression is treated as released.
    const isRelease =
      !record.filter_expression ||
      record.filter_expression.includes("env.channel == 'release'");

    if (record.fromLang === "en") {
      if (isRelease) {
        toProd.add(record.toLang);
      } else {
        toNightly.add(record.toLang);
      }
    } else {
      if (isRelease) {
        fromProd.add(record.fromLang);
      } else {
        fromNightly.add(record.fromLang);
      }
    }
  }

  // Languages that have no release record in that direction.
  const toNightlyOnly = toNightly.difference(toProd);
  const fromNightlyOnly = fromNightly.difference(fromProd);

  getElement("fromProd").innerText = String(fromProd.size);
  getElement("toProd").innerText = String(toProd.size);
  // Each element receives the count for its own direction.
  getElement("fromNightly").innerText = String(fromNightlyOnly.size);
  getElement("toNightly").innerText = String(toNightlyOnly.size);
}