src/data/source.ts (217 lines of code) (raw):

import { emptyPings, pingFields, PingFieldType } from "./format";
import type { Pings, StringIndex, IStringData, IndexedStringPingField } from "./format";
import { createResource, createRoot, createSignal } from "solid-js";
import settings from "app/settings";

/** Typed-array storage used for per-ping string indices. */
export type IndexArray = Uint8Array | Uint16Array | Uint32Array;

/** Element width to allocate up front for a field's index array. */
enum SizeHint {
  Uint8,
  Uint16,
  Uint32
}

/** Initial index width chosen for each indexed-string ping field. */
const SIZE_HINTS: Record<IndexedStringPingField, SizeHint> = {
  channel: SizeHint.Uint8,
  process: SizeHint.Uint8,
  ipc_actor: SizeHint.Uint8,
  clientid: SizeHint.Uint32,
  version: SizeHint.Uint8,
  os: SizeHint.Uint8,
  osversion: SizeHint.Uint8,
  arch: SizeHint.Uint8,
  date: SizeHint.Uint16,
  reason: SizeHint.Uint16,
  type: SizeHint.Uint8,
  build_id: SizeHint.Uint8,
  signature: SizeHint.Uint16
};

/** Typed-array constructor corresponding to each size hint. */
const INDEX_ARRAY_CTORS: Record<SizeHint, new (length: number) => IndexArray> = {
  [SizeHint.Uint8]: Uint8Array,
  [SizeHint.Uint16]: Uint16Array,
  [SizeHint.Uint32]: Uint32Array
};

/**
 * An indexed string column: a table of distinct strings plus, for each ping,
 * the position of its string within that table.
 */
export class IString<S> {
  constructor(
    readonly strings: S[],
    readonly values: IndexArray
  ) {}

  /** Look up the string stored for `ping`. */
  getPingString(ping: Ping): S {
    const stringIndex = this.values[ping];
    return this.strings[stringIndex];
  }
}

/**
 * Incrementally assembles an `IString` from several chunks of indexed-string
 * data, deduplicating strings across chunks and rewriting each chunk's
 * indices to point into the shared string table.
 */
class IStringBuilder<S> {
  #stringIndex = new Map<S, StringIndex>();
  #strings: S[] = [];
  #values: IndexArray;
  #sizeLimit: number;

  /**
   * The size hint picks the initial element width of the index array. A good
   * hint keeps memory usage low and avoids having to copy the indices into a
   * wider array later (either when the hint turns out to be too small, or if
   * sizing were deferred until everything is loaded, e.g. in `build()`).
   *
   * @param totalPings number of slots to allocate in the index array.
   * @param hint initial element width; defaults to the widest (`Uint32`).
   */
  constructor(totalPings: number, hint: SizeHint = SizeHint.Uint32) {
    this.#values = new INDEX_ARRAY_CTORS[hint](totalPings);
    this.#sizeLimit = 256 ** this.#values.BYTES_PER_ELEMENT;
  }

  /**
   * Merge one chunk of indexed-string data, writing its remapped indices into
   * the index array starting at `offset`.
   */
  addData(offset: number, istringData: IStringData<S, StringIndex[]>) {
    const { strings, values } = istringData;

    // remap[i] is the position in the shared table of the chunk's i-th string.
    const remap: StringIndex[] = new Array(strings.length);
    for (let i = 0; i < strings.length; i++) {
      remap[i] = this.#intern(strings[i]);
    }

    const translated = values.map(v => remap[v]);
    this.#values.set(translated, offset);
  }

  /** Produce the `IString` backed by the accumulated strings and indices. */
  build(): IString<S> {
    return new IString(this.#strings, this.#values);
  }

  /** Return the shared-table index of `s`, appending it if not yet present. */
  #intern(s: S): StringIndex {
    const known = this.#stringIndex.get(s);
    if (known !== undefined) {
      return known;
    }
    // The next index would not fit in the current element width: widen first.
    if (this.#strings.length === this.#sizeLimit) {
      this.#widen();
    }
    const index = this.#strings.length;
    this.#stringIndex.set(s, index);
    this.#strings.push(s);
    return index;
  }

  /** Copy the indices into the next wider typed array and update the limit. */
  #widen(): void {
    this.#values = this.#values.BYTES_PER_ELEMENT === 1
      ? new Uint16Array(this.#values)
      : new Uint32Array(this.#values);
    this.#sizeLimit = 256 ** this.#values.BYTES_PER_ELEMENT;
  }
}

export type AllPings = Pings<IString<string>, IString<string | null>>;

/** An index into `AllPings` data. */
export type Ping = number;

// We can make equality checks and set operations very fast by deduping strings
// and keeping indexed strings as-is.
//
// If we didn't care much about the set operations we could also evaluate the
// string indices since at this point it will be efficient in memory either
// way, but the volume of data necessitates higher performance filtering.
/**
 * Concatenate the ping data of every fetched source into a single `AllPings`.
 *
 * Indexed-string fields are fed through one `IStringBuilder` per field so
 * that their indices are rewritten against a shared string table; every other
 * field is copied element by element at the running offset. Each source's
 * status is updated as it is merged.
 */
async function joinData(allData: UrlFetchedSource[]): Promise<AllPings> {
  const totalPings = allData.reduce((sum, d) => sum + d.data.crashid.length, 0);

  const pings = emptyPings(
    k => new IStringBuilder(totalPings, SIZE_HINTS[k]),
    () => new Array(totalPings),
  ) as Pings<IStringBuilder<string>, IStringBuilder<string | null>>;

  // Give the UI a chance to show status changes periodically.
  const showStatusChanges = () => new Promise(resolve => setTimeout(resolve, 0));

  let offset = 0;
  for (const { source, data } of allData) {
    source.setStatus({ message: "merging" });
    await showStatusChanges();

    // Populate the return data.
    for (const [field, desc] of pingFields()) {
      if (desc.type === PingFieldType.IndexedString) {
        // Change indices as necessary.
        const f = field as IndexedStringPingField;
        pings[f].addData(offset, data[f] as any);
      } else {
        const src = data[field] as any[];
        const dest = pings[field] as any[];
        for (let i = 0; i < src.length; i++) {
          dest[offset + i] = src[i];
        }
      }
    }

    offset += data.crashid.length;
    source.setStatus({ success: true, message: `loaded ${data.crashid.length} pings` });
  }
  await showStatusChanges();

  // Build the IStringBuilders
  for (const [field, _] of pingFields().filter(([_, d]) => d.type === PingFieldType.IndexedString)) {
    const f = field as IndexedStringPingField;
    (pings as any)[f] = pings[f].build();
  }

  // XXX We don't currently use the minidump hashes, so clear them out to save memory.
  // They take up enough memory that we might consider dynamically fetching
  // them or truncating them.
  (pings.minidump_sha256_hash as any) = [];

  return pings as any;
}

/** Progress or outcome of loading one source; `success` is set once it has finished. */
export type SourceStatus = {
  success?: boolean,
  message: string;
};

/** A dated data source together with an accessor for its current load status. */
export interface Source {
  readonly date: string;
  status(): SourceStatus;
}

/** A `Source` fetched from `ping_data/<date>`, with its status held in a signal. */
class UrlSource implements Source {
  readonly date: string;
  readonly url: string;
  readonly status: () => SourceStatus;
  readonly setStatus: (status: SourceStatus) => void;

  constructor(date: string) {
    this.date = date;
    this.url = `ping_data/${date}`;
    const [status, setStatus] = createSignal<SourceStatus>({ message: "requesting" });
    this.status = status;
    this.setStatus = setStatus;
  }
}

/** Result of fetching a source; `data` is absent when the fetch failed or was empty. */
type FetchedSource = {
  source: Source,
  data?: Pings,
  etag?: string,
};

/** A fetched `UrlSource` that is known to carry data. */
type UrlFetchedSource = {
  source: UrlSource,
  data: Pings,
};

/**
 * Compare the ETags of the fetched sources with those stored in the settings.
 *
 * Does nothing when no source reported an ETag. When stored ETags exist and
 * differ from the new ones, the user is warned and the stored ETags are
 * replaced; when none are stored yet, the new ones are recorded.
 */
function checkAndUpdateEtags(sources: FetchedSource[]) {
  const newEtags = sources.map(s => s.etag ?? "");
  if (newEtags.every(s => s.length === 0)) {
    return;
  }

  const knownEtags = settings.data_etags;
  if (knownEtags) {
    const mismatch = knownEtags.length !== newEtags.length
      || newEtags.some((etag, i) => knownEtags[i] !== etag);
    if (!mismatch) {
      // No need to update data_etags (which will cause the settings to
      // fire a change, even if it's the same data).
      return;
    }
    alert("Warning: the source data has changed since the link was created.");
  }

  settings.data_etags = newEtags;
}

/** Delay between polls while the server keeps answering 202. */
const RETRY_TIME_MS = 2000;

/**
 * Fetch the ping data for one source, reporting progress through its status.
 *
 * Never rejects: any failure (network error, abort, HTTP error status,
 * malformed body) is recorded on the source's status and a data-less result
 * is returned.
 */
async function fetchSource(source: UrlSource, signal: AbortSignal): Promise<FetchedSource> {
  try {
    let response = await fetch(source.url, { signal });

    // Retry fetches as long as 202 status is returned.
    while (response.status === 202) {
      source.setStatus({ message: "querying database" });
      await new Promise(resolve => setTimeout(resolve, RETRY_TIME_MS));
      response = await fetch(source.url, { signal });
    }

    // `fetch` resolves for HTTP error statuses too; report them explicitly
    // instead of letting the body parse fail with an unrelated message.
    if (!response.ok) {
      source.setStatus({ success: false, message: `failed: HTTP ${response.status}` });
      return { source };
    }

    source.setStatus({ message: "downloading" });
    const data: Pings = await response.json();
    if (data.crashid.length === 0) {
      source.setStatus({ success: false, message: "not available" });
      return { source };
    }

    source.setStatus({ message: "downloaded" });
    const etag = response.headers.get("ETag") ?? undefined;
    return { source, data, etag };
  } catch (error) {
    source.setStatus({ success: false, message: `failed: ${error}` });
    return { source };
  }
}

/** Controller for the most recently started `fetchSources` call, if any. */
let abortController: AbortController | undefined;

/**
 * Fetch all sources concurrently and merge those that returned data.
 *
 * Starting a new call aborts the requests of the previous one.
 */
async function fetchSources(sources: UrlSource[]): Promise<AllPings> {
  if (abortController) abortController.abort();
  // Keep a local reference so the requests of this call always use this
  // call's signal, without a non-null assertion on the shared variable.
  const controller = new AbortController();
  abortController = controller;

  const allData = await Promise.all(sources.map(s => fetchSource(s, controller.signal)));
  checkAndUpdateEtags(allData);
  return await joinData(allData.filter(s => s.data !== undefined) as UrlFetchedSource[]);
}

// The source list signal and the resource derived from it via `fetchSources`
// are created inside their own root.
const { urlSources, setSources, allPings } = createRoot(() => {
  const [sources, setSources] = createSignal<UrlSource[]>([]);
  const [allPings] = createResource(sources, fetchSources, {
    initialValue: emptyPings(() => new IString([], new Uint8Array()))
  });
  return { urlSources: sources, setSources, allPings };
});

/** Replace the current sources with one `UrlSource` per date. */
export function setDates(dates: string[]) {
  setSources(dates.map(d => new UrlSource(d)));
}

/** The current sources. */
export function sources(): Source[] {
  return urlSources();
}

export { allPings };