in src/server/helpers/user_reports_transform.ts [9:83]
export async function fetchUserReports(projectId: string, paramFrom: string, paramTo: string, logger: Logger) {
logger.verbose("Connecting to BigQuery...");
const bq = getBqConnection(projectId);
logger.verbose("Starting queries...");
// Note: this looks weird - but it makes sure the queries run in parallel.
// Since BQ has some initial latency when responding, this matters.
// [ToDo] Investigate whether using QueryJobs makes sense here.
const [[rawReports], [rawUrlPatterns]] = await Promise.all([
bq.query({
query: `
SELECT
reports.document_id AS uuid,
CAST(reports.submission_timestamp AS DATETIME) AS reported_at,
reports.client_info.app_display_version AS app_version,
reports.metrics.string.broken_site_report_breakage_category AS breakage_category,
reports.metrics.string.broken_site_report_tab_info_antitracking_block_list AS tp_status,
reports.metrics.text2.broken_site_report_browser_info_app_default_useragent_string as ua_string,
reports.metrics.text2.broken_site_report_description AS comments,
reports.metrics.url2.broken_site_report_url AS url,
reports.normalized_app_name AS app_name,
reports.normalized_channel AS app_channel,
reports.metadata.user_agent.version AS app_major_version,
ARRAY(
SELECT label
FROM webcompat_user_reports.labels
WHERE report_uuid = reports.document_id
) as labels,
bp.label as prediction,
bp.probability as prob,
ml_trans.translated_text AS translated_comments,
ml_trans.language_code AS translated_from,
# If the report is from Windows, we have to use a bit of mozfun to figure out the human-readable Windows
# version for the build number. But this function only works on Windows pings, so we have to gate it.
IF (
client_info.windows_build_number IS NOT NULL,
mozfun.norm.windows_version_info('Windows_NT', client_info.os_version, client_info.windows_build_number),
reports.normalized_os
) as os,
# We want to exclude reports that have been actioned upon, but we do this later during processing. That's
# because we want to limit the workload to only the "top 10" reports, and we want to make that list stable, so
# removing already-actioned reports has to happen after sorting and grouping.
CASE WHEN EXISTS (
SELECT 1 FROM webcompat_user_reports.report_actions
WHERE report_actions.report_uuid = reports.document_id
AND report_actions.type != "mark-valid"
)
THEN true ELSE false END AS has_actions
FROM moz-fx-data-shared-prod.firefox_desktop.broken_site_report as reports
LEFT JOIN webcompat_user_reports.bugbug_predictions AS bp ON reports.document_id = bp.report_uuid
LEFT JOIN webcompat_user_reports.translations AS ml_trans ON reports.document_id = ml_trans.report_uuid
WHERE
reports.submission_timestamp BETWEEN TIMESTAMP(?) and TIMESTAMP(DATE_ADD(?, interval 1 day))
ORDER BY CHAR_LENGTH(comments) DESC
;
`,
params: [paramFrom, paramTo],
}),
bq.query({
query: [
"SELECT patterns.*, bugs.title",
"FROM webcompat_knowledge_base.url_patterns AS patterns",
"LEFT JOIN webcompat_knowledge_base.bugzilla_bugs AS bugs ON patterns.bug = bugs.number",
].join(" "),
}),
]);
logger.verbose(`Received ${rawReports.length} user reports and ${rawUrlPatterns.length} URL patterns.`);
return {
rawReports,
rawUrlPatterns,
};
}