in src/server/helpers/classified_reports_transform.ts [84:162]
/**
 * Turns raw classified-report rows and URL-pattern rows into the JSON string
 * that is sent back to the client.
 *
 * Processing steps:
 *  1. Strip the `*` wildcards from every URL pattern so it can be used as a
 *     plain substring.
 *  2. Drop reports without a URL and, unless `paramPrediction` is empty or
 *     `"all"`, reports whose `prediction` differs from `paramPrediction`.
 *  3. On a copy of each remaining report: unwrap `reported_at`, attach
 *     `related_bugs` (every pattern whose stripped text occurs in the report
 *     URL) and `root_domain` (registrable domain, the IP address itself, or
 *     `"[unknown]"`).
 *  4. Sort: for `"valid"` / `"invalid"` requests by ascending probability;
 *     otherwise `"valid"` reports first (descending probability) followed by
 *     everything else (ascending probability).
 *
 * @param rawReports - Report rows; the input objects are not mutated.
 * @param rawUrlPatterns - URL pattern rows (`url_pattern`, `bug`, `title`).
 * @param paramPrediction - Requested prediction label, `"all"`, or empty.
 * @param logger - Receives verbose progress messages.
 * @returns The processed, sorted reports serialized with `JSON.stringify`.
 */
export function transformClassifiedReports(
  rawReports: any[],
  rawUrlPatterns: any[],
  paramPrediction: string,
  logger: Logger,
): string {
  const UNKNOWN_DOMAIN = "[unknown]";

  logger.verbose("Pre-processing URL patterns...");
  const preprocessedUrlPatterns = rawUrlPatterns.map((pattern: UrlPattern) => {
    const newPattern = Object.assign({}, pattern);
    // [ToDo] We probably should build actual RegExp matching, so this should
    // generate a matchable RegExp
    // A string search value would only remove the first "*"; the global
    // regex removes all of them so multi-wildcard patterns can still match.
    newPattern.url_pattern = pattern.url_pattern.replace(/\*/g, "");
    return newPattern;
  });

  const normalizeHostname = _.memoize((hostname: string) => {
    // URL.hostname wraps IPv6 literals in brackets, which isIP() rejects, so
    // test the bare address but hand back the hostname unchanged.
    const bareHost =
      hostname.startsWith("[") && hostname.endsWith("]")
        ? hostname.slice(1, -1)
        : hostname;
    if (isIP(bareHost)) {
      return hostname;
    }
    const parsedDomain = psl.parse(hostname);
    return (parsedDomain as psl.ParsedDomain).domain || UNKNOWN_DOMAIN;
  });

  // Resolves a report URL to its root domain; a URL that cannot be parsed
  // must not abort the whole response, so it maps to the unknown marker.
  const rootDomainOf = (url: string): string => {
    let hostname: string;
    try {
      hostname = new URL(url).hostname;
    } catch {
      return UNKNOWN_DOMAIN;
    }
    return normalizeHostname(hostname);
  };

  logger.verbose("Pre-processing classified reports...");
  const includeEveryPrediction = !paramPrediction || paramPrediction === "all";
  const preprocessedReports = rawReports
    .filter((report: ClassifiedReport) => {
      // [ToDo] some reports currently don't have a URL attached. This breaks
      // all kinds of assumptions here, so let's remove them for now. Tom is
      // investigating why this happens.
      if (!report.url) {
        return false;
      }
      return includeEveryPrediction || report.prediction === paramPrediction;
    })
    .map((report: ClassifiedReport) => {
      const newReport = Object.assign({}, report);
      // For some reason, the reported_at as it comes out of the database is
      // actually an object {value: "[timestamp]"}.
      // [ToDo] figure out why, and if this is something that could change
      // Until then, unwrap only when the wrapper is really there, so a
      // missing or already-plain timestamp neither throws nor gets lost.
      const rawReportedAt = (report as any).reported_at;
      newReport.reported_at =
        rawReportedAt !== null &&
        typeof rawReportedAt === "object" &&
        "value" in rawReportedAt
          ? rawReportedAt.value
          : rawReportedAt;
      newReport.related_bugs = preprocessedUrlPatterns
        .filter((pattern) => report.url.includes(pattern.url_pattern))
        .map((pattern) => ({
          number: pattern.bug,
          title: pattern.title,
        }));
      newReport.root_domain = rootDomainOf(newReport.url);
      return newReport;
    });

  logger.verbose("Writing response...");
  const singleLabelRequested =
    paramPrediction === "invalid" || paramPrediction === "valid";
  // 'valid' ranks before every other label. Comparing ranks (instead of only
  // inspecting `a`) keeps the comparator consistent for unexpected labels.
  const labelRank = (prediction: string): number =>
    prediction === "valid" ? 0 : 1;
  // In-place sort is fine: preprocessedReports is a fresh array from map().
  const sortedReports = preprocessedReports.sort((a, b) => {
    if (singleLabelRequested) {
      return a.prob - b.prob;
    }
    const rankDelta = labelRank(a.prediction) - labelRank(b.prediction);
    if (rankDelta !== 0) {
      // Prioritize 'valid' over 'invalid'
      return rankDelta;
    }
    // Same rank: descending probability for 'valid', ascending otherwise.
    return a.prediction === "valid" ? b.prob - a.prob : a.prob - b.prob;
  });
  return JSON.stringify(sortedReports);
}