export function transformClassifiedReports()

in src/server/helpers/classified_reports_transform.ts [84:162]


/**
 * Turns raw classified reports into the sorted JSON payload.
 *
 * Reports without a URL, and reports that do not match the requested
 * prediction, are dropped. Every remaining report is shallow-copied and
 * enriched with:
 * - `reported_at`: unwrapped from the `{value: "..."}` object the database
 *   returns (values that are not wrapped are passed through unchanged),
 * - `related_bugs`: `{number, title}` for every URL pattern whose
 *   wildcard-stripped text occurs somewhere in the report URL,
 * - `root_domain`: the domain `psl` derives from the URL's hostname; IP
 *   addresses are kept as-is, and anything that cannot be resolved (including
 *   an unparsable URL) becomes "[unknown]".
 *
 * Ordering: when filtering on "valid" or "invalid", reports are sorted by
 * ascending probability. Otherwise "valid" reports come first (descending
 * probability), followed by the remaining reports (ascending probability).
 *
 * @param rawReports - Report rows; read for `url`, `prediction`, `prob` and
 *   `reported_at`.
 * @param rawUrlPatterns - Known URL patterns; read for `url_pattern`, `bug`
 *   and `title`.
 * @param paramPrediction - Prediction label to filter on. An empty value or
 *   "all" disables the filter.
 * @param logger - Receives verbose progress messages.
 * @returns The enriched, sorted reports serialized as a JSON string.
 */
export function transformClassifiedReports(
  rawReports: any[],
  rawUrlPatterns: any[],
  paramPrediction: string,
  logger: Logger,
): string {
  const unknownDomain = "[unknown]";

  logger.verbose("Pre-processing URL patterns...");
  const preprocessedUrlPatterns = rawUrlPatterns.map((pattern: UrlPattern) => {
    const newPattern = Object.assign({}, pattern);

    // [ToDo] We probably should build actual RegExp matching, so this should
    // generate a matchable RegExp
    // Until then, strip *every* wildcard. A string pattern passed to
    // `replace` only removes the first occurrence, which left a literal "*"
    // in patterns such as "*.example.com/*" and made them unmatchable.
    newPattern.url_pattern = pattern.url_pattern.replace(/\*/g, "");

    return newPattern;
  });

  // Memoized because many reports share the same hostname.
  const normalizeHostname = _.memoize((hostname: string) => {
    if (isIP(hostname)) {
      return hostname;
    }

    const parsedDomain = psl.parse(hostname);
    return (parsedDomain as psl.ParsedDomain).domain || unknownDomain;
  });

  // Returns the hostname of `url`, or null if `url` is not a parsable
  // absolute URL. Keeps one malformed report from aborting the whole run.
  const extractHostname = (url: string): string | null => {
    try {
      return new URL(url).hostname;
    } catch {
      return null;
    }
  };

  logger.verbose("Pre-processing classified reports...");
  const preprocessedReports = rawReports
    .filter((report: ClassifiedReport) => {
      // [ToDo] some reports currently don't have a URL attached. This breaks
      // all kinds of assumptions here, so let's remove them for now. Tom is
      // investigating why this happens.
      return !!report.url;
    })
    .filter((report: ClassifiedReport) => {
      if (!paramPrediction || paramPrediction === "all") {
        return true;
      }
      return report.prediction === paramPrediction;
    })
    .map((report: ClassifiedReport) => {
      const newReport = Object.assign({}, report);

      // For some reason, the reported_at as it comes out of the database is
      // actually an object {value: "[timestamp]"}.
      // [ToDo] figure out why, and if this is something that could change
      // Only unwrap when the wrapper is actually present, so a missing or
      // already-unwrapped timestamp neither throws nor turns into undefined.
      const rawReportedAt = (report as any).reported_at;
      newReport.reported_at =
        rawReportedAt !== null &&
        typeof rawReportedAt === "object" &&
        "value" in rawReportedAt
          ? rawReportedAt.value
          : rawReportedAt;

      newReport.related_bugs = preprocessedUrlPatterns
        .filter((pattern) => report.url.includes(pattern.url_pattern))
        .map((pattern) => ({
          number: pattern.bug,
          title: pattern.title,
        }));

      const hostname = extractHostname(newReport.url);
      newReport.root_domain =
        hostname === null ? unknownDomain : normalizeHostname(hostname);

      return newReport;
    });

  logger.verbose("Writing response...");

  // `preprocessedReports` is a fresh array produced by filter/map above, so
  // sorting it in place does not touch the caller's `rawReports`.
  const sortedReports = preprocessedReports.sort((a, b) => {
    if (paramPrediction === "invalid" || paramPrediction === "valid") {
      return a.prob - b.prob;
    }

    if (a.prediction === b.prediction) {
      // If labels are the same, sort by descending probability for 'valid'
      // and ascending probability for 'invalid'
      return a.prediction === "valid" ? b.prob - a.prob : a.prob - b.prob;
    }
    // Prioritize 'valid' over 'invalid'
    return a.prediction === "valid" ? -1 : 1;
  });

  return JSON.stringify(sortedReports);
}