public static void scrub()

in ingestion-beam/src/main/java/com/mozilla/telemetry/decoder/MessageScrubber.java [224:372]


  /**
   * Applies the known-bug scrubbing rules to a single decoded message.
   *
   * <p>The checks run in the order written and the first matching drop/reject rule ends
   * processing by throwing. Broadly:
   * <ul>
   * <li>"toxic" signatures throw {@code MessageShouldBeDroppedException};</li>
   * <li>"unwanted" signatures throw {@code UnwantedDataException};</li>
   * <li>signatures that should stay visible in monitoring throw
   * {@code AffectedByBugException};</li>
   * <li>redactions mutate {@code json} in place and let the message continue.</li>
   * </ul>
   * Each exception carries a bug identifier string as its message.
   *
   * <p>NOTE(review): {@code UnwantedDataException} is thrown below but is not listed in the
   * {@code throws} clause; presumably it is unchecked or a subtype of a declared exception.
   * Confirm against its declaration.
   *
   * @param attributes message attributes keyed by the {@code Attribute} constants; any of the
   *        values read here may be absent
   * @param json parsed message body; may be modified in place by the redaction rules
   * @throws MessageShouldBeDroppedException if the message matches a signature that should be
   *         dropped without being sent to error output
   * @throws AffectedByBugException if the message matches a bug signature that should be sent
   *         to error output
   */
  public static void scrub(Map<String, String> attributes, ObjectNode json)
      throws MessageShouldBeDroppedException, AffectedByBugException {

    final String namespace = attributes.get(Attribute.DOCUMENT_NAMESPACE);
    final String docType = attributes.get(Attribute.DOCUMENT_TYPE);
    final String appName = attributes.get(Attribute.APP_NAME);
    // Normalized to "" when absent so the startsWith checks below cannot throw a
    // NullPointerException; an empty version simply matches none of the version prefixes.
    final String appVersion = Strings.nullToEmpty(attributes.get(Attribute.APP_VERSION));
    final String appUpdateChannel = attributes.get(Attribute.APP_UPDATE_CHANNEL);
    final String appBuildId = attributes.get(Attribute.APP_BUILD_ID);
    // NOTE: these values may be null
    final String userAgent = attributes.get(Attribute.USER_AGENT);
    final String xTelemetryAgent = attributes.get(Attribute.X_TELEMETRY_AGENT);

    // Check for toxic data that should be dropped without sending to error output.
    if (ParseUri.TELEMETRY.equals(namespace) && "crash".equals(docType)
        && "nightly".equals(appUpdateChannel) && "20190719094503".equals(appBuildId) //
        && Optional.of(json) // payload.metadata.MozCrashReason
            // textValue() is null for missing or non-text nodes, which empties the Optional.
            .map(j -> j.path("payload").path("metadata").path("MozCrashReason").textValue())
            .filter(s -> s.contains("do not use eval with system privileges")) //
            .isPresent()) {
      throw new MessageShouldBeDroppedException("1567596");
    }
    if (ParseUri.TELEMETRY.equals(namespace) && "crash".equals(docType)
        && (("nightly".equals(appUpdateChannel)
            && (appVersion.startsWith("68") || appVersion.startsWith("69")))
            || ("beta".equals(appUpdateChannel) && appVersion.startsWith("68")))
        && Optional.of(json) // payload.metadata.RemoteType
            .map(j -> j.path("payload").path("metadata").path("RemoteType").textValue())
            .filter(s -> s.startsWith("webIsolated=")) //
            .isPresent()) {
      throw new MessageShouldBeDroppedException("1562011");
    }
    if (ParseUri.TELEMETRY.equals(namespace) && "bhr".equals(docType)
        && (appVersion.startsWith("68") || appVersion.startsWith("69")) //
        && Optional.of(json) // payload.hangs[].remoteType
            .map(j -> j.path("payload").path("hangs").elements()) //
            .map(Streams::stream).orElseGet(Stream::empty).map(j -> j.path("remoteType")) //
            .filter(JsonNode::isTextual) //
            .anyMatch(j -> j.textValue().startsWith("webIsolated="))) {
      throw new MessageShouldBeDroppedException("1562011");
    }
    if ("account-ecosystem".equals(docType)) {
      throw new MessageShouldBeDroppedException("1697602");
    }
    if ("firefox-desktop".equals(namespace) && "background-update".equals(docType)) {
      throw new MessageShouldBeDroppedException("1784911");
    }
    if ("firefox-desktop-background-update".equals(namespace)
        && "new-metric-capture-emulation".equals(docType)) {
      throw new MessageShouldBeDroppedException("1817821");
    }

    if (bug1712850Affected(attributes)) {
      if (json.hasNonNull("search_query") || json.hasNonNull("matched_keywords")) {
        throw new MessageShouldBeDroppedException("1712850");
      }
    }

    // Check for unwanted data; these messages aren't thrown out, but this class of errors will be
    // ignored for most pipeline monitoring.
    if (IGNORED_NAMESPACES.containsKey(namespace)) {
      throw new UnwantedDataException(IGNORED_NAMESPACES.get(namespace));
    }

    if (ParseUri.TELEMETRY.equals(namespace) && IGNORED_TELEMETRY_DOCTYPES.containsKey(docType)) {
      throw new UnwantedDataException(IGNORED_TELEMETRY_DOCTYPES.get(docType));
    }

    if ("FirefoxOS".equals(appName)) {
      throw new UnwantedDataException("1618684");
    }

    // These document types receive a significant number of pings with malformed `build_id`s due to
    // third-party builds where `appName != "Firefox"`
    if (ParseUri.TELEMETRY.equals(namespace) && FIREFOX_ONLY_DOCTYPES.contains(docType)
        && !"Firefox".equals(appName)) {
      throw new UnwantedDataException("1592010");
    }

    // Up to v0.13, Glean enforces a particular user-agent string
    // that a rogue fuzzer is not abiding by
    // https://searchfox.org/mozilla-central/source/third_party/rust/glean-core/src/upload/request.rs#35,72-75
    // The Glean SDK stopped submitting a special user-agent after v44.0.0, sending that value
    // in the X-Telemetry-Agent header instead. We require one of the two to be valid;
    // see https://bugzilla.mozilla.org/show_bug.cgi?id=1766424
    if ("firefox-desktop".equals(namespace)) {
      boolean isValidGleanAgentOldStyle = Strings.nullToEmpty(userAgent).startsWith("Glean");
      boolean isValidGleanAgentNewStyle = Strings.nullToEmpty(xTelemetryAgent).startsWith("Glean");
      if (!isValidGleanAgentOldStyle && !isValidGleanAgentNewStyle) {
        throw new UnwantedDataException("1684980");
      }
    }

    // Check for other signatures that we want to send to error output, but which should appear
    // in normal pipeline monitoring.
    if (bug1489560Affected(attributes, json)) {
      // See also https://bugzilla.mozilla.org/show_bug.cgi?id=1614428
      throw new AffectedByBugException("1489560");
    }

    // No such docType: default-browser-agent/1
    if ("default-browser-agent".equals(namespace) && "1".equals(docType)) {
      throw new UnwantedDataException("1626020");
    }

    // Redactions (message is altered, but allowed through).
    if (bug1602844Affected(attributes)) {
      json.path("events").elements().forEachRemaining(event -> {
        // Index 5 of each event is read as the map of extra values.
        JsonNode eventMapValues = event.path(5);
        // has(String) is only true for object nodes, so the cast below is safe.
        if (eventMapValues.has("fxauid")) {
          ((ObjectNode) eventMapValues).replace("fxauid", NullNode.getInstance());
        }
        // The counter is bumped once per event, whether or not a value was redacted.
        markBugCounter("1602844");
      });
    }

    if (bug1162183Affected(attributes)) {
      JsonNode payload = json.path("payload");
      // has(String) is only true for object nodes, so the cast below is safe.
      if (payload.has("slowSQL")) {
        ((ObjectNode) payload).remove("slowSQL");
        markBugCounter("1162183");
      }
    }

    if (bug1642386Affected(attributes)) {
      json.path("payload").path("syncs").elements().forEachRemaining(syncItem -> {
        syncItem.path("engines").elements().forEachRemaining(engine -> {
          // A non-object entry has no "outgoing" field to strip; skip it instead of letting
          // the cast fail with a ClassCastException on a malformed payload.
          if (engine.isObject()) {
            ((ObjectNode) engine).remove("outgoing");
            markBugCounter("1642386");
          }
        });
      });
    }

    if (ParseUri.TELEMETRY.equals(namespace) && "main".equals(docType)) {
      processForBug1751753(json);
    }

    // The explicit null check keeps a null namespace away from contains().
    if ("metrics".equals(docType) && namespace != null
        && BUG_1751955_AFFECTED_NAMESPACES.contains(namespace)) {
      processForBug1751955(json);
    }

    // Data collected prior to glean.js 0.17.0 is effectively useless.
    if (bug1733118Affected(namespace, docType, json)) {
      // See also https://bugzilla.mozilla.org/show_bug.cgi?id=1733118
      throw new AffectedByBugException("1733118");
    }

  }