def processbundlesmanifest()

in hgext/hgmo/__init__.py [0:0]


def processbundlesmanifest(orig, repo, proto, *args, **kwargs):
    """Wraps `wireprotov1server.clonebundles` and `wireprotov1server.clonebundles_2`.

    We examine the source IP address of the request and, when it can be
    attributed to a cloud region (AWS, GCP or Azure), advertise only bundle
    URLs for that region. Requests from Mozilla networks get the manifest
    re-sorted with ``stream_clone_key``.

    The checks run in this order, and the first one that matches wins:

    1. private source IP while an instance data file is configured and present
    2. AWS prefixes (only if the manifest mentions ``ec2region=``)
    3. GCP prefixes (only if the manifest mentions ``gceregion=``)
    4. Azure prefixes (only if the manifest mentions ``azureregion=``)
    5. Mozilla network prefixes

    Failures while reading or parsing the AWS/GCP/Azure/Mozilla prefix files
    are logged through ``repo.ui.log`` and treated as "no match"; they never
    fail the request.

    ``orig`` is the wrapped command; ``*args`` and ``**kwargs`` are forwarded
    to it unchanged.

    Returns the manifest produced by ``orig`` when nothing matches, the result
    of ``filter_manifest_for_region`` for a cloud match, or the re-sorted
    manifest lines joined into bytes for a Mozilla network match.
    """

    # Call original fxn wireproto.clonebundles. Keyword arguments are
    # forwarded as well so the wrapper is transparent to the wrapped command.
    manifest = orig(repo, proto, *args, **kwargs)

    if not isinstance(proto, webproto):
        return manifest

    # Get path for Mozilla, AWS, GCP, Azure network prefixes. Return if all
    # are missing.
    mozpath = repo.ui.config(b"hgmo", b"mozippath")
    awspath = repo.ui.config(b"hgmo", b"awsippath")
    gcppath = repo.ui.config(b"hgmo", b"gcpippath")
    azureippath = repo.ui.config(b"hgmo", b"azureippath")
    if not awspath and not mozpath and not gcppath and not azureippath:
        return manifest

    # Mozilla's load balancers add a X-Cluster-Client-IP header to identify the
    # actual source IP, so prefer it.  And if the request comes through Fastly,
    # prefer the Fastly-Client-IP header it adds so we take into account the
    # actual client.
    sourceip = proto._req.headers.get(
        b"Fastly-Client-IP",
        proto._req.headers.get(
            b"X-CLUSTER-CLIENT-IP", proto._req.rawenv.get(b"REMOTE_ADDR")
        ),
    )

    if not sourceip:
        return manifest

    try:
        sourceip = ipaddress.ip_address(pycompat.unicode(pycompat.sysstr(sourceip)))
    except ValueError:
        # Unparseable client address: serve the unfiltered manifest.
        # XXX return 400?
        return manifest

    # If the request originates from a private IP address, and we are running on
    # a cloud instance, we should be serving traffic to private instances in CI.
    # Grab the region from the instance_data.json object and serve the correct
    # manifest accordingly
    instance_data_path = repo.ui.config(b"hgmo", b"instance-data-path")
    if (
        instance_data_path
        and sourceip.is_private
        and os.path.exists(instance_data_path)
    ):
        with open(instance_data_path, "rb") as fh:
            instance_data = json.load(fh)

        return filter_manifest_for_region(
            manifest, cloud_region_specifier(instance_data)
        )

    # If the AWS IP file path is set and some line in the manifest includes an ec2 region,
    # we will check if the request came from AWS to serve optimized bundles.
    if awspath and b"ec2region=" in manifest.data:
        try:
            with open(awspath, "rb") as fh:
                awsdata = json.load(fh)

            for ipentry in awsdata["prefixes"]:
                network = ipaddress.IPv4Network(ipentry["ip_prefix"])

                if sourceip not in network:
                    continue

                region = ipentry["region"]

                return filter_manifest_for_region(
                    manifest, b"ec2region=%s" % pycompat.bytestr(region)
                )

        except Exception as e:
            # Best effort: log and fall through to the remaining checks.
            repo.ui.log(b"hgmo", b"exception filtering AWS bundle source IPs: %s\n", e)

    # If the GCP IP file path is set and some line in the manifest includes a GCE region,
    # we will check if the request came from GCP to serve optimized bundles
    if gcppath and b"gceregion=" in manifest.data:
        try:
            with open(gcppath, "rb") as f:
                gcpdata = json.load(f)

            for ipentry in gcpdata["prefixes"]:
                if "ipv4Prefix" in ipentry:
                    network = ipaddress.IPv4Network(ipentry["ipv4Prefix"])
                elif "ipv6Prefix" in ipentry:
                    network = ipaddress.IPv6Network(ipentry["ipv6Prefix"])
                else:
                    # Entry carries neither prefix key. Skip it rather than
                    # testing against an unbound or stale `network`, which
                    # would otherwise abort matching for every later entry.
                    continue

                if sourceip not in network:
                    continue

                region = ipentry["scope"]

                return filter_manifest_for_region(
                    manifest, b"gceregion=%s" % pycompat.bytestr(region)
                )

        except Exception as e:
            # Best effort: log and fall through to the remaining checks.
            repo.ui.log(b"hgmo", b"exception filtering GCP bundle source IPs: %s\n", e)

    # If the Azure IP file path is set and some line in the manifest includes an
    # Azure region, ask the helper which region (if any) the source IP is in.
    if azureippath and b"azureregion=" in manifest.data:
        try:
            azure_region = get_current_azure_region(azureippath, sourceip)
            if azure_region:
                return filter_manifest_for_region(
                    manifest, b"azureregion=%s" % pycompat.bytestr(azure_region)
                )
        except Exception as e:
            # Best effort: log and fall through to the remaining checks.
            repo.ui.log(
                b"hgmo", b"exception filtering Azure bundle source IPs: %s\n", e
            )

    # Determine if source IP is in a Mozilla network, as we stream results to those addresses
    if mozpath:
        try:
            # One network prefix per line.
            with open(mozpath, "r") as fh:
                mozdata = fh.read().splitlines()

            for ipentry in mozdata:
                network = ipaddress.IPv4Network(
                    pycompat.unicode(pycompat.sysstr(ipentry))
                )

                # If the source IP is from a Mozilla network, prioritize stream bundles
                if sourceip in network:
                    origlines = sorted(manifest.data.splitlines(), key=stream_clone_key)
                    # Trailing empty element so the joined output ends with a newline.
                    origlines.append(b"")
                    return b"\n".join(origlines)

        except Exception as e:
            # Best effort: log and serve the unfiltered manifest below.
            repo.ui.log(b"hgmo", b"exception filtering bundle source IPs: %s\n", e)

    return manifest