def scrape_ec2_pricing()

in contrib/scrape-ec2-prices.py [0:0]


# Offer-file attribute name -> key under which it is stored in a SKU record.
_SKU_ATTRIBUTE_FIELDS = {
    "location": "locationName",
    "locationType": "locationType",
    "instanceType": "size",
    "operatingSystem": "os",
    "usagetype": "usage_type",
    "preInstalledSw": "preInstalledSw",
    "regionCode": "location",
}

# Only products whose family contains one of these substrings are kept.
_PRICED_FAMILIES = ("Compute Instance", "Dedicated Host")


def _parse_products(json_file):
    """Stream the ``products`` section of *json_file* into a dict of SKU records.

    Each record holds ``sku`` plus whichever of ``family``, ``locationName``,
    ``locationType``, ``size``, ``os``, ``usage_type``, ``preInstalledSw`` and
    ``location`` were present for that product.  Products whose family is not
    a compute instance or dedicated host are discarded as soon as their JSON
    object ends, so they never accumulate in memory.
    """
    skus = {}
    current_sku = ""
    # Prefixes for the product currently being read; recomputed once per SKU
    # instead of once per parser event.
    product_prefix = None
    family_prefix = None
    attributes_prefix = None

    # ijson consumes bytes; binary mode avoids the re-encoding it has to do
    # for text-mode files and does not depend on the locale's default encoding.
    with open(json_file, "rb") as f:
        print("Starting to parse pricing data, this could take up to 15 minutes...")
        # use a streaming parser because the file is very large
        parser = ijson.parse(f, buf_size=IJSON_BUF_SIZE)

        for prefix, event, value in tqdm.tqdm(parser):
            # Stop once the parser moves past the product catalogue.
            if "terms" in prefix:
                break

            if event == "map_key":
                if prefix == "products":
                    current_sku = value
                    skus[current_sku] = {"sku": value}
                    product_prefix = f"products.{current_sku}"
                    family_prefix = f"{product_prefix}.productFamily"
                    attributes_prefix = f"{product_prefix}.attributes."
            elif event == "string":
                if attributes_prefix is None:
                    # No product has been opened yet.
                    continue
                if prefix == family_prefix:
                    skus[current_sku]["family"] = value
                elif prefix.startswith(attributes_prefix):
                    field = _SKU_ATTRIBUTE_FIELDS.get(prefix[len(attributes_prefix):])
                    if field is not None:
                        skus[current_sku][field] = value
            elif event == "end_map" and prefix == product_prefix:
                # only get prices of compute instances atm; a product with no
                # productFamily at all is treated as "not a compute instance"
                # rather than aborting the whole parse with a KeyError.
                family = skus[current_sku].get("family", "")
                if not any(name in family for name in _PRICED_FAMILIES):
                    del skus[current_sku]

    return skus


def _build_price_tables(skus, prices):
    """Group SKU prices into per-OS tables of ``size -> {location: price}``.

    *skus* is the mapping produced by ``_parse_products``; *prices* is indexed
    as ``prices[sku]["price"]``.  A price that cannot be converted to ``float``
    is stored as the string ``"n/a"``.
    """
    ec2_linux = defaultdict(OrderedDict)
    ec2_windows = defaultdict(OrderedDict)
    ec2_rhel = defaultdict(OrderedDict)
    ec2_rhel_ha = defaultdict(OrderedDict)
    ec2_suse = defaultdict(OrderedDict)

    os_map = {
        "Linux": ec2_linux,
        "Windows": ec2_windows,
        "RHEL": ec2_rhel,
        "SUSE": ec2_suse,
        "Red Hat Enterprise Linux with HA": ec2_rhel_ha,
    }

    for sku, product in skus.items():
        if product["locationType"] != "AWS Region":
            continue
        # skip any SQL
        if product["preInstalledSw"] != "NA":
            continue

        os_name = product["os"]
        if os_name == "NA":
            continue
        os_dict = os_map.get(os_name)
        # new OS, until it is documented skip it
        if os_dict is None:
            print(f"Unexpected OS {os_name}")
            continue

        size = product["size"]
        location = product["location"]
        # size is first seen
        locations = os_dict.setdefault(size, {})

        # if price already exists pick the BoxUsage usage type which means on
        # demand.  The truthiness test is intentional: a stored 0.0 counts as
        # "no price yet" and may be replaced by any later SKU.
        if locations.get(location) and "BoxUsage" not in product["usage_type"]:
            continue

        # if price is not a number then label it as not available
        try:
            locations[location] = float(prices[sku]["price"])
        except ValueError:
            locations[location] = "n/a"
        except KeyError:
            # This SKU has no entry in `prices` (e.g. the size is available
            # only reserved).  Drop the size only if nothing has been recorded
            # for it; deleting unconditionally would discard prices already
            # collected for other locations, making the result depend on SKU
            # iteration order.
            if not locations:
                del os_dict[size]

    return {
        "ec2_linux": ec2_linux,
        "ec2_windows": ec2_windows,
        "ec2_rhel": ec2_rhel,
        "ec2_suse": ec2_suse,
        "ec2_rhel_ha": ec2_rhel_ha,
    }


def scrape_ec2_pricing():
    """Build per-OS EC2 price tables from the pricing JSON file.

    Prices come from ``get_all_prices()`` and the file path from
    ``get_json()``.  The file's ``products`` section is streamed with ijson,
    keeping only compute-instance and dedicated-host products.  Products are
    then filtered to AWS regions with no pre-installed software and a known
    operating system.

    Returns:
        A dict with the keys ``ec2_linux``, ``ec2_windows``, ``ec2_rhel``,
        ``ec2_suse`` and ``ec2_rhel_ha``.  Each value maps an instance size to
        a ``{region code: price}`` dict, where the price is a ``float`` or the
        string ``"n/a"`` when the raw price is not numeric.

    Raises:
        KeyError: if a retained product lacks one of the attributes the
            filtering relies on (``locationType``, ``preInstalledSw``,
            ``operatingSystem``, ``instanceType``, ``regionCode``).
    """
    # Same call order as before: fetch prices first, then locate the JSON file.
    prices = get_all_prices()
    # The second element of get_json()'s result is not needed here.
    json_file, _from_file = get_json()
    skus = _parse_products(json_file)
    return _build_price_tables(skus, prices)