def parse()

in contrib/scrape-ec2-sizes.py [0:0]


def _storage_to_disk_size(storage):
    """Return the total local disk size described by a ``storage`` attribute.

    :param storage: Value of the product's ``storage`` attribute, or ``None``
        when the attribute is absent.
    :return: ``disk_count * disk_size`` as an ``int``; ``0`` for ``None``,
        ``"EBS only"``, or any string neither storage regex recognises.
    """
    if storage is None or storage == "EBS only":
        return 0
    match = REG1_STORAGE.match(storage)
    if match:
        # REG1_STORAGE is expected to capture (disk count, per-disk size).
        disk_number, disk_size = match.groups()
    else:
        match = REG2_STORAGE.match(storage)
        if not match:
            return 0
        # REG2_STORAGE captures only a size; treat it as a single disk.
        disk_number, disk_size = 1, match.group(1)
    # Sizes may contain thousands separators (e.g. "1,900").
    return int(disk_number) * int(disk_size.replace(",", ""))


def parse():
    """Parse the pricing JSON into per-size and per-region data.

    Only products whose ``productFamily`` is ``"Compute Instance"`` and whose
    ``location`` is a key of ``REGION_DETAILS`` are considered. For each
    instance type, the attributes of the first matching SKU encountered are
    used to build its size entry.

    If the JSON came from the cached file and turns out to be incomplete, the
    cache file is removed and the data is fetched once more.

    NOTE: the region dicts returned are the same objects stored in
    ``REGION_DETAILS``; they are updated in place with an ``instance_types``
    key.

    :return: ``(sizes, regions)`` where ``sizes`` maps instance type to a dict
        with ``id``, ``name``, ``ram``, ``bandwidth``, ``extra`` and ``disk``,
        and ``regions`` maps region id to its ``REGION_DETAILS`` entry with a
        sorted ``instance_types`` list.
    :raises ijson.common.IncompleteJSONError: if freshly fetched (non-cached)
        data is incomplete, or the retry after dropping the cache fails too.
    """
    sizes = {}
    regions = {r["id"]: r for r in REGION_DETAILS.values()}
    for region in regions.values():
        region["instance_types"] = []
    # Collect instance types in sets for O(1) de-duplication; they are
    # converted to sorted lists once all products have been processed.
    types_by_region = {region_id: set() for region_id in regions}

    json_file, from_file = get_json()
    products_iter = ijson.items(json_file, "products")
    try:
        products_data = next(products_iter)
    except ijson.common.IncompleteJSONError:
        # This likely indicates that the cached file is incomplete or corrupt,
        # so we delete it and re-download the data.
        if not from_file:
            raise
        # NOTE(review): the first json_file is not closed before the cache
        # file is removed - confirm whether get_json() returns an open handle.
        os.remove(FILEPATH)
        json_file, from_file = get_json()
        products_data = next(ijson.items(json_file, "products"))

    for product in products_data.values():
        if product.get("productFamily", "unknown") != "Compute Instance":
            continue
        attributes = product["attributes"]
        # "location" is removed so it does not end up in the size's extras.
        location = attributes.pop("location")
        if location not in REGION_DETAILS:
            continue

        region_id = REGION_DETAILS[location]["id"]
        instance_type = attributes["instanceType"]
        types_by_region[region_id].add(instance_type)

        if instance_type in sizes:
            # Size details were already taken from an earlier SKU.
            continue

        for field in IGNORED_FIELDS:
            attributes.pop(field, None)

        # First token of "memory" is the numeric amount (may contain commas);
        # scaled by 1024 - presumably GiB -> MiB, confirm against the feed.
        ram = int(float(attributes["memory"].split()[0].replace(",", "")) * 1024)

        bw_match = REG_BANDWIDTH.match(attributes["networkPerformance"])
        bandwidth = int(bw_match.group(1)) if bw_match is not None else None

        sizes[instance_type] = {
            "id": instance_type,
            "name": instance_type,
            "ram": ram,
            "bandwidth": bandwidth,
            "extra": filter_extras(attributes),
            # A missing "storage" attribute is treated like an unparseable
            # one (disk 0) instead of raising KeyError.
            "disk": _storage_to_disk_size(attributes.get("storage")),
        }

    for region_id, instance_types in types_by_region.items():
        regions[region_id]["instance_types"] = sorted(instance_types)
    return sizes, regions