1. Amazon SageMaker Processing/preprocess.py [214:253]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    return

def _read_json(path: str) -> dict:
    """Read a JSON file.

    Args:
        path (str): Path to the file.

    Returns:
        (dict[object, object]): A dictionary representation of the JSON file.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file content is not valid UTF-8.
        json.JSONDecodeError: If the file content is not valid JSON.
    """
    # JSON is UTF-8 by specification; decode explicitly instead of relying on
    # the locale-dependent default encoding of open().
    with open(path, "r", encoding="utf-8") as f:
        return json.load(f)

def main(base_dir: str, args: argparse.Namespace):
    # Input data files
    input_dir = os.path.join(base_dir, "input/data")
    input_file_list = glob.glob(f"{input_dir}/*.csv")
    logger.info(f"Input file list: {input_file_list}")

    hosts = _read_json("/opt/ml/config/resourceconfig.json")
    logger.info(hosts)
    current_host = hosts["current_host"]
    logger.info(current_host)
        
    if len(input_file_list) == 0:
        raise Exception(f"No input files found in {input_dir}")

    # Input zones file
    zones_dir = os.path.join(base_dir, "input/zones")
    zones_file = os.path.join(zones_dir, "taxi_zones.zip")
    if not os.path.exists(zones_file):
        raise Exception(f"Zones file {zones_file} does not exist")

    # Extract and load taxi zones geopandas dataframe
    extract_zones(zones_file, zones_dir)
    zone_df = load_zones(zones_dir)

    # Load input files
    data_df = load_data(input_file_list)
    data_df = enrich_data(data_df, zone_df)
    data_df, data_fg = clean_data(data_df)
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -


5. MLOps SageMaker Project/sagemaker-workshop-preprocess-seedcode-v1/pipelines/preprocess/preprocess.py [174:213]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    return

def _read_json(path: str) -> dict:
    """Read a JSON file.

    Args:
        path (str): Path to the file.

    Returns:
        (dict[object, object]): A dictionary representation of the JSON file.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file content is not valid UTF-8.
        json.JSONDecodeError: If the file content is not valid JSON.
    """
    # JSON is UTF-8 by specification; decode explicitly instead of relying on
    # the locale-dependent default encoding of open().
    with open(path, "r", encoding="utf-8") as f:
        return json.load(f)

def main(base_dir: str, args: argparse.Namespace):
    # Input data files
    input_dir = os.path.join(base_dir, "input/data")
    input_file_list = glob.glob(f"{input_dir}/*.csv")
    logger.info(f"Input file list: {input_file_list}")

    hosts = _read_json("/opt/ml/config/resourceconfig.json")
    logger.info(hosts)
    current_host = hosts["current_host"]
    logger.info(current_host)
        
    if len(input_file_list) == 0:
        raise Exception(f"No input files found in {input_dir}")

    # Input zones file
    zones_dir = os.path.join(base_dir, "input/zones")
    zones_file = os.path.join(zones_dir, "taxi_zones.zip")
    if not os.path.exists(zones_file):
        raise Exception(f"Zones file {zones_file} does not exist")

    # Extract and load taxi zones geopandas dataframe
    extract_zones(zones_file, zones_dir)
    zone_df = load_zones(zones_dir)

    # Load input files
    data_df = load_data(input_file_list)
    data_df = enrich_data(data_df, zone_df)
    data_df, data_fg = clean_data(data_df)
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -