def load_data()

in 1. Amazon SageMaker Processing/preprocess.py [0:0]


def load_data(file_list: list) -> pd.DataFrame:
    """Read the selected columns from each CSV input and stack them into one frame.

    Args:
        file_list: CSV inputs to read; each entry is handed to ``pd.read_csv``
            unchanged. Every input must contain all of the selected columns.

    Returns:
        A single DataFrame holding the rows of every input in the order
        given, restricted to the selected columns, with a fresh 0..n-1 index.

    Raises:
        ValueError: If ``file_list`` yields no inputs, or (from pandas) if an
            input is missing one of the selected columns.
    """
    # Only these columns are loaded; any other column in the inputs is dropped.
    use_cols = [
        "fare_amount",
        "lpep_pickup_datetime",
        "lpep_dropoff_datetime",
        "passenger_count",
        "PULocationID",
        "DOLocationID",
    ]
    frames = [pd.read_csv(file, usecols=use_cols) for file in file_list]
    if not frames:
        # pd.concat would raise an opaque "No objects to concatenate" here;
        # keep the exception type but say which argument was at fault.
        raise ValueError("file_list is empty: no input files to load")
    # ignore_index discards the per-file row labels so the result is indexed 0..n-1.
    return pd.concat(frames, ignore_index=True)