def main()

in src/drift_detector.py [0:0]


def main(train_s3_uri, test_s3_uri, target_label):    
    train_df = utils.s3_to_df(train_s3_uri)
    train_df.drop([target_label], axis=1, inplace=True)
    
    test_df = utils.s3_to_df(test_s3_uri)
    
    infer_dir = os.environ['dataset_source']
    
    infer_df_list = []
    for filepath in pathlib.Path(infer_dir).rglob('*.jsonl'):
        print(filepath)
        df = utils.df_from_datacapture(filepath.absolute(), train_df.columns.to_list())
        infer_df_list.append(df)
        
    infer_df = pd.concat(infer_df_list)
    
    drift_df = compute_drift(train_df, infer_df)
    accuracy = compute_accuracy_with_drift(test_df, infer_df, target_label)
    
    output = {
        'accuracy': accuracy, 
        'drift_df': drift_df.to_json(),
        'end_time': os.environ['end_time']
    }

    with open(f"{os.environ['output_path']}/results.json", 'w') as f:
        json.dump(output, f)