in src/drift_detector.py [0:0]
def main(train_s3_uri, test_s3_uri, target_label):
    """Compute drift and accuracy for captured inference data and save them.

    Loads the training and test sets from S3, gathers every ``*.jsonl`` file
    found recursively under the directory named by the ``dataset_source``
    environment variable, and writes a ``results.json`` file into the
    directory named by the ``output_path`` environment variable.

    The written JSON object has three keys: ``accuracy``, ``drift_df`` (the
    drift frame serialized with ``to_json()``) and ``end_time`` (copied from
    the ``end_time`` environment variable).

    Args:
        train_s3_uri: Location of the training data, passed to
            ``utils.s3_to_df``.
        test_s3_uri: Location of the test data, passed to
            ``utils.s3_to_df``.
        target_label: Name of the label column. It is dropped from the
            training frame and forwarded to ``compute_accuracy_with_drift``.

    Raises:
        KeyError: If ``dataset_source``, ``end_time`` or ``output_path`` is
            not set in the environment.
        ValueError: If no ``*.jsonl`` file exists under ``dataset_source``.
    """
    # Read all configuration first so a missing variable fails immediately
    # instead of after the S3 downloads and the drift/accuracy computation.
    infer_dir = os.environ['dataset_source']
    end_time = os.environ['end_time']
    output_path = os.environ['output_path']

    train_df = utils.s3_to_df(train_s3_uri)
    # Drop the label so the remaining training columns can be handed to the
    # data-capture reader and compared against the inference data.
    train_df.drop([target_label], axis=1, inplace=True)
    test_df = utils.s3_to_df(test_s3_uri)

    infer_df_list = []
    for filepath in pathlib.Path(infer_dir).rglob('*.jsonl'):
        print(filepath)
        df = utils.df_from_datacapture(filepath.absolute(), train_df.columns.to_list())
        infer_df_list.append(df)

    # pd.concat raises an opaque "No objects to concatenate" ValueError on an
    # empty list; raise the same exception type with an actionable message.
    if not infer_df_list:
        raise ValueError(
            f"No '*.jsonl' data-capture files found under {infer_dir!r}"
        )
    infer_df = pd.concat(infer_df_list)

    drift_df = compute_drift(train_df, infer_df)
    accuracy = compute_accuracy_with_drift(test_df, infer_df, target_label)
    output = {
        'accuracy': accuracy,
        'drift_df': drift_df.to_json(),
        'end_time': end_time,
    }
    with open(f"{output_path}/results.json", 'w', encoding='utf-8') as f:
        json.dump(output, f)