in src/script.py [0:0]
def sort_jpegs(file_path, label_type, input_path, output_path):
    """Copy tiled JPEGs into train/valid/test folders, split by patient.

    Each line of ``file_path`` is treated as a file name whose first 12
    characters identify the patient (presumably a TCGA-style barcode --
    confirm against the data). Patients are shuffled with the global
    ``random`` state and split 70% / 20% / 10% into ``train`` / ``valid`` /
    ``test``, so all tiles of one patient land in the same split.

    For every directory matching ``{input_path}/{patient}*_files``, each
    ``*/*.jpeg`` tile inside it is copied to
    ``{output_path}/{split}/{label_type}/{split}_{slide}_{tile}``, where
    ``slide`` is the directory name up to its first underscore.

    Args:
        file_path: Text file with one file name per line.
        label_type: Name of the label sub-directory created in each split.
        input_path: Directory containing the ``*_files`` tile directories.
        output_path: Root directory that receives the split folders.

    Returns:
        List of slide names, one entry per matched ``*_files`` directory,
        in the order they were processed.
    """
    # Strip line endings/whitespace before slicing so short lines do not
    # carry a trailing newline into the ID, and skip blank lines so they do
    # not become phantom patients that skew the split sizes.
    with open(file_path) as f:
        patient_ids = {line.strip()[:12] for line in f if line.strip()}

    # Set iteration order varies between processes (string hash
    # randomisation); sorting first makes the shuffle reproducible whenever
    # the caller seeds ``random``.
    patients = sorted(patient_ids)

    # Train-val-test split
    random.shuffle(patients)
    splits = [0.7, 0.9]
    train_end = int(splits[0] * len(patients))
    valid_end = int(splits[1] * len(patients))
    patient_split = {
        'train': patients[:train_end],
        'valid': patients[train_end:valid_end],
        'test': patients[valid_end:],
    }

    slides = []
    for split in ['train', 'valid', 'test']:
        dest_dir = os.path.join(output_path, split, label_type)
        # Created even when the split is empty, matching prior behaviour.
        pathlib.Path(f'{output_path}/{split}/{label_type}').mkdir(parents=True, exist_ok=True)
        for patient in patient_split[split]:
            for image in glob(f'{input_path}/{patient}*_files'):
                image_str = os.path.basename(image).split('_')[0]
                slides.append(image_str)
                for tile in glob(f'{image}/*/*.jpeg'):
                    tile_str = os.path.basename(tile)
                    # Prefix with split and slide so tile names from
                    # different slides cannot collide in the flat folder.
                    new_str = '_'.join([split, image_str, tile_str])
                    shutil.copy(tile, os.path.join(dest_dir, new_str))
    return slides