def split_file()

in Models/utils/dataset_split.py [0:0]


def split_file(input_path: RichPath, output_paths: Dict[str, RichPath], train_ratio: float,
               valid_ratio: float, test_ratio: float, test_only_projects: Set[str]) -> None:
    train_graphs, valid_graphs, test_graphs, test_only_graphs = [], [], [], []

    try:
        for datapoint in input_path.read_by_file_suffix():
            datapoint_provenance = datapoint['Filename']
            file_set = get_fold(datapoint_provenance, train_ratio, valid_ratio, test_only_projects)
            if file_set == 'train':
                train_graphs.append(datapoint)
            elif file_set == 'valid':
                valid_graphs.append(datapoint)
            elif file_set == 'test':
                test_graphs.append(datapoint)
            elif file_set == 'test-only':
                test_only_graphs.append(datapoint)
    except EOFError:
        print('Failed for file %s.' % input_path)
        return

    input_file_basename = input_path.basename()

    if train_ratio > 0:
        output_path = output_paths['train'].join(input_file_basename)
        print('Saving %s...' % (output_path,))
        output_path.save_as_compressed_file(train_graphs)

    if valid_ratio > 0:
        output_path = output_paths['valid'].join(input_file_basename)
        print('Saving %s...' % (output_path,))
        output_path.save_as_compressed_file(valid_graphs)

    if test_ratio > 0:
        output_path = output_paths['test'].join(input_file_basename)
        print('Saving %s...' % (output_path,))
        output_path.save_as_compressed_file(test_graphs)

    if len(test_only_graphs) > 0:
        output_path = output_paths['test-only'].join(input_file_basename)
        print('Saving %s...' % (output_path,))
        output_path.save_as_compressed_file(test_only_graphs)