def clean_graph_data()

in src/utils.py [0:0]


def clean_graph_data(graph_files, data_dir = "./graph_data"):
    """
    Clean graph data by removing header lines.

    For each entry in ``graph_files``, a subfolder named after the graph is
    created under ``data_dir`` (if it does not exist yet). The raw file is read
    from ``data_dir`` and a copy without its first ``lines_to_skip`` lines is
    written into that subfolder under the same file name.

    :param graph_files: dict, with a format of {'graph_name': {'file': str, 'lines_to_skip': int}}
    :param data_dir: str, the directory path to graph data
    :return: None; the cleaned files are written to disk
    :raises KeyError: if an entry has no 'file' or 'lines_to_skip' key
    :raises OSError: if a subfolder cannot be created, or a file cannot be read or written
    """

    for graph_name, graph_info in graph_files.items():

        # create a subfolder for each graph and save its file with header lines removed;
        # exist_ok avoids the race between checking for the folder and creating it
        graph_folder = os.path.join(data_dir, graph_name)
        os.makedirs(graph_folder, exist_ok=True)

        file_name = graph_info['file']
        raw_file = os.path.join(data_dir, file_name)
        new_file = os.path.join(graph_folder, file_name)

        # readlines() breaks only at real line endings. str.splitlines() would also
        # break at form feeds and Unicode separators, so a header containing one of
        # those characters would be counted as several lines and only partly removed.
        # The input is read completely before the output is opened, which keeps the
        # function safe even when both paths resolve to the same file.
        with open(raw_file, 'r') as f_raw:
            lines = f_raw.readlines()
        with open(new_file, 'w') as f_new:
            f_new.writelines(lines[graph_info['lines_to_skip']:])