in src/utils.py [0:0]
def clean_graph_data(graph_files, data_dir="./graph_data"):
    """
    Clean graph data by removing header lines.

    For every graph, the raw file ``<data_dir>/<file>`` is copied to
    ``<data_dir>/<graph_name>/<file>`` without its first ``lines_to_skip``
    lines. The raw file is only opened for reading. Files are streamed line
    by line, so large graph files are never fully loaded into memory.

    :param graph_files: dict, with a format of
        {'graph_name': {'file': str, 'lines_to_skip': int}}
    :param data_dir: str, the directory path to graph data
    """
    for graph_name, graph_info in graph_files.items():
        file_name = graph_info['file']
        lines_to_skip = graph_info['lines_to_skip']

        # create a subfolder for each graph; exist_ok avoids the race between
        # checking for the folder and creating it
        graph_folder = os.path.join(data_dir, graph_name)
        os.makedirs(graph_folder, exist_ok=True)

        raw_file = os.path.join(data_dir, file_name)
        new_file = os.path.join(graph_folder, file_name)
        # the raw file is opened first, so a missing input fails before any
        # output file is created
        with open(raw_file, 'r') as f_raw, open(new_file, 'w') as f_new:
            # consume the header lines; stop early when the file has fewer
            # lines than requested (the output is then empty)
            for _ in range(lines_to_skip):
                if not f_raw.readline():
                    break
            # stream the remaining lines to the cleaned file
            f_new.writelines(f_raw)