in clutrr/utils/web.py [0:0]
def _task_args(config, task_file):
    """Return the generation arguments recorded for one task file.

    Arguments are looked up under ``config['args']``, which is where the
    train-task lookups read them from. A top-level entry is accepted as a
    fallback so that a config storing them there keeps working.
    """
    recorded = config['args']
    if task_file in recorded:
        return recorded[task_file]
    return config[task_file]


def _describe_dataset(folder):
    """Read ``config.json`` inside *folder* and return one table row as a dict."""
    # Use a context manager so the config file handle is closed promptly.
    with open(os.path.join(folder, 'config.json')) as fp:
        config = json.load(fp)

    train_files = config['train_task']
    test_files = config['test_tasks']
    train_args = [_task_args(config, train_files[task]) for task in train_files]
    test_args = [_task_args(config, test_files[task]) for task in test_files]

    # `folder` ends with a path separator (it comes from a trailing-'' glob),
    # so normalise before taking the last component; this is separator-agnostic.
    name = os.path.basename(os.path.normpath(folder))
    modified = datetime.datetime.fromtimestamp(os.stat(folder).st_mtime)

    return {
        # The data name is taken from the first train task only.
        'data_name': train_args[0]['data_name'],
        'unames': '<a href={}>{}</a>'.format(name + '.zip', name),
        'train': ','.join(train_files),
        'test': ','.join(test_files),
        'num_train': sum(args['num_rows'] for args in train_args),
        'num_test': sum(args['num_rows'] for args in test_args),
        'times': modified.strftime("%y-%m-%d / %H:%M"),
        'holdout': ','.join(args.get('holdout', 'None') for args in train_args),
    }


def generate_webpage(data_path):
    """
    Build an HTML index and a CSV summary of the datasets found in ``data_path``.

    Every immediate sub-directory of ``data_path`` is expected to contain a
    ``config.json``. One table row is produced per sub-directory, and the rows
    are sorted by the directory's formatted modification time. Three files are
    written into ``data_path``: ``dataset_details.csv``, ``index.html`` and
    ``style.css`` (downloaded from ``CSS_TEMPLATE``).

    :param data_path: directory whose sub-directories hold the datasets
    :return: None
    :raises requests.HTTPError: if the stylesheet download returns an error status
    """
    folders = glob.glob(os.path.join(data_path, '*', ''))
    print("Found {} folders.".format(len(folders)))
    generated_at = '<p>This webpage is autogenerated at {}</p>'.format(
        datetime.datetime.now().strftime("%Y-%m-%d %H:%M"))

    rows = []
    for folder in folders:
        print('Reading {}'.format(folder))
        rows.append(_describe_dataset(folder))

    # Explicit columns keep the CSV layout stable even when no folder was found.
    csv_columns = ['data_name', 'unames', 'train', 'test',
                   'num_train', 'num_test', 'times', 'holdout']
    df = pd.DataFrame(rows, columns=csv_columns)
    df.sort_values(by=['times'], inplace=True)
    df.to_csv(os.path.join(data_path, 'dataset_details.csv'))

    # The HTML table shows the columns in a different order than the CSV.
    html_columns = ['data_name', 'unames', 'train', 'num_train',
                    'test', 'num_test', 'times', 'holdout']
    row_template = '<tr>' + '<td>{}</td>' * len(html_columns) + '</tr>'
    pieces = [template_header]
    pieces.extend(row_template.format(*values)
                  for values in df[html_columns].itertuples(index=False, name=None))
    pieces.append(generated_at)
    pieces.append(template_footer.format('dataset_details.csv'))

    # Write the page first so it does not depend on the network fetch below.
    with open(os.path.join(data_path, 'index.html'), 'w') as fp:
        fp.write(''.join(pieces))

    # Bound the request and refuse to store an HTTP error page as the stylesheet.
    response = requests.get(CSS_TEMPLATE, timeout=30)
    response.raise_for_status()
    with open(os.path.join(data_path, 'style.css'), 'w') as fp:
        fp.write(response.text)