in src/parse_musicnet.py [0:0]
def main() -> None:
    """Group MusicNet recordings into one output folder per domain.

    Command-line arguments:
        -i / --input:  MusicNet directory. The code reads
            ``musicnet_metadata.csv`` from it and looks for audio under
            ``train_data`` and ``test_data``.
        -o / --output: Output directory; created (with parents) if missing.

    For every (ensemble, composer) pair listed below, the metadata rows whose
    ``composer`` and ``ensemble`` columns match are selected, the sum of their
    ``seconds`` column is printed, and ``<id>.wav`` for each row is copied
    into ``<output>/<composer>_<ensemble with spaces replaced by '_'>``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-i', '--input', type=Path, required=True,
                        help='MusicNet directory')
    parser.add_argument('-o', '--output', type=Path, required=True,
                        help='Output directory')
    args = parser.parse_args()
    print(args)

    src = args.input
    dst = args.output
    dst.mkdir(exist_ok=True, parents=True)

    # (ensemble, composer) pairs to extract.
    domains = [
        ['Accompanied Violin', 'Beethoven'],
        ['Solo Cello', 'Bach'],
        ['Solo Piano', 'Bach'],
        ['Solo Piano', 'Beethoven'],
        ['String Quartet', 'Beethoven'],
        ['Wind Quintet', 'Cambini'],
    ]

    db = pandas.read_csv(src / 'musicnet_metadata.csv')
    traindir = src / 'train_data'
    testdir = src / 'test_data'

    for ensemble, composer in domains:
        # Filter the metadata once and reuse the selection for both the id
        # list and the duration total.
        selected = db[(db["composer"] == composer) & (db["ensemble"] == ensemble)]
        fid_list = selected["id"].tolist()
        total_time = sum(selected["seconds"].tolist())
        print(f"Total time for {composer} with {ensemble} is: {total_time} seconds")

        domaindir = dst / f"{composer}_{ensemble.replace(' ', '_')}"
        # exist_ok=True makes re-runs safe without a separate existence
        # check, and raises if a regular file occupies the path instead of
        # silently copying over it.
        domaindir.mkdir(exist_ok=True)

        for fid in fid_list:
            # Look in train_data first, then fall back to test_data. If the
            # file is in neither, the fallback path is handed to copy()
            # unchanged and any resulting error propagates to the caller.
            fname = traindir / f'{fid}.wav'
            if not fname.exists():
                fname = testdir / f'{fid}.wav'
            copy(str(fname), str(domaindir))