def main()

in src/parse_musicnet.py [0:0]


def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('-i', '--input', type=Path, required=True,
                        help='MusicNet directory')
    parser.add_argument('-o', '--output', type=Path, required=True,
                        help='Output directory')

    args = parser.parse_args()
    print(args)

    src = args.input
    dst = args.output
    dst.mkdir(exist_ok=True, parents=True)

    domains = [
        ['Accompanied Violin', 'Beethoven'],
        ['Solo Cello', 'Bach'],
        ['Solo Piano', 'Bach'],
        ['Solo Piano', 'Beethoven'],
        ['String Quartet', 'Beethoven'],
        ['Wind Quintet', 'Cambini'],
    ]

    db = pandas.read_csv(src / 'musicnet_metadata.csv')
    traindir = src / 'train_data'
    testdir = src / 'test_data'

    for (ensemble, composer) in domains:
        fid_list = db[(db["composer"] == composer) & (db["ensemble"] == ensemble)].id.tolist()
        total_time = sum(db[(db["composer"] == composer) & (db["ensemble"] == ensemble)].seconds.tolist())
        print(f"Total time for {composer} with {ensemble} is: {total_time} seconds")

        domaindir = dst / f"{composer}_{ensemble.replace(' ', '_')}"
        if not os.path.exists(domaindir):
            os.mkdir(domaindir)

        for fid in fid_list:
            fname = traindir / f'{fid}.wav'
            if not fname.exists():
                fname = testdir / f'{fid}.wav'

            copy(str(fname), str(domaindir))