in transformer-xl/utils/music_encoder.py [0:0]
def convert(self, input_folder, output_folder, mode):
    """Batch-convert the files under ``input_folder`` into ``output_folder``.

    Work is spread over a ``multiprocessing`` pool; each worker is called
    as ``worker(path, out_dir=...)``.

    Args:
        input_folder: Folder that holds the files to convert.
        output_folder: Folder that receives the results. It is created if
            it does not exist yet.
        mode: One of

            * ``'to_txt'`` / ``'midi_to_npy'`` -- paths returned by
              ``get_midi_paths(input_folder)`` are converted split by
              split into ``output_folder/{train,valid,test}``. The train
              split goes through the ``*_with_transposition`` converter,
              the other splits through the plain converter. Afterwards
              ``self.encoder.create_vocab_txt(output_folder)`` is called.
            * ``'to_midi'`` -- every ``.txt`` file found under
              ``input_folder`` is passed to ``self.run_from_text``.
            * ``'npy_to_midi'`` -- every ``.npy`` file found under
              ``input_folder`` is passed to ``self.run_npy_to_midi``.

    Raises:
        NotImplementedError: If ``mode`` is none of the values above.
    """
    # Keep one core free for the parent process, but never request zero
    # workers: ``cpu_count() - 1`` is 0 on a single-core machine and
    # ``mpl.Pool(0)`` raises ValueError.
    num_workers = max(1, mpl.cpu_count() - 1)

    # ``exist_ok`` avoids the check-then-create race of a separate
    # ``os.path.exists`` test.
    os.makedirs(output_folder, exist_ok=True)

    def run_in_pool(worker, paths, out_dir):
        # Only the partial (wrapping a bound method) is sent to the
        # workers, so this local helper itself never needs pickling.
        with mpl.Pool(num_workers) as pool:
            pool.map(functools.partial(worker, out_dir=out_dir), paths)

    if mode in ('to_txt', 'midi_to_npy'):
        if mode == 'to_txt':
            converted_format = 'txt'
            convert_transposition_f = self.run_to_text_with_transposition
            convert_f = self.run_to_text
        else:
            converted_format = 'npy'
            convert_transposition_f = self.run_to_npy_with_transposition
            convert_f = self.run_to_npy

        print('Converting midi files from {} to {}...'
              .format(input_folder, converted_format))
        train_paths, valid_paths, test_paths = get_midi_paths(input_folder)
        print('Loaded dataset from {}. Train/Val/Test={}/{}/{}'
              .format(input_folder, len(train_paths), len(valid_paths),
                      len(test_paths)))

        splits = [('train', train_paths),
                  ('valid', valid_paths),
                  ('test', test_paths)]
        for split_name, midi_paths in splits:
            # Only the training split uses the transposition converter.
            if split_name == 'train':
                convert_function = convert_transposition_f
            else:
                convert_function = convert_f

            out_split_dir = os.path.join(output_folder, split_name)
            os.makedirs(out_split_dir, exist_ok=True)

            start = time.time()
            run_in_pool(convert_function, midi_paths, out_split_dir)
            print('Split {} converted! Spent {}s to convert {} samples.'
                  .format(split_name, time.time() - start, len(midi_paths)))

        # Written once, after every split has been converted.
        self.encoder.create_vocab_txt(output_folder)

    elif mode in ('to_midi', 'npy_to_midi'):
        if mode == 'npy_to_midi':
            convert_f = self.run_npy_to_midi
            extensions = ['.npy']
        else:
            convert_f = self.run_from_text
            extensions = ['.txt']

        # The timer deliberately includes file discovery.
        start = time.time()
        input_paths = list(find_files_by_extensions(input_folder, extensions))
        run_in_pool(convert_f, input_paths, output_folder)
        print('Test converted! Spent {}s to convert {} samples.'
              .format(time.time() - start, len(input_paths)))

    else:
        raise NotImplementedError(
            'Unsupported conversion mode: {!r}'.format(mode))