in muss/resources/datasets.py [0:0]
def mix_files(input_filepaths, props, output_filepath):
    """Randomly interleave the lines of several files into one output file.

    At each step one input file is drawn at random (weighted by ``props``)
    and its next line is written to ``output_filepath``. An input that runs
    out of lines is restarted from its beginning, so lines of shorter inputs
    can appear several times. Mixing stops as soon as every input that can
    be drawn has been exhausted at least once; the line fetched on that
    final draw is not written.

    The global NumPy random state is seeded with 0 on every call, so the
    sequence of draws is reproducible.

    Args:
        input_filepaths: Paths of the files to mix.
        props: Sampling probability of each input, forwarded as ``p`` to
            ``np.random.choice`` (``None`` therefore means uniform sampling).
            Inputs with probability 0 are never drawn and are not waited for.
        output_filepath: Path of the file to write; it is opened in ``'w'``
            mode, so existing content is overwritten.
    """
    np.random.seed(0)
    n_files = len(input_filepaths)
    generators = [yield_lines(filepath) for filepath in input_filepaths]
    if props is None:
        has_looped = [False] * n_files
    else:
        # An input with zero probability can never be drawn, hence can never
        # loop; count it as done up front so the loop below terminates.
        has_looped = [prop == 0 for prop in props]
    try:
        with open(output_filepath, 'w') as f:
            # Stop when all lines have been seen at least once
            while True:
                # Passing the count is equivalent to passing range(count).
                idx = np.random.choice(n_files, p=props)
                try:
                    line = next(generators[idx])
                except StopIteration:
                    has_looped[idx] = True
                    # Start reading the file all over again
                    generators[idx] = yield_lines(input_filepaths[idx])
                    # An empty input has no first line either; use None
                    # rather than letting StopIteration escape the function.
                    line = next(generators[idx], None)
                if all(has_looped):
                    break
                if line is not None:
                    f.write(f'{line}\n')
    finally:
        # Release iterators that were not read to the end.
        # NOTE(review): presumably yield_lines returns a generator holding an
        # open file; the getattr guard keeps this safe if it does not.
        for generator in generators:
            close = getattr(generator, 'close', None)
            if close is not None:
                close()