benchmarks/rnnt/ootb/inference/pytorch/utils/convert_librispeech.py [60:82]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
                                       transcript=transcript))
    return input_data


# --- Stage 1: build the initial record list from the input directory. ---
print("[%s] Scaning input dir..." % args.output_json)
dataset = build_input_arr(input_dir=args.input_dir)

# --- Stage 2: run the conversion step over those records. ---
# The command-line options are gathered in one place and forwarded as
# keyword arguments; whatever parallel_preprocess returns replaces `dataset`.
print("[%s] Converting audio files..." % args.output_json)
_preprocess_options = {
    'input_dir': args.input_dir,
    'dest_dir': args.dest_dir,
    'target_sr': args.target_sr,
    'speed': args.speed,
    'overwrite': args.overwrite,
    'parallel': args.parallel,
}
dataset = parallel_preprocess(dataset=dataset, **_preprocess_options)

# --- Stage 3: write the manifest to args.output_json. ---
print("[%s] Generating json..." % args.output_json)
df = pd.DataFrame(dataset, dtype=object)

# The frame is turned back into a list of per-row dicts and serialized with
# the standard json module on purpose: per the original author's note,
# df.to_json() would put backslashes into the file paths.
dataset = df.to_dict(orient='records')
with open(args.output_json, 'w') as json_file:
    json.dump(dataset, json_file, indent=2)
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -


benchmarks/rnnt/ootb/train/utils/convert_librispeech.py [59:81]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
                                       transcript=transcript))
    return input_data


# --- Stage 1: build the initial record list from the input directory. ---
print("[%s] Scaning input dir..." % args.output_json)
dataset = build_input_arr(input_dir=args.input_dir)

# --- Stage 2: run the conversion step over those records. ---
# The command-line options are gathered in one place and forwarded as
# keyword arguments; whatever parallel_preprocess returns replaces `dataset`.
print("[%s] Converting audio files..." % args.output_json)
_preprocess_options = {
    'input_dir': args.input_dir,
    'dest_dir': args.dest_dir,
    'target_sr': args.target_sr,
    'speed': args.speed,
    'overwrite': args.overwrite,
    'parallel': args.parallel,
}
dataset = parallel_preprocess(dataset=dataset, **_preprocess_options)

# --- Stage 3: write the manifest to args.output_json. ---
print("[%s] Generating json..." % args.output_json)
df = pd.DataFrame(dataset, dtype=object)

# The frame is turned back into a list of per-row dicts and serialized with
# the standard json module on purpose: per the original author's note,
# df.to_json() would put backslashes into the file paths.
dataset = df.to_dict(orient='records')
with open(args.output_json, 'w') as json_file:
    json.dump(dataset, json_file, indent=2)
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -