datasets/librispeech.py [64:94]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    print(f"Number of tokens: {preprocessor.num_tokens}")
    trainset = Dataset(args.data_path, preprocessor, split="train", augment=False)
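    # Optionally write out the training text and the token set for inspection: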
    if args.save_text is not None:
        with open(args.save_text, "w") as fid:
            fid.write("\n".join(t for _, t, _ in trainset.dataset))
    if args.save_tokens is not None:
        with open(args.save_tokens, "w") as fid:
            fid.write("\n".join(preprocessor.tokens))
    valset = Dataset(args.data_path, preprocessor, split="validation")
    testset = Dataset(args.data_path, preprocessor, split="test")
    print("Number of examples per dataset:")
    print(f"Training: {len(trainset)}")
    print(f"Validation: {len(valset)}")
    print(f"Test: {len(testset)}")

    if not args.compute_stats:
        import sys

        sys.exit(0)

    # Compute mean and standard deviation over the training features:
    audio = torch.cat([trainset[i][0] for i in range(len(trainset))], dim=2)
    mean = torch.mean(audio)
    std = torch.std(audio)
    print(f"Data mean {mean} and standard deviation {std}.")

    # Compute average lengths of audio and targets:
    avg_in_t = sum(w for (w, _), _ in trainset.sample_sizes()) / len(trainset)
    avg_tgt_l = sum(l for _, l in trainset.sample_sizes()) / len(trainset)
    print(f"Average audio length {avg_in_t} (s)")
    print(f"Average target length {avg_tgt_l}")
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
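
The stats pass above concatenates every training example into a single tensor before reducing, which can exhaust memory on large splits. Below is a minimal streaming sketch of the same computation, assuming only that `trainset[i][0]` is a feature tensor as in the excerpt; the helper name is illustrative and not part of the repo:

import torch

def streaming_mean_std(dataset):
    # Accumulate sum and sum of squares one example at a time instead of
    # materializing the concatenated tensor; accumulate in float64 to limit
    # rounding error.
    total, total_sq, count = 0.0, 0.0, 0
    for i in range(len(dataset)):
        x = dataset[i][0].double()
        total += x.sum().item()
        total_sq += (x * x).sum().item()
        count += x.numel()
    mean = total / count
    # Population standard deviation; torch.std defaults to the unbiased (n - 1)
    # estimate, so the two differ only by the Bessel correction.
    std = (total_sq / count - mean * mean) ** 0.5
    return mean, std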



datasets/wsj.py [64:94]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    print(f"Number of tokens: {preprocessor.num_tokens}")
    trainset = Dataset(args.data_path, preprocessor, split="train", augment=False)
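    # Optionally write out the training text and the token set for inspection: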
    if args.save_text is not None:
        with open(args.save_text, "w") as fid:
            fid.write("\n".join(t for _, t, _ in trainset.dataset))
    if args.save_tokens is not None:
        with open(args.save_tokens, "w") as fid:
            fid.write("\n".join(preprocessor.tokens))
    valset = Dataset(args.data_path, preprocessor, split="validation")
    testset = Dataset(args.data_path, preprocessor, split="test")
    print("Number of examples per dataset:")
    print(f"Training: {len(trainset)}")
    print(f"Validation: {len(valset)}")
    print(f"Test: {len(testset)}")

    if not args.compute_stats:
        import sys

        sys.exit(0)

    # Compute mean and standard deviation over the training features:
    audio = torch.cat([trainset[i][0] for i in range(len(trainset))], dim=2)
    mean = torch.mean(audio)
    std = torch.std(audio)
    print(f"Data mean {mean} and standard deviation {std}.")

    # Compute average lengths of audio and targets:
    avg_in_t = sum(w for (w, _), _ in trainset.sample_sizes()) / len(trainset)
    avg_tgt_l = sum(l for _, l in trainset.sample_sizes()) / len(trainset)
    print(f"Average audio length {avg_in_t} (s)")
    print(f"Average target length {avg_tgt_l}")
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
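
The two excerpts are identical, so the inspection pass could live in one shared helper that each dataset script calls with its own Dataset class. A rough sketch under that assumption (module path and function name are hypothetical):

# e.g. datasets/inspect_utils.py (hypothetical shared module)
def summarize_splits(Dataset, preprocessor, args):
    # Build the three splits and print their sizes; the optional text/token
    # dumps and the stats pass from the excerpts above would move here too.
    trainset = Dataset(args.data_path, preprocessor, split="train", augment=False)
    valset = Dataset(args.data_path, preprocessor, split="validation")
    testset = Dataset(args.data_path, preprocessor, split="test")
    print("Number of examples per dataset:")
    for name, dset in (("Training", trainset), ("Validation", valset), ("Test", testset)):
        print(f"{name}: {len(dset)}")
    return trainset, valset, testset

Each script would then reduce to constructing its preprocessor and calling the helper.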



