void Compute()

in tensorflow_io/core/kernels/audio_video_mp4_kernels.cc [537:681]


  // Encodes the float tensor "input" with the AAC encoder and muxes the
  // resulting packets into an MP4 stream that is written to output 0 as a
  // scalar string.
  //
  // Inputs (looked up by name on `context`):
  //   "input": rank-2 float tensor; dimension 1 is used as the channel count.
  //            NOTE(review): presumably [samples, channels] with interleaved
  //            channels -- confirm against the op registration.
  //   "rate":  single-element int64 tensor holding the sample rate.
  //
  // Reports InvalidArgument through `context` (and returns early) when the
  // inputs are malformed, when the sample rate or channel count has no entry
  // in the AudioSpecificConfig tables below, or when the encoder or muxer
  // reports a failure.
  void Compute(OpKernelContext* context) override {
    const Tensor* input_tensor;
    OP_REQUIRES_OK(context, context->input("input", &input_tensor));
    // dim_size(1) aborts the process on tensors of rank < 2, so validate the
    // rank first and surface a regular op error instead.
    OP_REQUIRES(context, (input_tensor->dims() == 2),
                errors::InvalidArgument("input must be a rank-2 tensor, got ",
                                        input_tensor->shape().DebugString()));

    const Tensor* rate_tensor;
    OP_REQUIRES_OK(context, context->input("rate", &rate_tensor));
    // scalar<T>() aborts unless the tensor holds exactly one element.
    OP_REQUIRES(context, (rate_tensor->NumElements() == 1),
                errors::InvalidArgument("rate must hold a single value, got ",
                                        rate_tensor->shape().DebugString()));

    const int64 channels = input_tensor->shape().dim_size(1);
    OP_REQUIRES(
        context, (channels == static_cast<int16>(channels)),
        errors::InvalidArgument("channels ", channels, " > max(int16)"));
    // Index 0 of the channel table holds 0, so a zero-channel input would
    // otherwise pass the lookup below and then divide by zero when the packet
    // count is computed.
    OP_REQUIRES(
        context, (channels > 0),
        errors::InvalidArgument("channels ", channels, " not supported"));

    const int64 rate = rate_tensor->scalar<int64>()();
    OP_REQUIRES(context, (rate == static_cast<int32>(rate)),
                errors::InvalidArgument("rate ", rate, " > max(int32)"));

    // Code from buildAacAudioSpecificConfig:
    // ExoPlayer/library/core/src/main/java/com/google/android/exoplayer2/util/CodecSpecificDataUtil.java
    //
    // The position of a value in each table is the index that gets packed
    // into the decoder specific info (dsi) bytes further down.
    static constexpr int kSamplingRateTable[] = {
        96000, 88200, 64000, 48000, 44100, 32000, 24000,
        22050, 16000, 12000, 11025, 8000,  7350};
    static constexpr int kChannelCountTable[] = {
        0,  //
        1,  // mono: <FC>
        2,  // stereo: (FL, FR)
        3,  // 3.0: <FC>, (FL, FR)
        4,  // 4.0: <FC>, (FL, FR), <BC>
        5,  // 5.0 back: <FC>, (FL, FR), (SL, SR)
        6,  // 5.1 back: <FC>, (FL, FR), (SL, SR), <BC>, [LFE]
        8,  // 7.1 wide back: <FC>, (FCL, FCR), (FL, FR), (SL, SR), [LFE]
            // AUDIO_SPECIFIC_CONFIG_CHANNEL_CONFIGURATION_INVALID,
            // AUDIO_SPECIFIC_CONFIG_CHANNEL_CONFIGURATION_INVALID,
            // AUDIO_SPECIFIC_CONFIG_CHANNEL_CONFIGURATION_INVALID,
            // 7, // 6.1: <FC>, (FL, FR), (SL, SR), <RC>, [LFE]
            // 8, // 7.1: <FC>, (FL, FR), (SL, SR), (BL, BR), [LFE]
            // AUDIO_SPECIFIC_CONFIG_CHANNEL_CONFIGURATION_INVALID,
            // 8, // 7.1 top: <FC>, (FL, FR), (SL, SR), [LFE], (FTL, FTR)
            // AUDIO_SPECIFIC_CONFIG_CHANNEL_CONFIGURATION_INVALID
    };
    constexpr size_t kSamplingRateCount =
        sizeof(kSamplingRateTable) / sizeof(kSamplingRateTable[0]);
    constexpr size_t kChannelCountCount =
        sizeof(kChannelCountTable) / sizeof(kChannelCountTable[0]);

    const int audioObjectType = 2;  // AUDIO_OBJECT_TYPE_AAC_LC

    // Find the table index of the requested sample rate (-1: no entry).
    int sampleRateIndex = -1;
    for (size_t i = 0; i < kSamplingRateCount; ++i) {
      if (rate == kSamplingRateTable[i]) {
        sampleRateIndex = static_cast<int>(i);
        break;
      }
    }
    OP_REQUIRES(
        context, (sampleRateIndex >= 0),
        errors::InvalidArgument("sample rate ", rate, " not supported"));

    // Find the table index of the requested channel count (-1: no entry).
    int channelConfig = -1;
    for (size_t i = 0; i < kChannelCountCount; ++i) {
      if (channels == kChannelCountTable[i]) {
        channelConfig = static_cast<int>(i);
        break;
      }
    }
    OP_REQUIRES(
        context, (channelConfig >= 0),
        errors::InvalidArgument("channels ", channels, " not supported"));

    // Encoder state; released through EncodeAACFunctionFini on every exit
    // path. unique_ptr never invokes the deleter for a null pointer.
    std::unique_ptr<void, void (*)(void*)> state(
        EncodeAACFunctionInit(0, rate, channels),
        [](void* p) { EncodeAACFunctionFini(p); });
    OP_REQUIRES(context, (state.get() != nullptr),
                errors::InvalidArgument("unable to initialize encoder"));

    const float* data_in = input_tensor->flat<float>().data();
    const int64_t size_in = input_tensor->NumElements();

    // Room for one packet per 1024 values per channel plus one extra slot.
    // `chunk` is passed by pointer; it is then used as the number of packets
    // to read back (presumably updated by the encoder -- TODO confirm).
    const int64_t chunk_capacity = size_in / channels / 1024 + 1;
    int64_t chunk = chunk_capacity;
    std::vector<char*> data_out_chunk(static_cast<size_t>(chunk_capacity));
    std::vector<int64_t> size_out_chunk(static_cast<size_t>(chunk_capacity));
    int status = EncodeAACFunctionCall(state.get(), data_in, size_in,
                                       data_out_chunk.data(),
                                       size_out_chunk.data(), &chunk);
    OP_REQUIRES(context, (status == 0),
                errors::InvalidArgument("unable to encode aac"));
    // Never index the packet vectors beyond what was allocated above.
    OP_REQUIRES(context, (chunk >= 0 && chunk <= chunk_capacity),
                errors::InvalidArgument("encoder returned ", chunk,
                                        " packets, expected at most ",
                                        chunk_capacity));

    Tensor* output_tensor = nullptr;
    OP_REQUIRES_OK(
        context, context->allocate_output(0, TensorShape({}), &output_tensor));

    tstring& output = output_tensor->scalar<tstring>()();
    int64 size_out = 0;
    for (int64 i = 0; i < chunk; i++) {
      size_out += size_out_chunk[i];
    }
    // At least size_out + 4096
    output.reserve(size_out + 4096);

    // Muxer handle; closed through MP4E_close on every exit path. It is
    // declared after `state`, so it is destroyed before the encoder state.
    std::unique_ptr<MP4E_mux_t, void (*)(MP4E_mux_t*)> mux(
        MP4E_open(0, 0, &output, AudioEncodeMP4AACWriteCallback),
        [](MP4E_mux_t* p) { MP4E_close(p); });
    OP_REQUIRES(context, (mux.get() != nullptr),
                errors::InvalidArgument("unable open mux"));

    // Zero-initialize so that no field of the descriptor is handed to the
    // muxer with an indeterminate value.
    MP4E_track_t tr{};
    tr.track_media_kind = e_audio;
    tr.language[0] = 'u';
    tr.language[1] = 'n';
    tr.language[2] = 'd';
    tr.language[3] = 0;
    tr.object_type_indication = MP4_OBJECT_TYPE_AUDIO_ISO_IEC_14496_3;
    tr.time_scale = rate;
    tr.default_duration = 0;
    tr.u.a.channelcount = channels;
    const int audio_track_id = MP4E_add_track(mux.get(), &tr);
    // NOTE(review): minimp4 reports add_track failures as negative status
    // codes -- confirm against the bundled minimp4.h.
    OP_REQUIRES(
        context, (audio_track_id >= 0),
        errors::InvalidArgument("unable to add track: ", audio_track_id));

    // Two-byte decoder specific info, packed MSB first:
    //   5 bits audioObjectType | 4 bits sampleRateIndex | 4 bits channelConfig
    // (the remaining 3 low bits stay 0).
    unsigned char dsi[2];
    dsi[0] = static_cast<unsigned char>(((audioObjectType << 3) & 0xF8) |
                                        ((sampleRateIndex >> 1) & 0x07));
    dsi[1] = static_cast<unsigned char>(((sampleRateIndex << 7) & 0x80) |
                                        ((channelConfig << 3) & 0x78));

    status = MP4E_set_dsi(mux.get(), audio_track_id, dsi, sizeof(dsi));
    OP_REQUIRES(context, (status == 0),
                errors::InvalidArgument("unable to set dsi: ", status));

    // Hand every encoded packet to the muxer with a duration of 1024 ticks
    // of the track time scale (which was set to `rate` above).
    for (int64 i = 0; i < chunk; i++) {
      status =
          MP4E_put_sample(mux.get(), audio_track_id, data_out_chunk[i],
                          size_out_chunk[i], 1024, MP4E_SAMPLE_RANDOM_ACCESS);
      OP_REQUIRES(
          context, (status == 0),
          errors::InvalidArgument("unable to mux packet ", i, ":", status));
    }
    // close mux
    mux.reset(nullptr);
  }