auto apply_effects_fileobj()

in torchaudio/csrc/pybind/sox/effects.cpp [30:115]


auto apply_effects_fileobj(
    py::object fileobj,
    const std::vector<std::vector<std::string>>& effects,
    c10::optional<bool> normalize,
    c10::optional<bool> channels_first,
    c10::optional<std::string> format) -> std::tuple<torch::Tensor, int64_t> {
  // Prepare the buffer used throughout the lifecycle of SoxEffectChain.
  //
  // For certain format (such as FLAC), libsox keeps reading the content at
  // the initialization unless it reaches EOF even when the header is properly
  // parsed. (Making buffer size 8192, which is way bigger than the header,
  // resulted in libsox consuming all the buffer content at the time it opens
  // the file.) Therefore buffer has to always contain valid data, except after
  // EOF. We default to `sox_get_globals()->bufsiz`* for buffer size and we
  // first check if there is enough data to fill the buffer. `read_fileobj`
  // repeatedly calls `read`  method until it receives the requested length of
  // bytes or it reaches EOF. If we get bytes shorter than requested, that means
  // the whole audio data are fetched.
  //
  // * This can be changed with `torchaudio.utils.sox_utils.set_buffer_size`.
  const auto capacity = [&]() {
    // NOTE:
    // Use the abstraction provided by `libtorchaudio` to access the global
    // config defined by libsox. Directly using `sox_get_globals` function will
    // end up retrieving the static variable defined in `_torchaudio`, which is
    // not correct.
    const auto bufsiz = get_buffer_size();
    const int64_t kDefaultCapacityInBytes = 256;
    return (bufsiz > kDefaultCapacityInBytes) ? bufsiz
                                              : kDefaultCapacityInBytes;
  }();
  std::string buffer(capacity, '\0');
  auto* in_buf = const_cast<char*>(buffer.data());
  auto num_read = read_fileobj(&fileobj, capacity, in_buf);
  // If the file is shorter than 256, then libsox cannot read the header.
  auto in_buffer_size = (num_read > 256) ? num_read : 256;

  // Open file (this starts reading the header)
  // When opening a file there are two functions that can touches FILE*.
  // * `auto_detect_format`
  //   https://github.com/dmkrepo/libsox/blob/b9dd1a86e71bbd62221904e3e59dfaa9e5e72046/src/formats.c#L43
  // * `startread` handler of detected format.
  //   https://github.com/dmkrepo/libsox/blob/b9dd1a86e71bbd62221904e3e59dfaa9e5e72046/src/formats.c#L574
  // To see the handler of a particular format, go to
  //   https://github.com/dmkrepo/libsox/blob/b9dd1a86e71bbd62221904e3e59dfaa9e5e72046/src/<FORMAT>.c
  // For example, voribs can be found
  //   https://github.com/dmkrepo/libsox/blob/b9dd1a86e71bbd62221904e3e59dfaa9e5e72046/src/vorbis.c#L97-L158
  SoxFormat sf(sox_open_mem_read(
      in_buf,
      in_buffer_size,
      /*signal=*/nullptr,
      /*encoding=*/nullptr,
      /*filetype=*/format.has_value() ? format.value().c_str() : nullptr));

  // In case of streamed data, length can be 0
  validate_input_memfile(sf);

  // Prepare output buffer
  std::vector<sox_sample_t> out_buffer;
  out_buffer.reserve(sf->signal.length);

  // Create and run SoxEffectsChain
  const auto dtype = get_dtype(sf->encoding.encoding, sf->signal.precision);
  torchaudio::sox_effects_chain::SoxEffectsChainPyBind chain(
      /*input_encoding=*/sf->encoding,
      /*output_encoding=*/get_tensor_encodinginfo(dtype));
  chain.addInputFileObj(sf, in_buf, in_buffer_size, &fileobj);
  for (const auto& effect : effects) {
    chain.addEffect(effect);
  }
  chain.addOutputBuffer(&out_buffer);
  chain.run();

  // Create tensor from buffer
  bool channels_first_ = channels_first.value_or(true);
  auto tensor = convert_to_tensor(
      /*buffer=*/out_buffer.data(),
      /*num_samples=*/out_buffer.size(),
      /*num_channels=*/chain.getOutputNumChannels(),
      dtype,
      normalize.value_or(true),
      channels_first_);

  return std::make_tuple(
      tensor, static_cast<int64_t>(chain.getOutputSampleRate()));
}