struct heif_error svt_encode_image()

in libheif/plugins/encoder_svt.cc [567:834]


struct heif_error svt_encode_image(void* encoder_raw, const struct heif_image* image,
                                   heif_image_input_class input_class)
{
  auto* encoder = (struct encoder_struct_svt*) encoder_raw;
  EbErrorType res = EB_ErrorNone;

  encoder->compressed_data.clear();

  int w = heif_image_get_width(image, heif_channel_Y);
  int h = heif_image_get_height(image, heif_channel_Y);

  uint32_t encoded_width, encoded_height;
  svt_query_encoded_size(encoder_raw, w, h, &encoded_width, &encoded_height);

  // Note: it is ok to cast away the const, as the image content is not changed.
  // However, we have to guarantee that there are no plane pointers or stride values kept over calling the svt_encode_image() function.
  heif_error err = heif_image_extend_padding_to_size(const_cast<struct heif_image*>(image),
                                                     (int) encoded_width,
                                                     (int) encoded_height);
  if (err.code) {
    return err;
  }

  const heif_chroma chroma = heif_image_get_chroma_format(image);
  int bitdepth_y = heif_image_get_bits_per_pixel_range(image, heif_channel_Y);

  uint8_t yShift = 0;
  EbColorFormat color_format = EB_YUV420;

  if (input_class == heif_image_input_class_alpha) {
    color_format = EB_YUV420;
    //chromaPosition = RA_CHROMA_SAMPLE_POSITION_UNKNOWN;
    yShift = 1;
  }
  else {
    switch (chroma) {
      case heif_chroma_444:
        color_format = EB_YUV444;
        //chromaPosition = RA_CHROMA_SAMPLE_POSITION_COLOCATED;
        break;
      case heif_chroma_422:
        color_format = EB_YUV422;
        //chromaPosition = RA_CHROMA_SAMPLE_POSITION_COLOCATED;
        break;
      case heif_chroma_420:
        color_format = EB_YUV420;
        //chromaPosition = RA_CHROMA_SAMPLE_POSITION_UNKNOWN; // TODO: set to CENTER when AV1 and svt supports this
        yShift = 1;
        break;
      default:
        return heif_error_codec_library_error;
    }
  }


  // --- initialize the encoder

  EbComponentType* svt_encoder = nullptr;
  EbSvtAv1EncConfiguration svt_config;
  memset(&svt_config, 0, sizeof(EbSvtAv1EncConfiguration));

  res = svt_av1_enc_init_handle(&svt_encoder, nullptr, &svt_config);
  if (res != EB_ErrorNone) {
    //goto cleanup;
    return heif_error_codec_library_error;
  }

  svt_config.encoder_color_format = color_format;
  svt_config.encoder_bit_depth = (uint8_t) bitdepth_y;
  //svt_config.is_16bit_pipeline = bitdepth_y > 8;

  struct heif_color_profile_nclx* nclx = nullptr;
  err = heif_image_get_nclx_color_profile(image, &nclx);
  if (err.code != heif_error_Ok) {
    nclx = nullptr;
  }

  // make sure NCLX profile is deleted at end of function
  auto nclx_deleter = std::unique_ptr<heif_color_profile_nclx, void (*)(heif_color_profile_nclx*)>(nclx, heif_nclx_color_profile_free);

  if (nclx) {
    svt_config.color_description_present_flag = true;
#if SVT_AV1_VERSION_MAJOR >= 1
    svt_config.color_primaries = static_cast<EbColorPrimaries>(nclx->color_primaries);
    svt_config.transfer_characteristics = static_cast<EbTransferCharacteristics>(nclx->transfer_characteristics);
    svt_config.matrix_coefficients = static_cast<EbMatrixCoefficients>(nclx->matrix_coefficients);
    svt_config.color_range = nclx->full_range_flag ? EB_CR_FULL_RANGE : EB_CR_STUDIO_RANGE;
#else
    svt_config.color_primaries = static_cast<uint8_t>(nclx->color_primaries);
    svt_config.transfer_characteristics = static_cast<uint8_t>(nclx->transfer_characteristics);
    svt_config.matrix_coefficients = static_cast<uint8_t>(nclx->matrix_coefficients);
    svt_config.color_range = nclx->full_range_flag ? 1 : 0;
#endif


    // Follow comment in svt header: set if input is HDR10 BT2020 using SMPTE ST2084.
    svt_config.high_dynamic_range_input = (bitdepth_y == 10 && // TODO: should this be >8 ?
                                           nclx->color_primaries == heif_color_primaries_ITU_R_BT_2020_2_and_2100_0 &&
                                           nclx->transfer_characteristics == heif_transfer_characteristic_ITU_R_BT_2100_0_PQ &&
                                           nclx->matrix_coefficients == heif_matrix_coefficients_ITU_R_BT_2020_2_non_constant_luminance);
  }
  else {
    svt_config.color_description_present_flag = false;
  }


  svt_config.source_width = encoded_width;
  svt_config.source_height = encoded_height;
  svt_config.logical_processors = encoder->threads;

  // disable 2-pass
  svt_config.rc_stats_buffer = SvtAv1FixedBuf {nullptr, 0};

  svt_config.rate_control_mode = 0; // constant rate factor
  //svt_config.enable_adaptive_quantization = 0;   // 2 is CRF (the default), 0 would be CQP
  int qp;
  if (encoder->qp_set) {
    qp = encoder->qp;
  }
  else {
    qp = ((100 - encoder->quality) * 63 + 50) / 100;
  }
  svt_config.qp = qp;
  svt_config.min_qp_allowed = encoder->min_q;
  svt_config.max_qp_allowed = encoder->max_q;

  svt_config.tile_rows = int_log2(encoder->tile_rows);
  svt_config.tile_columns = int_log2(encoder->tile_cols);

  svt_config.enc_mode = (int8_t) encoder->speed;

  if (color_format == EB_YUV422 || bitdepth_y > 10) {
    svt_config.profile = PROFESSIONAL_PROFILE;
  }
  else if (color_format == EB_YUV444) {
    svt_config.profile = HIGH_PROFILE;
  }

  res = svt_av1_enc_set_parameter(svt_encoder, &svt_config);
  if (res == EB_ErrorBadParameter) {
    svt_av1_enc_deinit(svt_encoder);
    svt_av1_enc_deinit_handle(svt_encoder);
    return heif_error_codec_library_error;
  }

  res = svt_av1_enc_init(svt_encoder);
  if (res != EB_ErrorNone) {
    svt_av1_enc_deinit(svt_encoder);
    svt_av1_enc_deinit_handle(svt_encoder);
    return heif_error_codec_library_error;
  }


  // --- copy libheif image to svt image

  EbBufferHeaderType input_buffer;
  input_buffer.p_buffer = (uint8_t*) (new EbSvtIOFormat());

  memset(input_buffer.p_buffer, 0, sizeof(EbSvtIOFormat));
  input_buffer.size = sizeof(EbBufferHeaderType);
  input_buffer.p_app_private = nullptr;
  input_buffer.pic_type = EB_AV1_INVALID_PICTURE;
  input_buffer.metadata = nullptr;

  auto* input_picture_buffer = (EbSvtIOFormat*) input_buffer.p_buffer;

  int bytesPerPixel = bitdepth_y > 8 ? 2 : 1;
  if (input_class == heif_image_input_class_alpha) {
    int stride;
    input_picture_buffer->luma = (uint8_t*) heif_image_get_plane_readonly(image, heif_channel_Y, &stride);
    input_picture_buffer->y_stride = stride / bytesPerPixel;
    input_buffer.n_filled_len = stride * encoded_height;
  }
  else {
    int stride;
    input_picture_buffer->luma = (uint8_t*) heif_image_get_plane_readonly(image, heif_channel_Y, &stride);
    input_picture_buffer->y_stride = stride / bytesPerPixel;
    input_buffer.n_filled_len = stride * encoded_height;

    uint32_t uvHeight = (h + yShift) >> yShift;
    input_picture_buffer->cb = (uint8_t*) heif_image_get_plane_readonly(image, heif_channel_Cb, &stride);
    input_buffer.n_filled_len += stride * uvHeight;
    input_picture_buffer->cb_stride = stride / bytesPerPixel;

    input_picture_buffer->cr = (uint8_t*) heif_image_get_plane_readonly(image, heif_channel_Cr, &stride);
    input_buffer.n_filled_len += stride * uvHeight;
    input_picture_buffer->cr_stride = stride / bytesPerPixel;
  }

  input_buffer.flags = 0;
  input_buffer.pts = 0;

  EbAv1PictureType frame_type = EB_AV1_KEY_PICTURE;

  input_buffer.pic_type = frame_type;

  res = svt_av1_enc_send_picture(svt_encoder, &input_buffer);
  if (res != EB_ErrorNone) {
    delete input_buffer.p_buffer;
    svt_av1_enc_deinit(svt_encoder);
    svt_av1_enc_deinit_handle(svt_encoder);
    return heif_error_codec_library_error;
  }



  // --- flush encoder

  EbErrorType ret = EB_ErrorNone;

  EbBufferHeaderType flush_input_buffer;
  flush_input_buffer.n_alloc_len = 0;
  flush_input_buffer.n_filled_len = 0;
  flush_input_buffer.n_tick_count = 0;
  flush_input_buffer.p_app_private = nullptr;
  flush_input_buffer.flags = EB_BUFFERFLAG_EOS;
  flush_input_buffer.p_buffer = nullptr;
  flush_input_buffer.metadata = nullptr;

  ret = svt_av1_enc_send_picture(svt_encoder, &flush_input_buffer);

  if (ret != EB_ErrorNone) {
    delete input_buffer.p_buffer;
    svt_av1_enc_deinit(svt_encoder);
    svt_av1_enc_deinit_handle(svt_encoder);
    return heif_error_codec_library_error;
  }


  // --- read compressed picture

  int encode_at_eos = 0;
  uint8_t done_sending_pics = true;

  do {
    EbBufferHeaderType* output_buf = nullptr;

    res = svt_av1_enc_get_packet(svt_encoder, &output_buf, (uint8_t) done_sending_pics);
    if (output_buf != nullptr) {
      encode_at_eos = ((output_buf->flags & EB_BUFFERFLAG_EOS) == EB_BUFFERFLAG_EOS);
      if (output_buf->p_buffer && (output_buf->n_filled_len > 0)) {
        uint8_t* data = output_buf->p_buffer;
        uint32_t n = output_buf->n_filled_len;

        size_t oldSize = encoder->compressed_data.size();
        encoder->compressed_data.resize(oldSize + n);

        memcpy(encoder->compressed_data.data() + oldSize, data, n);

        encoder->data_read = false;
        // (output_buf->pic_type == EB_AV1_KEY_PICTURE));
      }
      svt_av1_enc_release_out_buffer(&output_buf);
    }
  } while (res == EB_ErrorNone && !encode_at_eos);


  delete input_buffer.p_buffer;
  svt_av1_enc_deinit(svt_encoder);
  svt_av1_enc_deinit_handle(svt_encoder);

  if (!done_sending_pics && ((res == EB_ErrorNone) || (res == EB_NoErrorEmptyQueue))) {
    return heif_error_ok;
  }
  else {
    return (res == EB_ErrorNone ? heif_error_ok : heif_error_codec_library_error);
  }
}