libheif/codecs/vvc.cc (269 lines of code) (raw):
/*
* HEIF VVC codec.
* Copyright (c) 2023 Dirk Farin <dirk.farin@gmail.com>
*
* This file is part of libheif.
*
* libheif is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as
* published by the Free Software Foundation, either version 3 of
* the License, or (at your option) any later version.
*
* libheif is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with libheif. If not, see <http://www.gnu.org/licenses/>.
*/
#include "vvc.h"
#include <cstring>
#include <string>
#include <cassert>
#include <iomanip>
#include <utility>
Error Box_vvcC::parse(BitstreamRange& range)
{
//parse_full_box_header(range);
uint8_t byte;
auto& c = m_configuration; // abbreviation
c.configurationVersion = range.read8();
c.avgFrameRate_times_256 = range.read16();
//printf("version: %d\n", c.configurationVersion);
byte = range.read8();
c.constantFrameRate = (byte & 0xc0) >> 6;
c.numTemporalLayers = (byte & 0x38) >> 3;
c.lengthSize = uint8_t(((byte & 0x06) >> 1) + 1);
c.ptl_present_flag = (byte & 0x01);
// assert(c.ptl_present_flag == false); // TODO (removed the assert since it will trigger the fuzzers)
byte = range.read8();
c.chroma_format_present_flag = (byte & 0x80);
c.chroma_format_idc = (byte & 0x60) >> 5;
c.bit_depth_present_flag = (byte & 0x10);
c.bit_depth = uint8_t(((byte & 0x0e) >> 1) + 8);
int nArrays = range.read8();
for (int i = 0; i < nArrays && !range.error(); i++) {
byte = range.read8();
NalArray array;
array.m_array_completeness = (byte >> 6) & 1;
array.m_NAL_unit_type = (byte & 0x3F);
int nUnits = range.read16();
for (int u = 0; u < nUnits && !range.error(); u++) {
std::vector<uint8_t> nal_unit;
int size = range.read16();
if (!size) {
// Ignore empty NAL units.
continue;
}
if (range.prepare_read(size)) {
nal_unit.resize(size);
bool success = range.get_istream()->read((char*) nal_unit.data(), size);
if (!success) {
return Error{heif_error_Invalid_input, heif_suberror_End_of_data, "error while reading hvcC box"};
}
}
array.m_nal_units.push_back(std::move(nal_unit));
}
m_nal_array.push_back(std::move(array));
}
#if 0
const int64_t configOBUs_bytes = range.get_remaining_bytes();
m_config_OBUs.resize(configOBUs_bytes);
if (!range.read(m_config_OBUs.data(), configOBUs_bytes)) {
// error
}
#endif
return range.get_error();
}
void Box_vvcC::append_nal_data(const std::vector<uint8_t>& nal)
{
NalArray array;
array.m_array_completeness = 0;
array.m_NAL_unit_type = uint8_t(nal[0] >> 1);
array.m_nal_units.push_back(nal);
m_nal_array.push_back(array);
}
void Box_vvcC::append_nal_data(const uint8_t* data, size_t size)
{
std::vector<uint8_t> nal;
nal.resize(size);
memcpy(nal.data(), data, size);
NalArray array;
array.m_array_completeness = 0;
array.m_NAL_unit_type = uint8_t(nal[0] >> 1);
array.m_nal_units.push_back(std::move(nal));
m_nal_array.push_back(array);
}
Error Box_vvcC::write(StreamWriter& writer) const
{
size_t box_start = reserve_box_header_space(writer);
const auto& c = m_configuration;
writer.write8(c.configurationVersion);
writer.write16(c.avgFrameRate_times_256);
assert(c.lengthSize == 1 || c.lengthSize == 2 || c.lengthSize == 4);
uint8_t v = (uint8_t) ((c.constantFrameRate << 6) |
(c.numTemporalLayers << 3) |
((c.lengthSize - 1) << 1) |
(c.ptl_present_flag ? 1 : 0));
writer.write8(v);
if (c.ptl_present_flag) {
assert(false); // TODO
//VvcPTLRecord(numTemporalLayers) track_ptl;
//unsigned int(16) output_layer_set_idx;
}
v = 0;
if (c.chroma_format_present_flag) {
v |= 0x80 | (c.chroma_format_idc << 5);
}
else {
v |= 0x60;
}
if (c.bit_depth_present_flag) {
v |= (uint8_t)(0x10 | ((c.bit_depth - 8) << 1));
}
else {
v |= 0x0e;
}
v |= 0x01; // reserved
writer.write8(v);
if (m_nal_array.size() >= 256) {
// TODO: error
}
if (m_nal_array.size() > 255) {
return {heif_error_Encoding_error, heif_suberror_Unspecified, "Too many VVC NAL arrays."};
}
writer.write8((uint8_t)m_nal_array.size());
for (const NalArray& nal_array : m_nal_array) {
uint8_t v2 = (nal_array.m_array_completeness ? 0x80 : 0);
v2 |= nal_array.m_NAL_unit_type;
writer.write8(v2);
if (nal_array.m_nal_units.size() > 0xFFFF) {
return {heif_error_Encoding_error, heif_suberror_Unspecified, "Too many VVC NAL units."};
}
writer.write16((uint16_t)nal_array.m_nal_units.size());
for (const auto& nal : nal_array.m_nal_units) {
if (nal.size() > 0xFFFF) {
return {heif_error_Encoding_error, heif_suberror_Unspecified, "VVC NAL too large."};
}
writer.write16((uint16_t)nal.size());
writer.write(nal);
}
}
prepend_header(writer, box_start);
return Error::Ok;
}
static const char* vvc_chroma_names[4] = {"mono", "4:2:0", "4:2:2", "4:4:4"};
std::string Box_vvcC::dump(Indent& indent) const
{
std::ostringstream sstr;
sstr << Box::dump(indent);
const auto& c = m_configuration; // abbreviation
sstr << indent << "version: " << ((int) c.configurationVersion) << "\n"
<< indent << "frame-rate: " << (c.avgFrameRate_times_256 / 256.0f) << "\n"
<< indent << "constant frame rate: " << (c.constantFrameRate == 1 ? "constant" : (c.constantFrameRate == 2 ? "multi-layer" : "unknown")) << "\n"
<< indent << "num temporal layers: " << ((int) c.numTemporalLayers) << "\n"
<< indent << "length size: " << ((int) c.lengthSize) << "\n"
<< indent << "chroma-format: ";
if (c.chroma_format_present_flag) {
sstr << vvc_chroma_names[c.chroma_format_idc] << "\n";
}
else {
sstr << "---\n";
}
sstr << indent << "bit-depth: ";
if (c.bit_depth_present_flag) {
sstr << ((int) c.bit_depth) << "\n";
}
else {
sstr << "---\n";
}
sstr << indent << "num of arrays: " << m_nal_array.size() << "\n";
sstr << indent << "config NALs:";
for (size_t i = 0; i < m_nal_array.size(); i++) {
indent++;
sstr << indent << "array completeness: " << ((int)m_nal_array[i].m_array_completeness) << "\n";
sstr << std::hex << std::setw(2) << std::setfill('0') << m_nal_array[i].m_NAL_unit_type << "\n";
for (const auto& nal : m_nal_array[i].m_nal_units) {
std::string ind = indent.get_string();
sstr << write_raw_data_as_hex(nal.data(), nal.size(), ind, ind);
}
}
sstr << std::dec << std::setw(0) << "\n";
return sstr.str();
}
static std::vector<uint8_t> remove_start_code_emulation(const uint8_t* sps, size_t size)
{
std::vector<uint8_t> out_data;
for (size_t i = 0; i < size; i++) {
if (i + 2 < size &&
sps[i] == 0 &&
sps[i + 1] == 0 &&
sps[i + 2] == 3) {
out_data.push_back(0);
out_data.push_back(0);
i += 2;
}
else {
out_data.push_back(sps[i]);
}
}
return out_data;
}
Error parse_sps_for_vvcC_configuration(const uint8_t* sps, size_t size,
Box_vvcC::configuration* config,
int* width, int* height)
{
// remove start-code emulation bytes from SPS header stream
std::vector<uint8_t> sps_no_emul = remove_start_code_emulation(sps, size);
sps = sps_no_emul.data();
size = sps_no_emul.size();
BitReader reader(sps, (int) size);
// skip NAL header
reader.skip_bits(2 * 8);
// skip SPS ID
reader.skip_bits(4);
// skip VPS ID
reader.skip_bits(4);
config->numTemporalLayers = (uint8_t)(reader.get_bits(3) + 1);
config->chroma_format_idc = (uint8_t)(reader.get_bits(2));
config->chroma_format_present_flag = true;
reader.skip_bits(2);
bool sps_ptl_dpb_hrd_params_present_flag = reader.get_bits(1);
if (sps_ptl_dpb_hrd_params_present_flag) {
// profile_tier_level( 1, sps_max_sublayers_minus1 )
if (true /*profileTierPresentFlag*/) {
//general_profile_idc
//general_tier_flag
reader.skip_bits(8);
}
reader.skip_bits(8); // general_level_idc
reader.skip_bits(1); //ptl_frame_only_constraint_flag
reader.skip_bits(1); //ptl_multilayer_enabled_flag
if (true /* profileTierPresentFlag*/ ) {
// general_constraints_info()
bool gci_present_flag = reader.get_bits(1);
if (gci_present_flag) {
assert(false);
}
reader.skip_to_byte_boundary();
}
std::vector<bool> ptl_sublayer_level_present_flag(config->numTemporalLayers);
for (int i = config->numTemporalLayers-2; i >= 0; i--) {
ptl_sublayer_level_present_flag[i] = reader.get_bits(1);
}
reader.skip_to_byte_boundary();
for (int i = config->numTemporalLayers-2; i >= 0; i--) {
if (ptl_sublayer_level_present_flag[i]) {
reader.skip_bits(8); // sublayer_level_idc[i]
}
}
if (true /*profileTierPresentFlag*/) {
int ptl_num_sub_profiles = reader.get_bits(8);
for (int i = 0; i < ptl_num_sub_profiles; i++) {
uint32_t idc = reader.get_bits(32); //general_sub_profile_idc[i]
(void) idc;
}
}
}
reader.skip_bits(1); // sps_gdr_enabled_flag
bool sps_ref_pic_resampling_enabled_flag = reader.get_bits(1);
if (sps_ref_pic_resampling_enabled_flag) {
reader.skip_bits(1); // sps_res_change_in_clvs_allowed_flag
}
int sps_pic_width_max_in_luma_samples;
int sps_pic_height_max_in_luma_samples;
bool success;
success = reader.get_uvlc(&sps_pic_width_max_in_luma_samples);
(void)success;
success = reader.get_uvlc(&sps_pic_height_max_in_luma_samples);
(void)success;
*width = sps_pic_width_max_in_luma_samples;
*height = sps_pic_height_max_in_luma_samples;
int sps_conformance_window_flag = reader.get_bits(1);
if (sps_conformance_window_flag) {
int left,right,top,bottom;
reader.get_uvlc(&left);
reader.get_uvlc(&right);
reader.get_uvlc(&top);
reader.get_uvlc(&bottom);
}
bool sps_subpic_info_present_flag = reader.get_bits(1);
if (sps_subpic_info_present_flag) {
assert(false); // TODO
}
int bitDepth_minus8;
success = reader.get_uvlc(&bitDepth_minus8);
(void)success;
if (bitDepth_minus8 > 0xFF - 8) {
return {heif_error_Encoding_error, heif_suberror_Unspecified, "VCC bit depth out of range."};
}
config->bit_depth = (uint8_t)(bitDepth_minus8 + 8);
config->bit_depth_present_flag = true;
return Error::Ok;
}