/**
*
* Copyright 2016-2020 Netflix, Inc.
*
* Licensed under the BSD+Patent License (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* https://opensource.org/licenses/BSDplusPatent
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
#include "cpu.h"
#include "dict.h"
#include "feature_collector.h"
#include "feature_extractor.h"
#include "feature_name.h"
#include "integer_adm.h"
#include "log.h"
#if ARCH_X86
#include "x86/adm_avx2.h"
#elif ARCH_AARCH64
#include "arm64/adm_neon.h"
#include <arm_neon.h>
#endif
typedef struct AdmState {
size_t integer_stride;
AdmBuffer buf;
bool debug;
double adm_enhn_gain_limit;
double adm_norm_view_dist;
int adm_ref_display_height;
void (*dwt2_8)(const uint8_t *src, const adm_dwt_band_t *dst,
AdmBuffer *buf, int w, int h, int src_stride,
int dst_stride);
VmafDictionary *feature_name_dict;
} AdmState;
static const VmafOption options[] = {
{
.name = "debug",
.help = "debug mode: enable additional output",
.offset = offsetof(AdmState, debug),
.type = VMAF_OPT_TYPE_BOOL,
.default_val.b = false,
},
{
.name = "adm_enhn_gain_limit",
.alias = "egl",
.help = "enhancement gain imposed on adm, must be >= 1.0, "
"where 1.0 means the gain is completely disabled",
.offset = offsetof(AdmState, adm_enhn_gain_limit),
.type = VMAF_OPT_TYPE_DOUBLE,
.default_val.d = DEFAULT_ADM_ENHN_GAIN_LIMIT,
.min = 1.0,
.max = DEFAULT_ADM_ENHN_GAIN_LIMIT,
.flags = VMAF_OPT_FLAG_FEATURE_PARAM,
},
{
.name = "adm_norm_view_dist",
.alias = "nvd",
.help = "normalized viewing distance = viewing distance / ref display's physical height",
.offset = offsetof(AdmState, adm_norm_view_dist),
.type = VMAF_OPT_TYPE_DOUBLE,
.default_val.d = DEFAULT_ADM_NORM_VIEW_DIST,
.min = 0.75,
.max = 24.0,
.flags = VMAF_OPT_FLAG_FEATURE_PARAM,
},
{
.name = "adm_ref_display_height",
.alias = "rdh",
.help = "reference display height in pixels",
.offset = offsetof(AdmState, adm_ref_display_height),
.type = VMAF_OPT_TYPE_INT,
.default_val.i = DEFAULT_ADM_REF_DISPLAY_HEIGHT,
.min = 1,
.max = 4320,
.flags = VMAF_OPT_FLAG_FEATURE_PARAM,
},
{ 0 }
};
/*
* lambda = 0 (finest scale), 1, 2, 3 (coarsest scale);
* theta = 0 (ll), 1 (lh - vertical), 2 (hh - diagonal), 3 (hl - horizontal).
*/
static inline float
dwt_quant_step(const struct dwt_model_params *params, int lambda, int theta,
double adm_norm_view_dist, int adm_ref_display_height)
{
// Formula (1), page 1165 - display visual resolution (DVR), in pixels/degree
// of visual angle. With the default options this is approximately 56.55.
float r = adm_norm_view_dist * adm_ref_display_height * M_PI / 180.0;
// Formula (9), page 1171
float temp = log10(pow(2.0, lambda + 1)*params->f0*params->g[theta] / r);
float Q = 2.0*params->a*pow(10.0, params->k*temp*temp) /
dwt_7_9_basis_function_amplitudes[lambda][theta];
return Q;
}
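/*
 * Worked example (illustrative only): with the default options
 * adm_norm_view_dist = 3.0 and adm_ref_display_height = 1080,
 * r = 3.0 * 1080 * M_PI / 180.0 ~= 56.55 pixels/degree, the value quoted in
 * the comment above; Q then follows from Formula (9) with the per-band model
 * parameters.
 */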
// i = 0, j = 0: indices y: 1,0,1, x: 1,0,1 for Fixed-point
#define ADM_CM_THRESH_S_0_0(angles,flt_angles,src_stride,accum,w,h,i,j) \
{ \
*accum = 0; \
for (int theta = 0; theta < 3; ++theta) { \
int32_t sum = 0; \
int16_t *src_ptr = angles[theta]; \
int16_t *flt_ptr = flt_angles[theta]; \
sum += flt_ptr[src_stride + 1]; \
sum += flt_ptr[src_stride]; \
sum += flt_ptr[src_stride + 1]; \
sum += flt_ptr[1]; \
sum += (int16_t)(((ONE_BY_15 * abs((int32_t) src_ptr[0]))+ 2048)>>12);\
sum += flt_ptr[1]; \
sum += flt_ptr[src_stride + 1]; \
sum += flt_ptr[src_stride]; \
sum += flt_ptr[src_stride + 1]; \
*accum += sum; \
} \
}
// i = 0, j = w-1: indices y: 1,0,1, x: w-2, w-1, w-1
#define ADM_CM_THRESH_S_0_W_M_1(angles,flt_angles,src_stride,accum,w,h,i,j) \
{ \
*accum = 0; \
for (int theta = 0; theta < 3; ++theta) { \
int16_t *src_ptr = angles[theta]; \
int16_t *flt_ptr = flt_angles[theta]; \
int32_t sum = 0; \
sum += flt_ptr[src_stride + w - 2]; \
sum += flt_ptr[src_stride + w - 1]; \
sum += flt_ptr[src_stride + w - 1]; \
sum += flt_ptr[w - 2]; \
sum += (int16_t)(((ONE_BY_15 * abs((int32_t) src_ptr[w - 1]))+ 2048)>>12);\
sum += flt_ptr[w - 1]; \
sum += flt_ptr[src_stride + w - 2]; \
sum += flt_ptr[src_stride + w - 1]; \
sum += flt_ptr[src_stride + w - 1]; \
*accum += sum; \
} \
}
// i = 0, j = 1, ..., w-2: indices y: 1,0,1, x: j-1,j,j+1
#define ADM_CM_THRESH_S_0_J(angles,flt_angles,src_stride,accum,w,h,i,j) \
{ \
*accum = 0; \
for (int theta = 0; theta < 3; ++theta) { \
int32_t sum = 0; \
int16_t *src_ptr = angles[theta]; \
int16_t *flt_ptr = flt_angles[theta]; \
sum += flt_ptr[src_stride + j - 1]; \
sum += flt_ptr[src_stride + j]; \
sum += flt_ptr[src_stride + j + 1]; \
sum += flt_ptr[j - 1]; \
sum += (int16_t)(((ONE_BY_15 * abs((int32_t) src_ptr[j]))+ 2048)>>12);\
sum += flt_ptr[j + 1]; \
sum += flt_ptr[src_stride + j - 1]; \
sum += flt_ptr[src_stride + j]; \
sum += flt_ptr[src_stride + j + 1]; \
*accum += sum; \
} \
}
// i = h-1, j = 0: indices y: h-2,h-1,h-1, x: 1,0,1
#define ADM_CM_THRESH_S_H_M_1_0(angles,flt_angles,src_stride,accum,w,h,i,j) \
{ \
*accum = 0; \
for (int theta = 0; theta < 3; ++theta) { \
int32_t sum = 0; \
int16_t *src_ptr = angles[theta]; \
int16_t *flt_ptr = flt_angles[theta]; \
src_ptr += (src_stride * (h - 2)); \
flt_ptr += (src_stride * (h - 2)); \
sum += flt_ptr[1]; \
sum += flt_ptr[0]; \
sum += flt_ptr[1]; \
src_ptr += src_stride; \
flt_ptr += src_stride; \
sum += flt_ptr[1]; \
sum += (int16_t)(((ONE_BY_15 * abs((int32_t) src_ptr[0]))+ 2048)>>12);\
sum += flt_ptr[1]; \
sum += flt_ptr[1]; \
sum += flt_ptr[0]; \
sum += flt_ptr[1]; \
*accum += sum; \
} \
}
// i = h-1, j = w-1: indices y: h-2,h-1,h-1, x: w-2, w-1, w-1
#define ADM_CM_THRESH_S_H_M_1_W_M_1(angles,flt_angles,src_stride,accum,w,h,i,j) \
{ \
*accum = 0; \
for (int theta = 0; theta < 3; ++theta) { \
int16_t *src_ptr = angles[theta]; \
int16_t *flt_ptr = flt_angles[theta]; \
int32_t sum = 0; \
src_ptr += (src_stride * (h - 2)); \
flt_ptr += (src_stride * (h - 2)); \
sum += flt_ptr[w - 2]; \
sum += flt_ptr[w - 1]; \
sum += flt_ptr[w - 1]; \
src_ptr += src_stride; \
flt_ptr += src_stride; \
sum += flt_ptr[w - 2]; \
sum += (int16_t)(((ONE_BY_15 * abs((int32_t) src_ptr[w - 1]))+ 2048)>>12);\
sum += flt_ptr[w - 1]; \
sum += flt_ptr[w - 2]; \
sum += flt_ptr[w - 1]; \
sum += flt_ptr[w - 1]; \
*accum += sum; \
} \
}
// i = h-1, j = 1, ..., w-2: indices y: h-2,h-1,h-1, x: j-1,j,j+1
#define ADM_CM_THRESH_S_H_M_1_J(angles,flt_angles,src_stride,accum,w,h,i,j) \
{ \
*accum = 0; \
for (int theta = 0; theta < 3; ++theta) { \
int16_t *src_ptr = angles[theta]; \
int16_t *flt_ptr = flt_angles[theta]; \
int32_t sum = 0; \
src_ptr += (src_stride * (h - 2)); \
flt_ptr += (src_stride * (h - 2)); \
sum += flt_ptr[j - 1];\
sum += flt_ptr[j]; \
sum += flt_ptr[j + 1]; \
src_ptr += src_stride; \
flt_ptr += src_stride; \
sum += flt_ptr[j - 1]; \
sum += (int16_t)(((ONE_BY_15 * abs((int32_t) src_ptr[j]))+ 2048)>>12);\
sum += flt_ptr[j + 1]; \
sum += flt_ptr[j - 1]; \
sum += flt_ptr[j]; \
sum += flt_ptr[j + 1]; \
*accum += sum; \
} \
}
// i = 1,..,h-2, j = 1,..,w-2: indices y: i-1,i,i+1, x: j-1,j,j+1
#define ADM_CM_THRESH_S_I_J(angles,flt_angles,src_stride,accum,w,h,i,j) \
{ \
*accum = 0; \
for (int theta = 0; theta < 3; ++theta) { \
int32_t sum = 0; \
int16_t *src_ptr = angles[theta]; \
int16_t *flt_ptr = flt_angles[theta]; \
src_ptr += (src_stride * (i - 1)); \
flt_ptr += (src_stride * (i - 1)); \
sum += flt_ptr[j - 1]; \
sum += flt_ptr[j]; \
sum += flt_ptr[j + 1]; \
src_ptr += src_stride; \
flt_ptr += src_stride; \
sum += flt_ptr[j - 1]; \
sum += (int16_t)(((ONE_BY_15 * abs((int32_t) src_ptr[j]))+ 2048)>>12);\
sum += flt_ptr[j + 1]; \
src_ptr += src_stride; \
flt_ptr += src_stride; \
sum += flt_ptr[j - 1]; \
sum += flt_ptr[j]; \
sum += flt_ptr[j + 1]; \
*accum += sum; \
} \
}
// i = 1,..,h-2, j = 0: indices y: i-1,i,i+1, x: 1,0,1
#define ADM_CM_THRESH_S_I_0(angles,flt_angles,src_stride,accum,w,h,i,j) \
{ \
*accum = 0; \
for (int theta = 0; theta < 3; ++theta) { \
int16_t *src_ptr = angles[theta]; \
int16_t *flt_ptr = flt_angles[theta]; \
int32_t sum = 0; \
src_ptr += (src_stride * (i - 1)); \
flt_ptr += (src_stride * (i - 1)); \
sum += flt_ptr[1]; \
sum += flt_ptr[0]; \
sum += flt_ptr[1]; \
src_ptr += src_stride; \
flt_ptr += src_stride; \
sum += flt_ptr[1]; \
sum += (int16_t)(((ONE_BY_15 * abs((int32_t) src_ptr[0]))+ 2048)>>12);\
sum += flt_ptr[1]; \
src_ptr += src_stride; \
flt_ptr += src_stride; \
sum += flt_ptr[1]; \
sum += flt_ptr[0]; \
sum += flt_ptr[1]; \
*accum += sum; \
} \
}
// i = 1,..,h-2, j = w-1: indices y: i-1,i,i+1, x: w-2,w-1,w-1
#define ADM_CM_THRESH_S_I_W_M_1(angles,flt_angles,src_stride,accum,w,h,i,j) \
{ \
*accum = 0; \
for (int theta = 0; theta < 3; ++theta) { \
int16_t *src_ptr = angles[theta]; \
int16_t *flt_ptr = flt_angles[theta]; \
int32_t sum = 0; \
src_ptr += (src_stride * (i-1)); \
flt_ptr += (src_stride * (i - 1)); \
sum += flt_ptr[w - 2]; \
sum += flt_ptr[w - 1]; \
sum += flt_ptr[w - 1]; \
src_ptr += src_stride; \
flt_ptr += src_stride; \
sum += flt_ptr[w - 2]; \
sum += (int16_t)(((ONE_BY_15 * abs((int32_t) src_ptr[w - 1]))+ 2048)>>12);\
sum += flt_ptr[w - 1]; \
src_ptr += src_stride; \
flt_ptr += src_stride; \
sum += flt_ptr[w - 2]; \
sum += flt_ptr[w - 1]; \
sum += flt_ptr[w - 1]; \
*accum += sum; \
} \
}
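/*
 * Reference sketch (not used by the extractor): a floating-point version of
 * what the ADM_CM_THRESH_S_* macros above compute for an interior pixel
 * (i, j). The masking threshold is the sum, over the three detail
 * orientations, of a 3x3 neighborhood of the precomputed |csf|/30 values,
 * except that the center tap uses |angle|/15 of the raw angle value. The
 * border variants only differ in how out-of-frame taps are mirrored. All
 * names here are illustrative.
 */
static inline float adm_cm_thresh_interior_sketch(const float *angles[3],
                                                  const float *flt_angles[3],
                                                  int stride, int i, int j)
{
    float accum = 0.0f;
    for (int theta = 0; theta < 3; ++theta) {
        const float *src = angles[theta] + (i - 1) * stride;
        const float *flt = flt_angles[theta] + (i - 1) * stride;
        float sum = 0.0f;
        sum += flt[j - 1] + flt[j] + flt[j + 1];   /* row i-1 */
        src += stride; flt += stride;
        sum += flt[j - 1] + flt[j + 1];            /* row i, off-center taps */
        sum += fabsf(src[j]) / 15.0f;              /* center tap from raw angle */
        src += stride; flt += stride;
        sum += flt[j - 1] + flt[j] + flt[j + 1];   /* row i+1 */
        accum += sum;
    }
    return accum;
}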
// i = 0, j = 0: indices y: 1,0,1, x: 1,0,1 for Fixed-point
#define I4_ADM_CM_THRESH_S_0_0(angles,flt_angles,src_stride,accum,w,h,i,j,add_bef_shift,shift) \
{ \
*accum = 0; \
for (int theta = 0; theta < 3; ++theta) { \
int32_t sum = 0; \
int32_t *src_ptr = angles[theta]; \
int32_t *flt_ptr = flt_angles[theta]; \
sum += flt_ptr[src_stride + 1]; \
sum += flt_ptr[src_stride]; \
sum += flt_ptr[src_stride + 1]; \
sum += flt_ptr[1]; \
sum += (int32_t)((((int64_t)I4_ONE_BY_15 * abs( src_ptr[0]))+ add_bef_shift)>>shift);\
sum += flt_ptr[1]; \
sum += flt_ptr[src_stride + 1]; \
sum += flt_ptr[src_stride]; \
sum += flt_ptr[src_stride + 1]; \
*accum += sum; \
} \
}
// i = 0, j = w-1: indices y: 1,0,1, x: w-2, w-1, w-1
#define I4_ADM_CM_THRESH_S_0_W_M_1(angles,flt_angles,src_stride,accum,w,h,i,j,add_bef_shift,shift) \
{ \
*accum = 0; \
for (int theta = 0; theta < 3; ++theta) { \
int32_t *src_ptr = angles[theta]; \
int32_t *flt_ptr = flt_angles[theta]; \
int32_t sum = 0; \
sum += flt_ptr[src_stride + w - 2]; \
sum += flt_ptr[src_stride + w - 1]; \
sum += flt_ptr[src_stride + w - 1]; \
sum += flt_ptr[w - 2]; \
sum += (int32_t)((((int64_t)I4_ONE_BY_15 * abs((int32_t) src_ptr[w - 1]))+ add_bef_shift)>>shift);\
sum += flt_ptr[w - 1]; \
sum += flt_ptr[src_stride + w - 2]; \
sum += flt_ptr[src_stride + w - 1]; \
sum += flt_ptr[src_stride + w - 1]; \
*accum += sum; \
} \
}
// i = 0, j = 1, ..., w-2: indices y: 1,0,1, x: j-1,j,j+1
#define I4_ADM_CM_THRESH_S_0_J(angles,flt_angles,src_stride,accum,w,h,i,j,add_bef_shift,shift) \
{ \
*accum = 0; \
for (int theta = 0; theta < 3; ++theta) { \
int32_t sum = 0; \
int32_t *src_ptr = angles[theta]; \
int32_t *flt_ptr = flt_angles[theta]; \
sum += flt_ptr[src_stride + j - 1]; \
sum += flt_ptr[src_stride + j]; \
sum += flt_ptr[src_stride + j + 1]; \
sum += flt_ptr[j - 1]; \
sum += (int32_t)((((int64_t)I4_ONE_BY_15 * abs((int32_t) src_ptr[j]))+ add_bef_shift)>>shift);\
sum += flt_ptr[j + 1]; \
sum += flt_ptr[src_stride + j - 1]; \
sum += flt_ptr[src_stride + j]; \
sum += flt_ptr[src_stride + j + 1]; \
*accum += sum; \
} \
}
// i = h-1, j = 0: indices y: h-2,h-1,h-1, x: 1,0,1
#define I4_ADM_CM_THRESH_S_H_M_1_0(angles,flt_angles,src_stride,accum,w,h,i,j,add_bef_shift,shift) \
{ \
*accum = 0; \
for (int theta = 0; theta < 3; ++theta) { \
int32_t sum = 0; \
int32_t *src_ptr = angles[theta]; \
int32_t *flt_ptr = flt_angles[theta]; \
src_ptr += (src_stride * (h - 2)); \
flt_ptr += (src_stride * (h - 2)); \
sum += flt_ptr[1]; \
sum += flt_ptr[0]; \
sum += flt_ptr[1]; \
src_ptr += src_stride; \
flt_ptr += src_stride; \
sum += flt_ptr[1]; \
sum += (int32_t)((((int64_t)I4_ONE_BY_15 * abs((int32_t) src_ptr[0]))+ add_bef_shift)>>shift);\
sum += flt_ptr[1]; \
sum += flt_ptr[1]; \
sum += flt_ptr[0]; \
sum += flt_ptr[1]; \
*accum += sum; \
} \
}
// i = h-1, j = w-1: indices y: h-2,h-1,h-1, x: w-2, w-1, w-1
#define I4_ADM_CM_THRESH_S_H_M_1_W_M_1(angles,flt_angles,src_stride,accum,w,h,i,j,add_bef_shift,shift) \
{ \
*accum = 0; \
for (int theta = 0; theta < 3; ++theta) { \
int32_t *src_ptr = angles[theta]; \
int32_t *flt_ptr = flt_angles[theta]; \
int32_t sum = 0; \
src_ptr += (src_stride * (h - 2)); \
flt_ptr += (src_stride * (h - 2)); \
sum += flt_ptr[w - 2]; \
sum += flt_ptr[w - 1]; \
sum += flt_ptr[w - 1]; \
src_ptr += src_stride; \
flt_ptr += src_stride; \
sum += flt_ptr[w - 2]; \
sum += (int32_t)((((int64_t)I4_ONE_BY_15 * abs((int32_t) src_ptr[w - 1]))+ add_bef_shift)>>shift);\
sum += flt_ptr[w - 1]; \
sum += flt_ptr[w - 2]; \
sum += flt_ptr[w - 1]; \
sum += flt_ptr[w - 1]; \
*accum += sum; \
} \
}
// i = h-1, j = 1, ..., w-2: indices y: h-2,h-1,h-1, x: j-1,j,j+1
#define I4_ADM_CM_THRESH_S_H_M_1_J(angles,flt_angles,src_stride,accum,w,h,i,j,add_bef_shift,shift) \
{ \
*accum = 0; \
for (int theta = 0; theta < 3; ++theta) { \
int32_t *src_ptr = angles[theta]; \
int32_t *flt_ptr = flt_angles[theta]; \
int32_t sum = 0; \
src_ptr += (src_stride * (h - 2)); \
flt_ptr += (src_stride * (h - 2)); \
sum += flt_ptr[j - 1];\
sum += flt_ptr[j]; \
sum += flt_ptr[j + 1]; \
src_ptr += src_stride; \
flt_ptr += src_stride; \
sum += flt_ptr[j - 1]; \
sum += (int32_t)((((int64_t)I4_ONE_BY_15 * abs((int32_t) src_ptr[j]))+ add_bef_shift)>>shift);\
sum += flt_ptr[j + 1]; \
sum += flt_ptr[j - 1]; \
sum += flt_ptr[j]; \
sum += flt_ptr[j + 1]; \
*accum += sum; \
} \
}
// i = 1,..,h-2, j = 1,..,w-2: indices y: i-1,i,i+1, x: j-1,j,j+1
#define I4_ADM_CM_THRESH_S_I_J(angles,flt_angles,src_stride,accum,w,h,i,j,add_bef_shift,shift) \
{ \
*accum = 0; \
for (int theta = 0; theta < 3; ++theta) { \
int32_t sum = 0; \
int32_t *src_ptr = angles[theta]; \
int32_t *flt_ptr = flt_angles[theta]; \
src_ptr += (src_stride * (i - 1)); \
flt_ptr += (src_stride * (i - 1)); \
sum += flt_ptr[j - 1]; \
sum += flt_ptr[j]; \
sum += flt_ptr[j + 1]; \
src_ptr += src_stride; \
flt_ptr += src_stride; \
sum += flt_ptr[j - 1]; \
sum += (int32_t)((((int64_t)I4_ONE_BY_15 * abs((int32_t) src_ptr[j]))+ add_bef_shift)>>shift);\
sum += flt_ptr[j + 1]; \
src_ptr += src_stride; \
flt_ptr += src_stride; \
sum += flt_ptr[j - 1]; \
sum += flt_ptr[j]; \
sum += flt_ptr[j + 1]; \
*accum += sum; \
} \
}
// i = 1,..,h-2, j = 0: indices y: i-1,i,i+1, x: 1,0,1
#define I4_ADM_CM_THRESH_S_I_0(angles,flt_angles,src_stride,accum,w,h,i,j,add_bef_shift,shift) \
{ \
*accum = 0; \
for (int theta = 0; theta < 3; ++theta) { \
int32_t *src_ptr = angles[theta]; \
int32_t *flt_ptr = flt_angles[theta]; \
int32_t sum = 0; \
src_ptr += (src_stride * (i - 1)); \
flt_ptr += (src_stride * (i - 1)); \
sum += flt_ptr[1]; \
sum += flt_ptr[0]; \
sum += flt_ptr[1]; \
src_ptr += src_stride; \
flt_ptr += src_stride; \
sum += flt_ptr[1]; \
sum += (int32_t)((((int64_t)I4_ONE_BY_15 * abs((int32_t) src_ptr[0]))+ add_bef_shift)>>shift);\
sum += flt_ptr[1]; \
src_ptr += src_stride; \
flt_ptr += src_stride; \
sum += flt_ptr[1]; \
sum += flt_ptr[0]; \
sum += flt_ptr[1]; \
*accum += sum; \
} \
}
// i = 1,..,h-2, j = w-1: indices y: i-1,i,i+1, x: w-2,w-1,w-1
#define I4_ADM_CM_THRESH_S_I_W_M_1(angles,flt_angles,src_stride,accum,w,h,i,j,add_bef_shift,shift) \
{ \
*accum = 0; \
for (int theta = 0; theta < 3; ++theta) { \
int32_t *src_ptr = angles[theta]; \
int32_t *flt_ptr = flt_angles[theta]; \
int32_t sum = 0; \
src_ptr += (src_stride * (i-1)); \
flt_ptr += (src_stride * (i - 1)); \
sum += flt_ptr[w - 2]; \
sum += flt_ptr[w - 1]; \
sum += flt_ptr[w - 1]; \
src_ptr += src_stride; \
flt_ptr += src_stride; \
sum += flt_ptr[w - 2]; \
sum += (int32_t)((((int64_t)I4_ONE_BY_15 * abs((int32_t) src_ptr[w - 1]))+ add_bef_shift)>>shift);\
sum += flt_ptr[w - 1]; \
src_ptr += src_stride; \
flt_ptr += src_stride; \
sum += flt_ptr[w - 2]; \
sum += flt_ptr[w - 1]; \
sum += flt_ptr[w - 1]; \
*accum += sum; \
} \
}
#define ADM_CM_ACCUM_ROUND(x, thr, shift_xsub, x_sq, add_shift_xsq, shift_xsq, val, \
add_shift_xcub, shift_xcub, accum_inner) \
{ \
x = abs(x) - ((int32_t)(thr) << shift_xsub); \
x = x < 0 ? 0 : x; \
x_sq = (int32_t)((((int64_t)x * x) + add_shift_xsq) >> shift_xsq); \
val = (((int64_t)x_sq * x) + add_shift_xcub) >> shift_xcub; \
accum_inner += val; \
}
#define I4_ADM_CM_ACCUM_ROUND(x, thr, shift_sub, x_sq, add_shift_sq, shift_sq, val, \
add_shift_cub, shift_cub, accum_inner) \
{ \
x = abs(x) - (thr >> shift_sub); \
x = x < 0 ? 0 : x; \
x_sq = (int32_t)((((int64_t)x * x) + add_shift_sq) >> shift_sq); \
val = (((int64_t)x_sq * x) + add_shift_cub) >> shift_cub; \
accum_inner += val; \
}
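/*
 * Reference sketch (not used above): the floating-point operation the two
 * ACCUM_ROUND macros approximate. The CSF-weighted coefficient x is
 * soft-thresholded by the masking threshold and the remainder is cubed before
 * accumulation; the add/shift parameters in the macros only manage
 * fixed-point precision and rounding.
 */
static inline double adm_cm_accum_sketch(double x, double thr, double accum)
{
    double m = fabs(x) - thr;     /* soft-threshold by the masking value */
    if (m < 0.0) m = 0.0;
    return accum + m * m * m;     /* cube and accumulate */
}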
static void dwt2_src_indices_filt(int **src_ind_y, int **src_ind_x, int w, int h)
{
int ind0, ind1, ind2, ind3;
const unsigned h_half = (h + 1) / 2;
const unsigned w_half = (w + 1) / 2;
unsigned i, j;
/* Vertical pass */
{ /* i : 0 */
src_ind_y[0][0] = 1;
src_ind_y[1][0] = 0;
src_ind_y[2][0] = 1;
src_ind_y[3][0] = 2;
}
for (i = 1; i < h_half - 2; ++i) { /* i : 1 to h_half - 3*/
ind1 = 2 * i;
ind0 = ind1 - 1;
ind2 = ind1 + 1;
ind3 = ind1 + 2;
src_ind_y[0][i] = ind0;
src_ind_y[1][i] = ind1;
src_ind_y[2][i] = ind2;
src_ind_y[3][i] = ind3;
}
for (i = h_half - 2; i < h_half; ++i) { /* i : h_half - 2 to h_half - 1 */
ind1 = 2 * i;
ind0 = ind1 - 1;
ind2 = ind1 + 1;
ind3 = ind1 + 2;
if (ind0 >= h) {
ind0 = (2 * h - ind0 - 1);
}
if (ind1 >= h) {
ind1 = (2 * h - ind1 - 1);
}
if (ind2 >= h) {
ind2 = (2 * h - ind2 - 1);
}
if (ind3 >= h) {
ind3 = (2 * h - ind3 - 1);
}
src_ind_y[0][i] = ind0;
src_ind_y[1][i] = ind1;
src_ind_y[2][i] = ind2;
src_ind_y[3][i] = ind3;
}
/* Horizontal pass */
{ /* j : 0 */
src_ind_x[0][0] = 1;
src_ind_x[1][0] = 0;
src_ind_x[2][0] = 1;
src_ind_x[3][0] = 2;
}
for (j = 1; j < w_half - 2; ++j) { /* j : 1 to w_half - 3 */
ind1 = 2 * j;
ind0 = ind1 - 1;
ind2 = ind1 + 1;
ind3 = ind1 + 2;
src_ind_x[0][j] = ind0;
src_ind_x[1][j] = ind1;
src_ind_x[2][j] = ind2;
src_ind_x[3][j] = ind3;
}
for (j = w_half - 2; j < w_half; ++j) { /* j : w_half - 2 to w_half - 1 */
ind1 = 2 * j;
ind0 = ind1 - 1;
ind2 = ind1 + 1;
ind3 = ind1 + 2;
if (ind0 >= w) {
ind0 = (2 * w - ind0 - 1);
}
if (ind1 >= w) {
ind1 = (2 * w - ind1 - 1);
}
if (ind2 >= w) {
ind2 = (2 * w - ind2 - 1);
}
if (ind3 >= w) {
ind3 = (2 * w - ind3 - 1);
}
src_ind_x[0][j] = ind0;
src_ind_x[1][j] = ind1;
src_ind_x[2][j] = ind2;
src_ind_x[3][j] = ind3;
}
}
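/*
 * Reference sketch (illustrative): the boundary rule encoded in the index
 * tables above. Filter taps that fall past the last row/column are mirrored
 * back into the frame, so a requested index ind >= n maps to 2*n - ind - 1
 * (e.g. ind == n maps to n - 1, ind == n + 1 maps to n - 2).
 */
static inline int dwt2_mirror_index_sketch(int ind, int n)
{
    return (ind >= n) ? (2 * n - ind - 1) : ind;
}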
#define MIN(x, y) (((x) < (y)) ? (x) : (y))
#define MAX(x, y) (((x) > (y)) ? (x) : (y))
static void adm_decouple(AdmBuffer *buf, int w, int h, int stride,
double adm_enhn_gain_limit)
{
const float cos_1deg_sq = cos(1.0 * M_PI / 180.0) * cos(1.0 * M_PI / 180.0);
const adm_dwt_band_t *ref = &buf->ref_dwt2;
const adm_dwt_band_t *dis = &buf->dis_dwt2;
const adm_dwt_band_t *r = &buf->decouple_r;
const adm_dwt_band_t *a = &buf->decouple_a;
int left = w * ADM_BORDER_FACTOR - 0.5 - 1; // -1 for filter tap
int top = h * ADM_BORDER_FACTOR - 0.5 - 1;
int right = w - left + 2; // +2 for filter tap
int bottom = h - top + 2;
if (left < 0) {
left = 0;
}
if (right > w) {
right = w;
}
if (top < 0) {
top = 0;
}
if (bottom > h) {
bottom = h;
}
int64_t ot_dp, o_mag_sq, t_mag_sq;
for (int i = top; i < bottom; ++i) {
for (int j = left; j < right; ++j) {
int16_t oh = ref->band_h[i * stride + j];
int16_t ov = ref->band_v[i * stride + j];
int16_t od = ref->band_d[i * stride + j];
int16_t th = dis->band_h[i * stride + j];
int16_t tv = dis->band_v[i * stride + j];
int16_t td = dis->band_d[i * stride + j];
int16_t rst_h, rst_v, rst_d;
/* Determine if the angle between (oh,ov) and (th,tv) is less than 1 degree.
* Given that u is the vector (oh,ov) and v is the vector (th,tv), this can
* be done by testing the inequality
*
* { (u.v) >= 0 } AND { (u.v)^2 >= cos(1deg)^2 * ||u||^2 * ||v||^2 }
*
* Proof:
*
* cos(theta) = (u.v) / (||u|| * ||v||)
*
* IF u.v >= 0 THEN
* cos(theta)^2 = (u.v)^2 / (||u||^2 * ||v||^2)
* (u.v)^2 = cos(theta)^2 * ||u||^2 * ||v||^2
*
* IF |theta| < 1deg THEN
* (u.v)^2 >= cos(1deg)^2 * ||u||^2 * ||v||^2
* END
* ELSE
* |theta| > 90deg
* END
*/
ot_dp = (int64_t)oh * th + (int64_t)ov * tv;
o_mag_sq = (int64_t)oh * oh + (int64_t)ov * ov;
t_mag_sq = (int64_t)th * th + (int64_t)tv * tv;
/**
* angle_flag is calculated in floating-point by converting fixed-point variables back to
* floating-point
*/
int angle_flag = (((float)ot_dp / 4096.0) >= 0.0f) &&
(((float)ot_dp / 4096.0) * ((float)ot_dp / 4096.0) >=
cos_1deg_sq * ((float)o_mag_sq / 4096.0) * ((float)t_mag_sq / 4096.0));
/**
* Division th/oh is carried out using a lookup table and converted to a multiplication
*/
int32_t tmp_kh = (oh == 0) ?
32768 : (((int64_t)div_lookup[oh + 32768] * th) + 16384) >> 15;
int32_t tmp_kv = (ov == 0) ?
32768 : (((int64_t)div_lookup[ov + 32768] * tv) + 16384) >> 15;
int32_t tmp_kd = (od == 0) ?
32768 : (((int64_t)div_lookup[od + 32768] * td) + 16384) >> 15;
int32_t kh = tmp_kh < 0 ? 0 : (tmp_kh > 32768 ? 32768 : tmp_kh);
int32_t kv = tmp_kv < 0 ? 0 : (tmp_kv > 32768 ? 32768 : tmp_kv);
int32_t kd = tmp_kd < 0 ? 0 : (tmp_kd > 32768 ? 32768 : tmp_kd);
/**
* kh,kv,kd are in Q15 type and oh,ov,od are in Q16 type hence shifted by
* 15 to make result Q16
*/
rst_h = ((kh * oh) + 16384) >> 15;
rst_v = ((kv * ov) + 16384) >> 15;
rst_d = ((kd * od) + 16384) >> 15;
const float rst_h_f = ((float)kh / 32768) * ((float)oh / 64);
const float rst_v_f = ((float)kv / 32768) * ((float)ov / 64);
const float rst_d_f = ((float)kd / 32768) * ((float)od / 64);
if (angle_flag && (rst_h_f > 0.)) rst_h = MIN((rst_h * adm_enhn_gain_limit), th);
if (angle_flag && (rst_h_f < 0.)) rst_h = MAX((rst_h * adm_enhn_gain_limit), th);
if (angle_flag && (rst_v_f > 0.)) rst_v = MIN(rst_v * adm_enhn_gain_limit, tv);
if (angle_flag && (rst_v_f < 0.)) rst_v = MAX(rst_v * adm_enhn_gain_limit, tv);
if (angle_flag && (rst_d_f > 0.)) rst_d = MIN(rst_d * adm_enhn_gain_limit, td);
if (angle_flag && (rst_d_f < 0.)) rst_d = MAX(rst_d * adm_enhn_gain_limit, td);
r->band_h[i * stride + j] = rst_h;
r->band_v[i * stride + j] = rst_v;
r->band_d[i * stride + j] = rst_d;
a->band_h[i * stride + j] = th - rst_h;
a->band_v[i * stride + j] = tv - rst_v;
a->band_d[i * stride + j] = td - rst_d;
}
}
}
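/*
 * Reference sketch (not used by the extractor): the decoupling step in
 * floating point, per coefficient. The distorted coefficient t is split into
 * a "restored" part k*o aligned with the reference coefficient o, where
 * k = clip(t/o, 0, 1), and an additive-impairment residual t - k*o. The
 * enhancement-gain limit above is applied to the restored part only when the
 * 1-degree angle test passes. Names here are illustrative.
 */
static inline void adm_decouple_coeff_sketch(float o, float t,
                                             float *restored, float *additive)
{
    float k = (o == 0.0f) ? 1.0f : t / o;  /* o == 0 maps to k = 1, as above */
    if (k < 0.0f) k = 0.0f;
    if (k > 1.0f) k = 1.0f;
    *restored = k * o;
    *additive = t - *restored;
}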
static inline uint16_t get_best15_from32(uint32_t temp, int *x)
{
int k = __builtin_clz(temp); // count leading zeros (gcc/clang builtin)
k = 17 - k;
temp = (temp + (1 << (k - 1))) >> k;
*x = k;
return temp;
}
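/*
 * Worked example (illustrative): for temp = 70000, which needs 17 significant
 * bits, __builtin_clz(70000) = 15, so k = 17 - 15 = 2 and the rounded result
 * is (70000 + 2) >> 2 = 17500, which fits in 15 bits (and 17500 << 2 == 70000).
 * The caller indexes div_lookup with this 15-bit value and compensates for
 * the extra shift k in the final right shift.
 */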
static void adm_decouple_s123(AdmBuffer *buf, int w, int h, int stride,
double adm_enhn_gain_limit)
{
const float cos_1deg_sq = cos(1.0 * M_PI / 180.0) * cos(1.0 * M_PI / 180.0);
const i4_adm_dwt_band_t *ref = &buf->i4_ref_dwt2;
const i4_adm_dwt_band_t *dis = &buf->i4_dis_dwt2;
const i4_adm_dwt_band_t *r = &buf->i4_decouple_r;
const i4_adm_dwt_band_t *a = &buf->i4_decouple_a;
/* The computation of the score is not required for the regions
which lie outside the frame borders */
int left = w * ADM_BORDER_FACTOR - 0.5 - 1; // -1 for filter tap
int top = h * ADM_BORDER_FACTOR - 0.5 - 1;
int right = w - left + 2; // +2 for filter tap
int bottom = h - top + 2;
if (left < 0) {
left = 0;
}
if (right > w) {
right = w;
}
if (top < 0) {
top = 0;
}
if (bottom > h) {
bottom = h;
}
int64_t ot_dp, o_mag_sq, t_mag_sq;
for (int i = top; i < bottom; ++i)
{
for (int j = left; j < right; ++j)
{
int32_t oh = ref->band_h[i * stride + j];
int32_t ov = ref->band_v[i * stride + j];
int32_t od = ref->band_d[i * stride + j];
int32_t th = dis->band_h[i * stride + j];
int32_t tv = dis->band_v[i * stride + j];
int32_t td = dis->band_d[i * stride + j];
int32_t rst_h, rst_v, rst_d;
/* Determine if the angle between (oh,ov) and (th,tv) is less than 1 degree.
* Given that u is the vector (oh,ov) and v is the vector (th,tv), this can
* be done by testing the inequality
*
* { (u.v) >= 0 } AND { (u.v)^2 >= cos(1deg)^2 * ||u||^2 * ||v||^2 }
*
* Proof:
*
* cos(theta) = (u.v) / (||u|| * ||v||)
*
* IF u.v >= 0 THEN
* cos(theta)^2 = (u.v)^2 / (||u||^2 * ||v||^2)
* (u.v)^2 = cos(theta)^2 * ||u||^2 * ||v||^2
*
* IF |theta| < 1deg THEN
* (u.v)^2 >= cos(1deg)^2 * ||u||^2 * ||v||^2
* END
* ELSE
* |theta| > 90deg
* END
*/
ot_dp = (int64_t)oh * th + (int64_t)ov * tv;
o_mag_sq = (int64_t)oh * oh + (int64_t)ov * ov;
t_mag_sq = (int64_t)th * th + (int64_t)tv * tv;
int angle_flag = (((float)ot_dp / 4096.0) >= 0.0f) &&
(((float)ot_dp / 4096.0) * ((float)ot_dp / 4096.0) >=
cos_1deg_sq * ((float)o_mag_sq / 4096.0) * ((float)t_mag_sq / 4096.0));
/**
* Division th/oh is carried out using a lookup table and converted to a multiplication
* int64 / int32 is converted to a multiplication using the following method
* num / den :
* DenAbs = Abs(den)
* MSBDen = MSB(DenAbs) (gives the position of the first 1 bit from the msb side)
* If (DenAbs < (1 << 15))
* Round = (1 << 14)
* Score = (num * div_lookup[den] + Round) >> 15
* else
* RoundD = (1 << (16 - MSBDen))
* Round = (1 << (14 + (17 - MSBDen)))
* Score = (num * div_lookup[(DenAbs + RoundD) >> (17 - MSBDen)] * sign(den) + Round)
* >> (15 + (17 - MSBDen))
*/
int32_t kh_shift = 0;
int32_t kv_shift = 0;
int32_t kd_shift = 0;
uint32_t abs_oh = abs(oh);
uint32_t abs_ov = abs(ov);
uint32_t abs_od = abs(od);
int8_t kh_sign = (oh < 0 ? -1 : 1);
int8_t kv_sign = (ov < 0 ? -1 : 1);
int8_t kd_sign = (od < 0 ? -1 : 1);
uint16_t kh_msb = (abs_oh < (32768) ? abs_oh : get_best15_from32(abs_oh, &kh_shift));
uint16_t kv_msb = (abs_ov < (32768) ? abs_ov : get_best15_from32(abs_ov, &kv_shift));
uint16_t kd_msb = (abs_od < (32768) ? abs_od : get_best15_from32(abs_od, &kd_shift));
int64_t tmp_kh = (oh == 0) ? 32768 : (((int64_t)div_lookup[kh_msb + 32768] * th)*(kh_sign) +
(1 << (14 + kh_shift))) >> (15 + kh_shift);
int64_t tmp_kv = (ov == 0) ? 32768 : (((int64_t)div_lookup[kv_msb + 32768] * tv)*(kv_sign) +
(1 << (14 + kv_shift))) >> (15 + kv_shift);
int64_t tmp_kd = (od == 0) ? 32768 : (((int64_t)div_lookup[kd_msb + 32768] * td)*(kd_sign) +
(1 << (14 + kd_shift))) >> (15 + kd_shift);
int64_t kh = tmp_kh < 0 ? 0 : (tmp_kh > 32768 ? 32768 : tmp_kh);
int64_t kv = tmp_kv < 0 ? 0 : (tmp_kv > 32768 ? 32768 : tmp_kv);
int64_t kd = tmp_kd < 0 ? 0 : (tmp_kd > 32768 ? 32768 : tmp_kd);
rst_h = ((kh * oh) + 16384) >> 15;
rst_v = ((kv * ov) + 16384) >> 15;
rst_d = ((kd * od) + 16384) >> 15;
const float rst_h_f = ((float)kh / 32768) * ((float)oh / 64);
const float rst_v_f = ((float)kv / 32768) * ((float)ov / 64);
const float rst_d_f = ((float)kd / 32768) * ((float)od / 64);
if (angle_flag && (rst_h_f > 0.)) rst_h = MIN((rst_h * adm_enhn_gain_limit), th);
if (angle_flag && (rst_h_f < 0.)) rst_h = MAX((rst_h * adm_enhn_gain_limit), th);
if (angle_flag && (rst_v_f > 0.)) rst_v = MIN(rst_v * adm_enhn_gain_limit, tv);
if (angle_flag && (rst_v_f < 0.)) rst_v = MAX(rst_v * adm_enhn_gain_limit, tv);
if (angle_flag && (rst_d_f > 0.)) rst_d = MIN(rst_d * adm_enhn_gain_limit, td);
if (angle_flag && (rst_d_f < 0.)) rst_d = MAX(rst_d * adm_enhn_gain_limit, td);
r->band_h[i * stride + j] = rst_h;
r->band_v[i * stride + j] = rst_v;
r->band_d[i * stride + j] = rst_d;
a->band_h[i * stride + j] = th - rst_h;
a->band_v[i * stride + j] = tv - rst_v;
a->band_d[i * stride + j] = td - rst_d;
}
}
}
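/*
 * Reference sketch (illustrative, not part of the pipeline): the quotient
 * approximation described in the comment above, written out in plain C. A
 * wide denominator is reduced to its best 15 bits via get_best15_from32() so
 * that the same div_lookup table can be reused, and the extra shift is folded
 * back into the final rounding shift. The helper name is illustrative.
 */
static inline int64_t adm_div_approx_sketch(int64_t num, int32_t den)
{
    if (den == 0) return 32768;  /* 1.0 in Q15, as in the code above */
    int shift = 0;
    const int sign = (den < 0) ? -1 : 1;
    const uint32_t abs_den = (uint32_t)(den < 0 ? -(int64_t)den : den);
    const uint16_t den15 = (abs_den < 32768) ? (uint16_t)abs_den
                                             : get_best15_from32(abs_den, &shift);
    return ((num * div_lookup[den15 + 32768]) * sign +
            ((int64_t)1 << (14 + shift))) >> (15 + shift);
}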
static void adm_csf(AdmBuffer *buf, int w, int h, int stride,
double adm_norm_view_dist, int adm_ref_display_height)
{
const adm_dwt_band_t *src = &buf->decouple_a;
const adm_dwt_band_t *dst = &buf->csf_a;
const adm_dwt_band_t *flt = &buf->csf_f;
const int16_t *src_angles[3] = { src->band_h, src->band_v, src->band_d };
int16_t *dst_angles[3] = { dst->band_h, dst->band_v, dst->band_d };
int16_t *flt_angles[3] = { flt->band_h, flt->band_v, flt->band_d };
// for ADM: the scale index goes from 0 to 3, but in the noise floor paper it
// goes from 1 to 4 (from finest scale to coarsest scale).
// scale 0 is the value passed to dwt_quant_step here
const float factor1 = dwt_quant_step(&dwt_7_9_YCbCr_threshold[0], 0, 1, adm_norm_view_dist, adm_ref_display_height);
const float factor2 = dwt_quant_step(&dwt_7_9_YCbCr_threshold[0], 0, 2, adm_norm_view_dist, adm_ref_display_height);
const float rfactor1[3] = { 1.0f / factor1, 1.0f / factor1, 1.0f / factor2 };
/**
* rfactor is converted to fixed-point for scale0 and stored in i_rfactor
* multiplied by 2^21 for rfactor[0,1] and by 2^23 for rfactor[2].
* For adm_norm_view_dist 3.0 and adm_ref_display_height 1080,
* i_rfactor is around { 36453,36453,49417 }
*/
uint16_t i_rfactor[3];
if (fabs(adm_norm_view_dist * adm_ref_display_height - DEFAULT_ADM_NORM_VIEW_DIST * DEFAULT_ADM_REF_DISPLAY_HEIGHT) < 1.0e-8) {
i_rfactor[0] = 36453;
i_rfactor[1] = 36453;
i_rfactor[2] = 49417;
}
else {
const double pow2_21 = pow(2, 21);
const double pow2_23 = pow(2, 23);
i_rfactor[0] = (uint16_t) (rfactor1[0] * pow2_21);
i_rfactor[1] = (uint16_t) (rfactor1[1] * pow2_21);
i_rfactor[2] = (uint16_t) (rfactor1[2] * pow2_23);
}
/**
* The shift pending from the previous stage is 6,
* hence variables multiplied by i_rfactor[0,1] have to be shifted by 21+6=27 to convert
* into floating-point, but are shifted by only 15 to keep them in Q16,
* and variables multiplied by i_rfactor[2] have to be shifted by 23+6=29 to convert into
* floating-point, but are shifted by only 17 to keep them in Q16.
* Hence the remaining shift after applying i_shifts is 12 to make the values
* equivalent to floating-point.
*/
uint8_t i_shifts[3] = { 15,15,17 };
uint16_t i_shiftsadd[3] = { 16384, 16384, 65535 };
uint16_t FIX_ONE_BY_30 = 4369; //(1/30)*2^17
/* The computation of the csf values is not required for the regions which
* lie outside the frame borders
*/
int left = w * ADM_BORDER_FACTOR - 0.5 - 1; // -1 for filter tap
int top = h * ADM_BORDER_FACTOR - 0.5 - 1;
int right = w - left + 2; // +2 for filter tap
int bottom = h - top + 2;
if (left < 0) {
left = 0;
}
if (right > w) {
right = w;
}
if (top < 0) {
top = 0;
}
if (bottom > h) {
bottom = h;
}
for (int theta = 0; theta < 3; ++theta) {
const int16_t *src_ptr = src_angles[theta];
int16_t *dst_ptr = dst_angles[theta];
int16_t *flt_ptr = flt_angles[theta];
for (int i = top; i < bottom; ++i) {
int src_offset = i * stride;
int dst_offset = i * stride;
for (int j = left; j < right; ++j) {
int32_t dst_val = i_rfactor[theta] * (int32_t)src_ptr[src_offset + j];
int16_t i16_dst_val = ((int16_t)((dst_val + i_shiftsadd[theta]) >> i_shifts[theta]));
dst_ptr[dst_offset + j] = i16_dst_val;
flt_ptr[dst_offset + j] = ((int16_t)(((FIX_ONE_BY_30 * abs((int32_t)i16_dst_val))
+ 2048) >> 12));
}
}
}
}
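/*
 * Reference sketch (illustrative): what the fixed-point loop above computes
 * per coefficient, in floating point. Each additive-impairment coefficient is
 * scaled by the reciprocal CSF quantization step for its orientation, and a
 * 1/30-scaled magnitude is stored alongside it for the masking threshold used
 * later. i_rfactor (Q21/Q23) and FIX_ONE_BY_30 (Q17) are the fixed-point
 * counterparts of rfactor and 1/30.
 */
static inline void adm_csf_coeff_sketch(float a, float rfactor,
                                        float *csf_val, float *flt_val)
{
    *csf_val = a * rfactor;              /* CSF-weighted coefficient */
    *flt_val = fabsf(*csf_val) / 30.0f;  /* magnitude term used for masking */
}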
static void i4_adm_csf(AdmBuffer *buf, int scale, int w, int h, int stride,
double adm_norm_view_dist, int adm_ref_display_height)
{
const i4_adm_dwt_band_t *src = &buf->i4_decouple_a;
const i4_adm_dwt_band_t *dst = &buf->i4_csf_a;
const i4_adm_dwt_band_t *flt = &buf->i4_csf_f;
const int32_t *src_angles[3] = { src->band_h, src->band_v, src->band_d };
int32_t *dst_angles[3] = { dst->band_h, dst->band_v, dst->band_d };
int32_t *flt_angles[3] = { flt->band_h, flt->band_v, flt->band_d };
// for ADM: the scale index goes from 0 to 3, but in the noise floor paper it
// goes from 1 to 4 (from finest scale to coarsest scale).
const float factor1 = dwt_quant_step(&dwt_7_9_YCbCr_threshold[0], scale, 1, adm_norm_view_dist, adm_ref_display_height);
const float factor2 = dwt_quant_step(&dwt_7_9_YCbCr_threshold[0], scale, 2, adm_norm_view_dist, adm_ref_display_height);
const float rfactor1[3] = { 1.0f / factor1, 1.0f / factor1, 1.0f / factor2 };
//i_rfactor in fixed-point
const double pow2_32 = pow(2, 32);
const uint32_t i_rfactor[3] = { (uint32_t)(rfactor1[0] * pow2_32),
(uint32_t)(rfactor1[1] * pow2_32),
(uint32_t)(rfactor1[2] * pow2_32) };
const uint32_t FIX_ONE_BY_30 = 143165577;
const uint32_t shift_dst[3] = { 28, 28, 28 };
const uint32_t shift_flt[3] = { 32, 32, 32 };
int32_t add_bef_shift_dst[3], add_bef_shift_flt[3];
for (unsigned idx = 0; idx < 3; ++idx) {
add_bef_shift_dst[idx] = (1u << (shift_dst[idx] - 1));
add_bef_shift_flt[idx] = (1u << (shift_flt[idx] - 1));
}
/* The computation of the csf values is not required for the regions
* which lie outside the frame borders
*/
int left = w * ADM_BORDER_FACTOR - 0.5 - 1; // -1 for filter tap
int top = h * ADM_BORDER_FACTOR - 0.5 - 1;
int right = w - left + 2; // +2 for filter tap
int bottom = h - top + 2;
if (left < 0) {
left = 0;
}
if (right > w) {
right = w;
}
if (top < 0) {
top = 0;
}
if (bottom > h) {
bottom = h;
}
for (int theta = 0; theta < 3; ++theta)
{
const int32_t *src_ptr = src_angles[theta];
int32_t *dst_ptr = dst_angles[theta];
int32_t *flt_ptr = flt_angles[theta];
for (int i = top; i < bottom; ++i)
{
int src_offset = i * stride;
int dst_offset = i * stride;
for (int j = left; j < right; ++j)
{
int32_t dst_val = (int32_t)(((i_rfactor[theta] * (int64_t)src_ptr[src_offset + j]) +
add_bef_shift_dst[scale - 1]) >> shift_dst[scale - 1]);
dst_ptr[dst_offset + j] = dst_val;
flt_ptr[dst_offset + j] = (int32_t)((((int64_t)FIX_ONE_BY_30 * abs(dst_val)) +
add_bef_shift_flt[scale - 1]) >> shift_flt[scale - 1]);
}
}
}
}
static float adm_csf_den_scale(const adm_dwt_band_t *src, int w, int h,
int src_stride,
double adm_norm_view_dist, int adm_ref_display_height)
{
// for ADM: the scale index goes from 0 to 3, but in the noise floor paper it
// goes from 1 to 4 (from finest scale to coarsest scale).
const float factor1 = dwt_quant_step(&dwt_7_9_YCbCr_threshold[0], 0, 1, adm_norm_view_dist, adm_ref_display_height);
const float factor2 = dwt_quant_step(&dwt_7_9_YCbCr_threshold[0], 0, 2, adm_norm_view_dist, adm_ref_display_height);
const float rfactor[3] = { 1.0f / factor1, 1.0f / factor1, 1.0f / factor2 };
uint64_t accum_h = 0, accum_v = 0, accum_d = 0;
/* The computation of the denominator scales is not required for the regions
* which lie outside the frame borders
*/
const int left = w * ADM_BORDER_FACTOR - 0.5;
const int top = h * ADM_BORDER_FACTOR - 0.5;
const int right = w - left;
const int bottom = h - top;
int32_t shift_accum = (int32_t)ceil(log2((bottom - top)*(right - left)) - 20);
shift_accum = shift_accum > 0 ? shift_accum : 0;
int32_t add_shift_accum =
shift_accum > 0 ? (1 << (shift_accum - 1)) : 0;
/**
* The rfactor is multiplied at the end, after cubing,
* because d += (a[i]^3)*(r^3)
* is equivalent to d += a[i]^3 followed by d = d*(r^3)
*/
int16_t *src_h = src->band_h + top * src_stride;
int16_t *src_v = src->band_v + top * src_stride;
int16_t *src_d = src->band_d + top * src_stride;
for (int i = top; i < bottom; ++i) {
uint64_t accum_inner_h = 0;
uint64_t accum_inner_v = 0;
uint64_t accum_inner_d = 0;
for (int j = left; j < right; ++j) {
uint16_t h = (uint16_t)abs(src_h[j]);
uint16_t v = (uint16_t)abs(src_v[j]);
uint16_t d = (uint16_t)abs(src_d[j]);
uint64_t val = ((uint64_t)h * h) * h;
accum_inner_h += val;
val = ((uint64_t)v * v) * v;
accum_inner_v += val;
val = ((uint64_t)d * d) * d;
accum_inner_d += val;
}
/**
* max value of h^3, v^3, d^3 is 1.205624776 * 10^13
* accum_h can hold up to 1.844674407 * 10^19
* accum_h's maximum is reached when it equals 2^20 * max(h^3)
* Therefore accum_h,v,d are shifted by ceil(log2(rows * cols)) - 20
*/
accum_h += (accum_inner_h + add_shift_accum) >> shift_accum;
accum_v += (accum_inner_v + add_shift_accum) >> shift_accum;
accum_d += (accum_inner_d + add_shift_accum) >> shift_accum;
src_h += src_stride;
src_v += src_stride;
src_d += src_stride;
}
/**
* rfactor is multiplied after cubing.
* accum_h,v,d are converted to floating-point for the score calculation.
* 6 bits from the previous (dwt) stage are still pending, hence
* after cubing 18 bits remain to be shifted.
* Hence the final shift is 18 - shift_accum
*/
double shift_csf = pow(2, (18 - shift_accum));
double csf_h = (double)(accum_h / shift_csf) * pow(rfactor[0], 3);
double csf_v = (double)(accum_v / shift_csf) * pow(rfactor[1], 3);
double csf_d = (double)(accum_d / shift_csf) * pow(rfactor[2], 3);
float powf_add = powf((bottom - top) * (right - left) / 32.0f, 1.0f / 3.0f);
float den_scale_h = powf(csf_h, 1.0f / 3.0f) + powf_add;
float den_scale_v = powf(csf_v, 1.0f / 3.0f) + powf_add;
float den_scale_d = powf(csf_d, 1.0f / 3.0f) + powf_add;
return(den_scale_h + den_scale_v + den_scale_d);
}
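/*
 * Reference sketch (illustrative): per orientation, the accumulation above
 * approximates
 *
 *   den_scale = cbrt( sum_{i,j} (|a[i][j]| * rfactor)^3 ) + cbrt(N / 32)
 *
 * where a is the input subband, rfactor the reciprocal CSF quantization step,
 * and N the number of samples inside the border region; the function returns
 * the sum of the three orientation terms.
 */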
static float adm_csf_den_s123(const i4_adm_dwt_band_t *src, int scale, int w, int h,
int src_stride,
double adm_norm_view_dist, int adm_ref_display_height)
{
// for ADM: the scale index goes from 0 to 3, but in the noise floor paper it
// goes from 1 to 4 (from finest scale to coarsest scale).
float factor1 = dwt_quant_step(&dwt_7_9_YCbCr_threshold[0], scale, 1, adm_norm_view_dist, adm_ref_display_height);
float factor2 = dwt_quant_step(&dwt_7_9_YCbCr_threshold[0], scale, 2, adm_norm_view_dist, adm_ref_display_height);
const float rfactor[3] = { 1.0f / factor1, 1.0f / factor1, 1.0f / factor2 };
uint64_t accum_h = 0, accum_v = 0, accum_d = 0;
const uint32_t shift_sq[3] = { 31, 30, 31 };
const uint32_t accum_convert_float[3] = { 32, 27, 23 };
const uint32_t add_shift_sq[3] =
{ 1u << shift_sq[0], 1u << shift_sq[1], 1u << shift_sq[2] };
/* The computation of the denominator scales is not required for the regions
* which lie outside the frame borders
*/
const int left = w * ADM_BORDER_FACTOR - 0.5;
const int top = h * ADM_BORDER_FACTOR - 0.5;
const int right = w - left;
const int bottom = h - top;
uint32_t shift_cub = (uint32_t)ceil(log2(right - left));
uint32_t add_shift_cub = (uint32_t)pow(2, (shift_cub - 1));
uint32_t shift_accum = (uint32_t)ceil(log2(bottom - top));
uint32_t add_shift_accum = (uint32_t)pow(2, (shift_accum - 1));
int32_t *src_h = src->band_h + top * src_stride;
int32_t *src_v = src->band_v + top * src_stride;
int32_t *src_d = src->band_d + top * src_stride;
for (int i = top; i < bottom; ++i)
{
uint64_t accum_inner_h = 0;
uint64_t accum_inner_v = 0;
uint64_t accum_inner_d = 0;
for (int j = left; j < right; ++j)
{
uint32_t h = (uint32_t)abs(src_h[j]);
uint32_t v = (uint32_t)abs(src_v[j]);
uint32_t d = (uint32_t)abs(src_d[j]);
uint64_t val = ((((((uint64_t)h * h) + add_shift_sq[scale - 1]) >>
shift_sq[scale - 1]) * h) + add_shift_cub) >> shift_cub;
accum_inner_h += val;
val = ((((((uint64_t)v * v) + add_shift_sq[scale - 1]) >>
shift_sq[scale - 1]) * v) + add_shift_cub) >> shift_cub;
accum_inner_v += val;
val = ((((((uint64_t)d * d) + add_shift_sq[scale - 1]) >>
shift_sq[scale - 1]) * d) + add_shift_cub) >> shift_cub;
accum_inner_d += val;
}
accum_h += (accum_inner_h + add_shift_accum) >> shift_accum;
accum_v += (accum_inner_v + add_shift_accum) >> shift_accum;
accum_d += (accum_inner_d + add_shift_accum) >> shift_accum;
src_h += src_stride;
src_v += src_stride;
src_d += src_stride;
}
/**
* All the results are converted to floating-point to calculate the scores
* For all scales the final shift is 3 * (shifts pending from the dwt) minus the total shifts already applied here
*/
double shift_csf = pow(2, (accum_convert_float[scale - 1] - shift_accum - shift_cub));
double csf_h = (double)(accum_h / shift_csf) * pow(rfactor[0], 3);
double csf_v = (double)(accum_v / shift_csf) * pow(rfactor[1], 3);
double csf_d = (double)(accum_d / shift_csf) * pow(rfactor[2], 3);
float powf_add = powf((bottom - top) * (right - left) / 32.0f, 1.0f / 3.0f);
float den_scale_h = powf(csf_h, 1.0f / 3.0f) + powf_add;
float den_scale_v = powf(csf_v, 1.0f / 3.0f) + powf_add;
float den_scale_d = powf(csf_d, 1.0f / 3.0f) + powf_add;
return (den_scale_h + den_scale_v + den_scale_d);
}
static float adm_cm(AdmBuffer *buf, int w, int h, int src_stride, int csf_a_stride,
double adm_norm_view_dist, int adm_ref_display_height)
{
const adm_dwt_band_t *src = &buf->decouple_r;
const adm_dwt_band_t *csf_f = &buf->csf_f;
const adm_dwt_band_t *csf_a = &buf->csf_a;
// for ADM: the scale index goes from 0 to 3, but in the noise floor paper it
// goes from 1 to 4 (from finest scale to coarsest scale).
// scale 0 is the value passed to dwt_quant_step here
const float factor1 = dwt_quant_step(&dwt_7_9_YCbCr_threshold[0], 0, 1, adm_norm_view_dist, adm_ref_display_height);
const float factor2 = dwt_quant_step(&dwt_7_9_YCbCr_threshold[0], 0, 2, adm_norm_view_dist, adm_ref_display_height);
const float rfactor1[3] = { 1.0f / factor1, 1.0f / factor1, 1.0f / factor2 };
/**
* rfactor is converted to fixed-point for scale0 and stored in i_rfactor
* multiplied by 2^21 for rfactor[0,1] and by 2^23 for rfactor[2].
* For adm_norm_view_dist 3.0 and adm_ref_display_height 1080,
* i_rfactor is around { 36453,36453,49417 }
*/
uint16_t i_rfactor[3];
if (fabs(adm_norm_view_dist * adm_ref_display_height - DEFAULT_ADM_NORM_VIEW_DIST * DEFAULT_ADM_REF_DISPLAY_HEIGHT) < 1.0e-8) {
i_rfactor[0] = 36453;
i_rfactor[1] = 36453;
i_rfactor[2] = 49417;
}
else {
const double pow2_21 = pow(2, 21);
const double pow2_23 = pow(2, 23);
i_rfactor[0] = (uint16_t) (rfactor1[0] * pow2_21);
i_rfactor[1] = (uint16_t) (rfactor1[1] * pow2_21);
i_rfactor[2] = (uint16_t) (rfactor1[2] * pow2_23);
}
const int32_t shift_xhsq = 29;
const int32_t shift_xvsq = 29;
const int32_t shift_xdsq = 30;
const int32_t add_shift_xhsq = 268435456;
const int32_t add_shift_xvsq = 268435456;
const int32_t add_shift_xdsq = 536870912;
const uint32_t shift_xhcub = (uint32_t)ceil(log2(w) - 4);
const uint32_t add_shift_xhcub = (uint32_t)pow(2, (shift_xhcub - 1));
const uint32_t shift_xvcub = (uint32_t)ceil(log2(w) - 4);
const uint32_t add_shift_xvcub = (uint32_t)pow(2, (shift_xvcub - 1));
const uint32_t shift_xdcub = (uint32_t)ceil(log2(w) - 3);
const uint32_t add_shift_xdcub = (uint32_t)pow(2, (shift_xdcub - 1));
const uint32_t shift_inner_accum = (uint32_t)ceil(log2(h));
const uint32_t add_shift_inner_accum = (uint32_t)pow(2, (shift_inner_accum - 1));
const int32_t shift_xhsub = 10;
const int32_t shift_xvsub = 10;
const int32_t shift_xdsub = 12;
int16_t *angles[3] = { csf_a->band_h, csf_a->band_v, csf_a->band_d };
int16_t *flt_angles[3] = { csf_f->band_h, csf_f->band_v, csf_f->band_d };
/* The computation of the scales is not required for the regions which lie
* outside the frame borders
*/
int left = w * ADM_BORDER_FACTOR - 0.5;
int top = h * ADM_BORDER_FACTOR - 0.5;
int right = w - left;
int bottom = h - top;
const int start_col = (left > 1) ? left : 1;
const int end_col = (right < (w - 1)) ? right : (w - 1);
const int start_row = (top > 1) ? top : 1;
const int end_row = (bottom < (h - 1)) ? bottom : (h - 1);
int i, j;
int64_t val;
int32_t xh, xv, xd, thr;
int32_t xh_sq, xv_sq, xd_sq;
int64_t accum_h = 0, accum_v = 0, accum_d = 0;
int64_t accum_inner_h = 0, accum_inner_v = 0, accum_inner_d = 0;
/* i=0,j=0 */
if ((top <= 0) && (left <= 0))
{
xh = (int32_t)src->band_h[0] * i_rfactor[0];
xv = (int32_t)src->band_v[0] * i_rfactor[1];
xd = (int32_t)src->band_d[0] * i_rfactor[2];
ADM_CM_THRESH_S_0_0(angles, flt_angles, csf_a_stride, &thr, w, h, 0, 0);
//thr is shifted to make its Q format equivalent to xh,xv,xd
/**
* max value of xh_sq and xv_sq is 1301381973 and that of xd_sq is 1195806729
*
* max(val before shift for h and v) is 9.995357299 * 10^17.
* 9.995357299 * 10^17 * 2^4 is close to 2^64.
* Hence the shift is done based on the width, subtracting 4
*
* max(val before shift for d) is 1.355006643 * 10^18
* 1.355006643 * 10^18 * 2^3 is close to 2^64
* Hence the shift is done based on the width, subtracting 3
*/
ADM_CM_ACCUM_ROUND(xh, thr, shift_xhsub, xh_sq, add_shift_xhsq, shift_xhsq, val,
add_shift_xhcub, shift_xhcub, accum_inner_h);
ADM_CM_ACCUM_ROUND(xv, thr, shift_xvsub, xv_sq, add_shift_xvsq, shift_xvsq, val,
add_shift_xvcub, shift_xvcub, accum_inner_v);
ADM_CM_ACCUM_ROUND(xd, thr, shift_xdsub, xd_sq, add_shift_xdsq, shift_xdsq, val,
add_shift_xdcub, shift_xdcub, accum_inner_d);
}
/* i=0, j */
if (top <= 0) {
for (j = start_col; j < end_col; ++j) {
xh = src->band_h[j] * i_rfactor[0];
xv = src->band_v[j] * i_rfactor[1];
xd = src->band_d[j] * i_rfactor[2];
ADM_CM_THRESH_S_0_J(angles, flt_angles, csf_a_stride, &thr, w, h, 0, j);
ADM_CM_ACCUM_ROUND(xh, thr, shift_xhsub, xh_sq, add_shift_xhsq, shift_xhsq, val,
add_shift_xhcub, shift_xhcub, accum_inner_h);
ADM_CM_ACCUM_ROUND(xv, thr, shift_xvsub, xv_sq, add_shift_xvsq, shift_xvsq, val,
add_shift_xvcub, shift_xvcub, accum_inner_v);
ADM_CM_ACCUM_ROUND(xd, thr, shift_xdsub, xd_sq, add_shift_xdsq, shift_xdsq, val,
add_shift_xdcub, shift_xdcub, accum_inner_d);
}
}
/* i=0,j=w-1 */
if ((top <= 0) && (right > (w - 1)))
{
xh = src->band_h[w - 1] * i_rfactor[0];
xv = src->band_v[w - 1] * i_rfactor[1];
xd = src->band_d[w - 1] * i_rfactor[2];
ADM_CM_THRESH_S_0_W_M_1(angles, flt_angles, csf_a_stride, &thr, w, h, 0, (w - 1));
ADM_CM_ACCUM_ROUND(xh, thr, shift_xhsub, xh_sq, add_shift_xhsq, shift_xhsq, val,
add_shift_xhcub, shift_xhcub, accum_inner_h);
ADM_CM_ACCUM_ROUND(xv, thr, shift_xvsub, xv_sq, add_shift_xvsq, shift_xvsq, val,
add_shift_xvcub, shift_xvcub, accum_inner_v);
ADM_CM_ACCUM_ROUND(xd, thr, shift_xdsub, xd_sq, add_shift_xdsq, shift_xdsq, val,
add_shift_xdcub, shift_xdcub, accum_inner_d);
}
//Shift is done based on height
accum_h += (accum_inner_h + add_shift_inner_accum) >> shift_inner_accum;
accum_v += (accum_inner_v + add_shift_inner_accum) >> shift_inner_accum;
accum_d += (accum_inner_d + add_shift_inner_accum) >> shift_inner_accum;
if ((left > 0) && (right <= (w - 1))) /* Completely within frame */
{
for (i = start_row; i < end_row; ++i) {
accum_inner_h = 0;
accum_inner_v = 0;
accum_inner_d = 0;
for (j = start_col; j < end_col; ++j) {
xh = src->band_h[i * src_stride + j] * i_rfactor[0];
xv = src->band_v[i * src_stride + j] * i_rfactor[1];
xd = src->band_d[i * src_stride + j] * i_rfactor[2];
ADM_CM_THRESH_S_I_J(angles, flt_angles, csf_a_stride, &thr, w, h, i, j);
ADM_CM_ACCUM_ROUND(xh, thr, shift_xhsub, xh_sq, add_shift_xhsq, shift_xhsq, val,
add_shift_xhcub, shift_xhcub, accum_inner_h);
ADM_CM_ACCUM_ROUND(xv, thr, shift_xvsub, xv_sq, add_shift_xvsq, shift_xvsq, val,
add_shift_xvcub, shift_xvcub, accum_inner_v);
ADM_CM_ACCUM_ROUND(xd, thr, shift_xdsub, xd_sq, add_shift_xdsq, shift_xdsq, val,
add_shift_xdcub, shift_xdcub, accum_inner_d);
}
accum_h += (accum_inner_h + add_shift_inner_accum) >> shift_inner_accum;
accum_v += (accum_inner_v + add_shift_inner_accum) >> shift_inner_accum;
accum_d += (accum_inner_d + add_shift_inner_accum) >> shift_inner_accum;
}
}
else if ((left <= 0) && (right <= (w - 1))) /* Right border within frame, left outside */
{
for (i = start_row; i < end_row; ++i) {
accum_inner_h = 0;
accum_inner_v = 0;
accum_inner_d = 0;
/* j = 0 */
xh = src->band_h[i * src_stride] * i_rfactor[0];
xv = src->band_v[i * src_stride] * i_rfactor[1];
xd = src->band_d[i * src_stride] * i_rfactor[2];
ADM_CM_THRESH_S_I_0(angles, flt_angles, csf_a_stride, &thr, w, h, i, 0);
ADM_CM_ACCUM_ROUND(xh, thr, shift_xhsub, xh_sq, add_shift_xhsq, shift_xhsq, val,
add_shift_xhcub, shift_xhcub, accum_inner_h);
ADM_CM_ACCUM_ROUND(xv, thr, shift_xvsub, xv_sq, add_shift_xvsq, shift_xvsq, val,
add_shift_xvcub, shift_xvcub, accum_inner_v);
ADM_CM_ACCUM_ROUND(xd, thr, shift_xdsub, xd_sq, add_shift_xdsq, shift_xdsq, val,
add_shift_xdcub, shift_xdcub, accum_inner_d);
/* j within frame */
for (j = start_col; j < end_col; ++j) {
xh = src->band_h[i * src_stride + j] * i_rfactor[0];
xv = src->band_v[i * src_stride + j] * i_rfactor[1];
xd = src->band_d[i * src_stride + j] * i_rfactor[2];
ADM_CM_THRESH_S_I_J(angles, flt_angles, csf_a_stride, &thr, w, h, i, j);
ADM_CM_ACCUM_ROUND(xh, thr, shift_xhsub, xh_sq, add_shift_xhsq, shift_xhsq, val,
add_shift_xhcub, shift_xhcub, accum_inner_h);
ADM_CM_ACCUM_ROUND(xv, thr, shift_xvsub, xv_sq, add_shift_xvsq, shift_xvsq, val,
add_shift_xvcub, shift_xvcub, accum_inner_v);
ADM_CM_ACCUM_ROUND(xd, thr, shift_xdsub, xd_sq, add_shift_xdsq, shift_xdsq, val,
add_shift_xdcub, shift_xdcub, accum_inner_d);
}
accum_h += (accum_inner_h + add_shift_inner_accum) >> shift_inner_accum;
accum_v += (accum_inner_v + add_shift_inner_accum) >> shift_inner_accum;
accum_d += (accum_inner_d + add_shift_inner_accum) >> shift_inner_accum;
}
}
else if ((left > 0) && (right > (w - 1))) /* Left border within frame, right outside */
{
for (i = start_row; i < end_row; ++i) {
accum_inner_h = 0;
accum_inner_v = 0;
accum_inner_d = 0;
/* j within frame */
for (j = start_col; j < end_col; ++j) {
xh = src->band_h[i * src_stride + j] * i_rfactor[0];
xv = src->band_v[i * src_stride + j] * i_rfactor[1];
xd = src->band_d[i * src_stride + j] * i_rfactor[2];
ADM_CM_THRESH_S_I_J(angles, flt_angles, csf_a_stride, &thr, w, h, i, j);
ADM_CM_ACCUM_ROUND(xh, thr, shift_xhsub, xh_sq, add_shift_xhsq, shift_xhsq, val,
add_shift_xhcub, shift_xhcub, accum_inner_h);
ADM_CM_ACCUM_ROUND(xv, thr, shift_xvsub, xv_sq, add_shift_xvsq, shift_xvsq, val,
add_shift_xvcub, shift_xvcub, accum_inner_v);
ADM_CM_ACCUM_ROUND(xd, thr, shift_xdsub, xd_sq, add_shift_xdsq, shift_xdsq, val,
add_shift_xdcub, shift_xdcub, accum_inner_d);
}
/* j = w-1 */
xh = src->band_h[i * src_stride + w - 1] * i_rfactor[0];
xv = src->band_v[i * src_stride + w - 1] * i_rfactor[1];
xd = src->band_d[i * src_stride + w - 1] * i_rfactor[2];
ADM_CM_THRESH_S_I_W_M_1(angles, flt_angles, csf_a_stride, &thr, w, h, i, (w - 1));
ADM_CM_ACCUM_ROUND(xh, thr, shift_xhsub, xh_sq, add_shift_xhsq, shift_xhsq, val,
add_shift_xhcub, shift_xhcub, accum_inner_h);
ADM_CM_ACCUM_ROUND(xv, thr, shift_xvsub, xv_sq, add_shift_xvsq, shift_xvsq, val,
add_shift_xvcub, shift_xvcub, accum_inner_v);
ADM_CM_ACCUM_ROUND(xd, thr, shift_xdsub, xd_sq, add_shift_xdsq, shift_xdsq, val,
add_shift_xdcub, shift_xdcub, accum_inner_d);
accum_h += (accum_inner_h + add_shift_inner_accum) >> shift_inner_accum;
accum_v += (accum_inner_v + add_shift_inner_accum) >> shift_inner_accum;
accum_d += (accum_inner_d + add_shift_inner_accum) >> shift_inner_accum;
}
}
else /* Both borders outside frame */
{
for (i = start_row; i < end_row; ++i) {
accum_inner_h = 0;
accum_inner_v = 0;
accum_inner_d = 0;
/* j = 0 */
xh = src->band_h[i * src_stride] * i_rfactor[0];
xv = src->band_v[i * src_stride] * i_rfactor[1];
xd = src->band_d[i * src_stride] * i_rfactor[2];
ADM_CM_THRESH_S_I_0(angles, flt_angles, csf_a_stride, &thr, w, h, i, 0);
ADM_CM_ACCUM_ROUND(xh, thr, shift_xhsub, xh_sq, add_shift_xhsq, shift_xhsq, val,
add_shift_xhcub, shift_xhcub, accum_inner_h);
ADM_CM_ACCUM_ROUND(xv, thr, shift_xvsub, xv_sq, add_shift_xvsq, shift_xvsq, val,
add_shift_xvcub, shift_xvcub, accum_inner_v);
ADM_CM_ACCUM_ROUND(xd, thr, shift_xdsub, xd_sq, add_shift_xdsq, shift_xdsq, val,
add_shift_xdcub, shift_xdcub, accum_inner_d);
/* j within frame */
for (j = start_col; j < end_col; ++j) {
xh = src->band_h[i * src_stride + j] * i_rfactor[0];
xv = src->band_v[i * src_stride + j] * i_rfactor[1];
xd = src->band_d[i * src_stride + j] * i_rfactor[2];
ADM_CM_THRESH_S_I_J(angles, flt_angles, csf_a_stride, &thr, w, h, i, j);
ADM_CM_ACCUM_ROUND(xh, thr, shift_xhsub, xh_sq, add_shift_xhsq, shift_xhsq, val,
add_shift_xhcub, shift_xhcub, accum_inner_h);
ADM_CM_ACCUM_ROUND(xv, thr, shift_xvsub, xv_sq, add_shift_xvsq, shift_xvsq, val,
add_shift_xvcub, shift_xvcub, accum_inner_v);
ADM_CM_ACCUM_ROUND(xd, thr, shift_xdsub, xd_sq, add_shift_xdsq, shift_xdsq, val,
add_shift_xdcub, shift_xdcub, accum_inner_d);
}
/* j = w-1 */
xh = src->band_h[i * src_stride + w - 1] * i_rfactor[0];
xv = src->band_v[i * src_stride + w - 1] * i_rfactor[1];
xd = src->band_d[i * src_stride + w - 1] * i_rfactor[2];
ADM_CM_THRESH_S_I_W_M_1(angles, flt_angles, csf_a_stride, &thr, w, h, i, (w - 1));
ADM_CM_ACCUM_ROUND(xh, thr, shift_xhsub, xh_sq, add_shift_xhsq, shift_xhsq, val,
add_shift_xhcub, shift_xhcub, accum_inner_h);
ADM_CM_ACCUM_ROUND(xv, thr, shift_xvsub, xv_sq, add_shift_xvsq, shift_xvsq, val,
add_shift_xvcub, shift_xvcub, accum_inner_v);
ADM_CM_ACCUM_ROUND(xd, thr, shift_xdsub, xd_sq, add_shift_xdsq, shift_xdsq, val,
add_shift_xdcub, shift_xdcub, accum_inner_d);
accum_h += (accum_inner_h + add_shift_inner_accum) >> shift_inner_accum;
accum_v += (accum_inner_v + add_shift_inner_accum) >> shift_inner_accum;
accum_d += (accum_inner_d + add_shift_inner_accum) >> shift_inner_accum;
}
}
accum_inner_h = 0;
accum_inner_v = 0;
accum_inner_d = 0;
/* i=h-1,j=0 */
if ((bottom > (h - 1)) && (left <= 0))
{
xh = src->band_h[(h - 1) * src_stride] * i_rfactor[0];
xv = src->band_v[(h - 1) * src_stride] * i_rfactor[1];
xd = src->band_d[(h - 1) * src_stride] * i_rfactor[2];
ADM_CM_THRESH_S_H_M_1_0(angles, flt_angles, csf_a_stride, &thr, w, h, (h - 1), 0);
ADM_CM_ACCUM_ROUND(xh, thr, shift_xhsub, xh_sq, add_shift_xhsq, shift_xhsq, val,
add_shift_xhcub, shift_xhcub, accum_inner_h);
ADM_CM_ACCUM_ROUND(xv, thr, shift_xvsub, xv_sq, add_shift_xvsq, shift_xvsq, val,
add_shift_xvcub, shift_xvcub, accum_inner_v);
ADM_CM_ACCUM_ROUND(xd, thr, shift_xdsub, xd_sq, add_shift_xdsq, shift_xdsq, val,
add_shift_xdcub, shift_xdcub, accum_inner_d);
}
/* i=h-1,j */
if (bottom > (h - 1)) {
for (j = start_col; j < end_col; ++j) {
xh = src->band_h[(h - 1) * src_stride + j] * i_rfactor[0];
xv = src->band_v[(h - 1) * src_stride + j] * i_rfactor[1];
xd = src->band_d[(h - 1) * src_stride + j] * i_rfactor[2];
ADM_CM_THRESH_S_H_M_1_J(angles, flt_angles, csf_a_stride, &thr, w, h, (h - 1), j);
ADM_CM_ACCUM_ROUND(xh, thr, shift_xhsub, xh_sq, add_shift_xhsq, shift_xhsq, val,
add_shift_xhcub, shift_xhcub, accum_inner_h);
ADM_CM_ACCUM_ROUND(xv, thr, shift_xvsub, xv_sq, add_shift_xvsq, shift_xvsq, val,
add_shift_xvcub, shift_xvcub, accum_inner_v);
ADM_CM_ACCUM_ROUND(xd, thr, shift_xdsub, xd_sq, add_shift_xdsq, shift_xdsq, val,
add_shift_xdcub, shift_xdcub, accum_inner_d);
}
}
/* i=h-1,j=w-1 */
if ((bottom > (h - 1)) && (right > (w - 1)))
{
xh = src->band_h[(h - 1) * src_stride + w - 1] * i_rfactor[0];
xv = src->band_v[(h - 1) * src_stride + w - 1] * i_rfactor[1];
xd = src->band_d[(h - 1) * src_stride + w - 1] * i_rfactor[2];
ADM_CM_THRESH_S_H_M_1_W_M_1(angles, flt_angles, csf_a_stride, &thr, w, h,
(h - 1), (w - 1));
ADM_CM_ACCUM_ROUND(xh, thr, shift_xhsub, xh_sq, add_shift_xhsq, shift_xhsq, val,
add_shift_xhcub, shift_xhcub, accum_inner_h);
ADM_CM_ACCUM_ROUND(xv, thr, shift_xvsub, xv_sq, add_shift_xvsq, shift_xvsq, val,
add_shift_xvcub, shift_xvcub, accum_inner_v);
ADM_CM_ACCUM_ROUND(xd, thr, shift_xdsub, xd_sq, add_shift_xdsq, shift_xdsq, val,
add_shift_xdcub, shift_xdcub, accum_inner_d);
}
accum_h += (accum_inner_h + add_shift_inner_accum) >> shift_inner_accum;
accum_v += (accum_inner_v + add_shift_inner_accum) >> shift_inner_accum;
accum_d += (accum_inner_d + add_shift_inner_accum) >> shift_inner_accum;
/**
* For h and v, the shift pending from the last stage is 6 and rfactor[0,1] carries 21 shifts
* => after cubing, (6+21)*3 = 81; the squaring step already shifted by 29,
* hence 52 minus the width- and height-based shifts remain
*
* For d, the shift pending from the last stage is 6 and rfactor[2] carries 23 shifts
* => after cubing, (6+23)*3 = 87; the squaring step already shifted by 30,
* hence 57 minus the width- and height-based shifts remain
*/
float f_accum_h = (float)(accum_h / pow(2, (52 - shift_xhcub - shift_inner_accum)));
float f_accum_v = (float)(accum_v / pow(2, (52 - shift_xvcub - shift_inner_accum)));
float f_accum_d = (float)(accum_d / pow(2, (57 - shift_xdcub - shift_inner_accum)));
float num_scale_h = powf(f_accum_h, 1.0f / 3.0f) + powf((bottom - top) *
(right - left) / 32.0f, 1.0f / 3.0f);
float num_scale_v = powf(f_accum_v, 1.0f / 3.0f) + powf((bottom - top) *
(right - left) / 32.0f, 1.0f / 3.0f);
float num_scale_d = powf(f_accum_d, 1.0f / 3.0f) + powf((bottom - top) *
(right - left) / 32.0f, 1.0f / 3.0f);
return (num_scale_h + num_scale_v + num_scale_d);
}
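/*
 * Reference sketch (illustrative): per orientation, the fixed-point pipeline
 * above approximates
 *
 *   num_scale = cbrt( sum_{i,j} max(|r[i][j]| * rfactor - thr[i][j], 0)^3 )
 *             + cbrt(N / 32)
 *
 * where r is the restored (decoupled) subband, thr the masking threshold from
 * the ADM_CM_THRESH_S_* macros, and N the number of samples inside the border
 * region; the return value is the sum of the three orientation terms.
 */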
static float i4_adm_cm(AdmBuffer *buf, int w, int h, int src_stride, int csf_a_stride, int scale,
double adm_norm_view_dist, int adm_ref_display_height)
{
const i4_adm_dwt_band_t *src = &buf->i4_decouple_r;
const i4_adm_dwt_band_t *csf_f = &buf->i4_csf_f;
const i4_adm_dwt_band_t *csf_a = &buf->i4_csf_a;
// for ADM: the scale index goes from 0 to 3, but in the noise floor paper it
// goes from 1 to 4 (from finest scale to coarsest scale).
float factor1 = dwt_quant_step(&dwt_7_9_YCbCr_threshold[0], scale, 1, adm_norm_view_dist, adm_ref_display_height);
float factor2 = dwt_quant_step(&dwt_7_9_YCbCr_threshold[0], scale, 2, adm_norm_view_dist, adm_ref_display_height);
float rfactor1[3] = { 1.0f / factor1, 1.0f / factor1, 1.0f / factor2 };
const uint32_t rfactor[3] = { (uint32_t)(rfactor1[0] * pow(2, 32)),
(uint32_t)(rfactor1[1] * pow(2, 32)),
(uint32_t)(rfactor1[2] * pow(2, 32)) };
const uint32_t shift_dst[3] = { 28, 28, 28 };
const uint32_t shift_flt[3] = { 32, 32, 32 };
int32_t add_bef_shift_dst[3], add_bef_shift_flt[3];
for (unsigned idx = 0; idx < 3; ++idx) {
add_bef_shift_dst[idx] = (1u << (shift_dst[idx] - 1));
add_bef_shift_flt[idx] = (1u << (shift_flt[idx] - 1));
}
uint32_t shift_cub = (uint32_t)ceil(log2(w));
uint32_t add_shift_cub = (uint32_t)pow(2, (shift_cub - 1));
uint32_t shift_inner_accum = (uint32_t)ceil(log2(h));
uint32_t add_shift_inner_accum = (uint32_t)pow(2, (shift_inner_accum - 1));
float final_shift[3] = { pow(2,(45 - shift_cub - shift_inner_accum)),
pow(2,(39 - shift_cub - shift_inner_accum)),
pow(2,(36 - shift_cub - shift_inner_accum)) };
const int32_t shift_sq = 30;
const int32_t add_shift_sq = 536870912; //2^29
const int32_t shift_sub = 0;
int32_t *angles[3] = { csf_a->band_h, csf_a->band_v, csf_a->band_d };
int32_t *flt_angles[3] = { csf_f->band_h, csf_f->band_v, csf_f->band_d };
/* The computation is not required for the regions that lie outside the
* frame borders (the ADM_BORDER_FACTOR crop).
*/
const int left = w * ADM_BORDER_FACTOR - 0.5;
const int top = h * ADM_BORDER_FACTOR - 0.5;
const int right = w - left;
const int bottom = h - top;
const int start_col = (left > 1) ? left : 1;
const int end_col = (right < (w - 1)) ? right : (w - 1);
const int start_row = (top > 1) ? top : 1;
const int end_row = (bottom < (h - 1)) ? bottom : (h - 1);
int i, j;
int32_t xh, xv, xd, thr;
int32_t xh_sq, xv_sq, xd_sq;
int64_t val;
int64_t accum_h = 0, accum_v = 0, accum_d = 0;
int64_t accum_inner_h = 0, accum_inner_v = 0, accum_inner_d = 0;
/* i=0,j=0 */
if ((top <= 0) && (left <= 0))
{
xh = (int32_t)((((int64_t)src->band_h[0] * rfactor[0]) + add_bef_shift_dst[scale - 1])
>> shift_dst[scale - 1]);
xv = (int32_t)((((int64_t)src->band_v[0] * rfactor[1]) + add_bef_shift_dst[scale - 1])
>> shift_dst[scale - 1]);
xd = (int32_t)((((int64_t)src->band_d[0] * rfactor[2]) + add_bef_shift_dst[scale - 1])
>> shift_dst[scale - 1]);
I4_ADM_CM_THRESH_S_0_0(angles, flt_angles, csf_a_stride, &thr, w, h, 0, 0,
add_bef_shift_flt[scale - 1], shift_flt[scale - 1]);
I4_ADM_CM_ACCUM_ROUND(xh, thr, shift_sub, xh_sq, add_shift_sq, shift_sq, val,
add_shift_cub, shift_cub, accum_inner_h);
I4_ADM_CM_ACCUM_ROUND(xv, thr, shift_sub, xv_sq, add_shift_sq, shift_sq, val,
add_shift_cub, shift_cub, accum_inner_v);
I4_ADM_CM_ACCUM_ROUND(xd, thr, shift_sub, xd_sq, add_shift_sq, shift_sq, val,
add_shift_cub, shift_cub, accum_inner_d);
}
/* i=0, j */
if (top <= 0)
{
for (j = start_col; j < end_col; ++j)
{
xh = (int32_t)((((int64_t)src->band_h[j] * rfactor[0]) +
add_bef_shift_dst[scale - 1]) >> shift_dst[scale - 1]);
xv = (int32_t)((((int64_t)src->band_v[j] * rfactor[1]) +
add_bef_shift_dst[scale - 1]) >> shift_dst[scale - 1]);
xd = (int32_t)((((int64_t)src->band_d[j] * rfactor[2]) +
add_bef_shift_dst[scale - 1]) >> shift_dst[scale - 1]);
I4_ADM_CM_THRESH_S_0_J(angles, flt_angles, csf_a_stride, &thr, w, h,
0, j, add_bef_shift_flt[scale - 1], shift_flt[scale - 1]);
I4_ADM_CM_ACCUM_ROUND(xh, thr, shift_sub, xh_sq, add_shift_sq, shift_sq, val,
add_shift_cub, shift_cub, accum_inner_h);
I4_ADM_CM_ACCUM_ROUND(xv, thr, shift_sub, xv_sq, add_shift_sq, shift_sq, val,
add_shift_cub, shift_cub, accum_inner_v);
I4_ADM_CM_ACCUM_ROUND(xd, thr, shift_sub, xd_sq, add_shift_sq, shift_sq, val,
add_shift_cub, shift_cub, accum_inner_d);
}
}
/* i=0,j=w-1 */
if ((top <= 0) && (right > (w - 1)))
{
xh = (int32_t)((((int64_t)src->band_h[w - 1] * rfactor[0]) +
add_bef_shift_dst[scale - 1]) >> shift_dst[scale - 1]);
xv = (int32_t)((((int64_t)src->band_v[w - 1] * rfactor[1]) +
add_bef_shift_dst[scale - 1]) >> shift_dst[scale - 1]);
xd = (int32_t)((((int64_t)src->band_d[w - 1] * rfactor[2]) +
add_bef_shift_dst[scale - 1]) >> shift_dst[scale - 1]);
I4_ADM_CM_THRESH_S_0_W_M_1(angles, flt_angles, csf_a_stride, &thr, w, h, 0, (w - 1),
add_bef_shift_flt[scale - 1], shift_flt[scale - 1]);
I4_ADM_CM_ACCUM_ROUND(xh, thr, shift_sub, xh_sq, add_shift_sq, shift_sq, val,
add_shift_cub, shift_cub, accum_inner_h);
I4_ADM_CM_ACCUM_ROUND(xv, thr, shift_sub, xv_sq, add_shift_sq, shift_sq, val,
add_shift_cub, shift_cub, accum_inner_v);
I4_ADM_CM_ACCUM_ROUND(xd, thr, shift_sub, xd_sq, add_shift_sq, shift_sq, val,
add_shift_cub, shift_cub, accum_inner_d);
}
accum_h += (accum_inner_h + add_shift_inner_accum) >> shift_inner_accum;
accum_v += (accum_inner_v + add_shift_inner_accum) >> shift_inner_accum;
accum_d += (accum_inner_d + add_shift_inner_accum) >> shift_inner_accum;
if ((left > 0) && (right <= (w - 1))) /* Completely within frame */
{
for (i = start_row; i < end_row; ++i)
{
accum_inner_h = 0;
accum_inner_v = 0;
accum_inner_d = 0;
for (j = start_col; j < end_col; ++j)
{
xh = (int32_t)((((int64_t)src->band_h[i * src_stride + j] * rfactor[0]) +
add_bef_shift_dst[scale - 1]) >> shift_dst[scale - 1]);
xv = (int32_t)((((int64_t)src->band_v[i * src_stride + j] * rfactor[1]) +
add_bef_shift_dst[scale - 1]) >> shift_dst[scale - 1]);
xd = (int32_t)((((int64_t)src->band_d[i * src_stride + j] * rfactor[2]) +
add_bef_shift_dst[scale - 1]) >> shift_dst[scale - 1]);
I4_ADM_CM_THRESH_S_I_J(angles, flt_angles, csf_a_stride, &thr, w, h, i, j,
add_bef_shift_flt[scale - 1], shift_flt[scale - 1]);
I4_ADM_CM_ACCUM_ROUND(xh, thr, shift_sub, xh_sq, add_shift_sq, shift_sq, val,
add_shift_cub, shift_cub, accum_inner_h);
I4_ADM_CM_ACCUM_ROUND(xv, thr, shift_sub, xv_sq, add_shift_sq, shift_sq, val,
add_shift_cub, shift_cub, accum_inner_v);
I4_ADM_CM_ACCUM_ROUND(xd, thr, shift_sub, xd_sq, add_shift_sq, shift_sq, val,
add_shift_cub, shift_cub, accum_inner_d);
}
accum_h += (accum_inner_h + add_shift_inner_accum) >> shift_inner_accum;
accum_v += (accum_inner_v + add_shift_inner_accum) >> shift_inner_accum;
accum_d += (accum_inner_d + add_shift_inner_accum) >> shift_inner_accum;
}
}
else if ((left <= 0) && (right <= (w - 1))) /* Right border within frame, left outside */
{
for (i = start_row; i < end_row; ++i)
{
accum_inner_h = 0;
accum_inner_v = 0;
accum_inner_d = 0;
/* j = 0 */
xh = (int32_t)((((int64_t)src->band_h[i * src_stride] * rfactor[0]) +
add_bef_shift_dst[scale - 1]) >> shift_dst[scale - 1]);
xv = (int32_t)((((int64_t)src->band_v[i * src_stride] * rfactor[1]) +
add_bef_shift_dst[scale - 1]) >> shift_dst[scale - 1]);
xd = (int32_t)((((int64_t)src->band_d[i * src_stride] * rfactor[2]) +
add_bef_shift_dst[scale - 1]) >> shift_dst[scale - 1]);
I4_ADM_CM_THRESH_S_I_0(angles, flt_angles, csf_a_stride, &thr, w, h, i, 0,
add_bef_shift_flt[scale - 1], shift_flt[scale - 1]);
I4_ADM_CM_ACCUM_ROUND(xh, thr, shift_sub, xh_sq, add_shift_sq, shift_sq, val,
add_shift_cub, shift_cub, accum_inner_h);
I4_ADM_CM_ACCUM_ROUND(xv, thr, shift_sub, xv_sq, add_shift_sq, shift_sq, val,
add_shift_cub, shift_cub, accum_inner_v);
I4_ADM_CM_ACCUM_ROUND(xd, thr, shift_sub, xd_sq, add_shift_sq, shift_sq, val,
add_shift_cub, shift_cub, accum_inner_d);
/* j within frame */
for (j = start_col; j < end_col; ++j)
{
xh = (int32_t)((((int64_t)src->band_h[i * src_stride + j] * rfactor[0]) +
add_bef_shift_dst[scale - 1]) >> shift_dst[scale - 1]);
xv = (int32_t)((((int64_t)src->band_v[i * src_stride + j] * rfactor[1]) +
add_bef_shift_dst[scale - 1]) >> shift_dst[scale - 1]);
xd = (int32_t)((((int64_t)src->band_d[i * src_stride + j] * rfactor[2]) +
add_bef_shift_dst[scale - 1]) >> shift_dst[scale - 1]);
I4_ADM_CM_THRESH_S_I_J(angles, flt_angles, csf_a_stride, &thr, w, h, i, j,
add_bef_shift_flt[scale - 1], shift_flt[scale - 1]);
I4_ADM_CM_ACCUM_ROUND(xh, thr, shift_sub, xh_sq, add_shift_sq, shift_sq, val,
add_shift_cub, shift_cub, accum_inner_h);
I4_ADM_CM_ACCUM_ROUND(xv, thr, shift_sub, xv_sq, add_shift_sq, shift_sq, val,
add_shift_cub, shift_cub, accum_inner_v);
I4_ADM_CM_ACCUM_ROUND(xd, thr, shift_sub, xd_sq, add_shift_sq, shift_sq, val,
add_shift_cub, shift_cub, accum_inner_d);
}
accum_h += (accum_inner_h + add_shift_inner_accum) >> shift_inner_accum;
accum_v += (accum_inner_v + add_shift_inner_accum) >> shift_inner_accum;
accum_d += (accum_inner_d + add_shift_inner_accum) >> shift_inner_accum;
}
}
else if ((left > 0) && (right > (w - 1))) /* Left border within frame, right outside */
{
for (i = start_row; i < end_row; ++i)
{
accum_inner_h = 0;
accum_inner_v = 0;
accum_inner_d = 0;
/* j within frame */
for (j = start_col; j < end_col; ++j)
{
xh = (int32_t)((((int64_t)src->band_h[i * src_stride + j] * rfactor[0]) +
add_bef_shift_dst[scale - 1]) >> shift_dst[scale - 1]);
xv = (int32_t)((((int64_t)src->band_v[i * src_stride + j] * rfactor[1]) +
add_bef_shift_dst[scale - 1]) >> shift_dst[scale - 1]);
xd = (int32_t)((((int64_t)src->band_d[i * src_stride + j] * rfactor[2]) +
add_bef_shift_dst[scale - 1]) >> shift_dst[scale - 1]);
I4_ADM_CM_THRESH_S_I_J(angles, flt_angles, csf_a_stride, &thr, w, h, i, j,
add_bef_shift_flt[scale - 1], shift_flt[scale - 1]);
I4_ADM_CM_ACCUM_ROUND(xh, thr, shift_sub, xh_sq, add_shift_sq, shift_sq, val,
add_shift_cub, shift_cub, accum_inner_h);
I4_ADM_CM_ACCUM_ROUND(xv, thr, shift_sub, xv_sq, add_shift_sq, shift_sq, val,
add_shift_cub, shift_cub, accum_inner_v);
I4_ADM_CM_ACCUM_ROUND(xd, thr, shift_sub, xd_sq, add_shift_sq, shift_sq, val,
add_shift_cub, shift_cub, accum_inner_d);
}
/* j = w-1 */
xh = (int32_t)((((int64_t)src->band_h[i * src_stride + w - 1] * rfactor[0])
+ add_bef_shift_dst[scale - 1]) >> shift_dst[scale - 1]);
xv = (int32_t)((((int64_t)src->band_v[i * src_stride + w - 1] * rfactor[1])
+ add_bef_shift_dst[scale - 1]) >> shift_dst[scale - 1]);
xd = (int32_t)((((int64_t)src->band_d[i * src_stride + w - 1] * rfactor[2])
+ add_bef_shift_dst[scale - 1]) >> shift_dst[scale - 1]);
I4_ADM_CM_THRESH_S_I_W_M_1(angles, flt_angles, csf_a_stride, &thr, w, h, i, (w - 1),
add_bef_shift_flt[scale - 1], shift_flt[scale - 1]);
I4_ADM_CM_ACCUM_ROUND(xh, thr, shift_sub, xh_sq, add_shift_sq, shift_sq, val,
add_shift_cub, shift_cub, accum_inner_h);
I4_ADM_CM_ACCUM_ROUND(xv, thr, shift_sub, xv_sq, add_shift_sq, shift_sq, val,
add_shift_cub, shift_cub, accum_inner_v);
I4_ADM_CM_ACCUM_ROUND(xd, thr, shift_sub, xd_sq, add_shift_sq, shift_sq, val,
add_shift_cub, shift_cub, accum_inner_d);
accum_h += (accum_inner_h + add_shift_inner_accum) >> shift_inner_accum;
accum_v += (accum_inner_v + add_shift_inner_accum) >> shift_inner_accum;
accum_d += (accum_inner_d + add_shift_inner_accum) >> shift_inner_accum;
}
}
else /* Both borders outside frame */
{
for (i = start_row; i < end_row; ++i)
{
accum_inner_h = 0;
accum_inner_v = 0;
accum_inner_d = 0;
/* j = 0 */
xh = (int32_t)((((int64_t)src->band_h[i * src_stride] * rfactor[0]) +
add_bef_shift_dst[scale - 1]) >> shift_dst[scale - 1]);
xv = (int32_t)((((int64_t)src->band_v[i * src_stride] * rfactor[1]) +
add_bef_shift_dst[scale - 1]) >> shift_dst[scale - 1]);
xd = (int32_t)((((int64_t)src->band_d[i * src_stride] * rfactor[2]) +
add_bef_shift_dst[scale - 1]) >> shift_dst[scale - 1]);
I4_ADM_CM_THRESH_S_I_0(angles, flt_angles, csf_a_stride, &thr, w, h, i, 0,
add_bef_shift_flt[scale - 1], shift_flt[scale - 1]);
I4_ADM_CM_ACCUM_ROUND(xh, thr, shift_sub, xh_sq, add_shift_sq, shift_sq, val,
add_shift_cub, shift_cub, accum_inner_h);
I4_ADM_CM_ACCUM_ROUND(xv, thr, shift_sub, xv_sq, add_shift_sq, shift_sq, val,
add_shift_cub, shift_cub, accum_inner_v);
I4_ADM_CM_ACCUM_ROUND(xd, thr, shift_sub, xd_sq, add_shift_sq, shift_sq, val,
add_shift_cub, shift_cub, accum_inner_d);
/* j within frame */
for (j = start_col; j < end_col; ++j)
{
xh = (int32_t)((((int64_t)src->band_h[i * src_stride + j] * rfactor[0]) +
add_bef_shift_dst[scale - 1]) >> shift_dst[scale - 1]);
xv = (int32_t)((((int64_t)src->band_v[i * src_stride + j] * rfactor[1]) +
add_bef_shift_dst[scale - 1]) >> shift_dst[scale - 1]);
xd = (int32_t)((((int64_t)src->band_d[i * src_stride + j] * rfactor[2]) +
add_bef_shift_dst[scale - 1]) >> shift_dst[scale - 1]);
I4_ADM_CM_THRESH_S_I_J(angles, flt_angles, csf_a_stride, &thr, w, h, i, j,
add_bef_shift_flt[scale - 1], shift_flt[scale - 1]);
I4_ADM_CM_ACCUM_ROUND(xh, thr, shift_sub, xh_sq, add_shift_sq, shift_sq, val,
add_shift_cub, shift_cub, accum_inner_h);
I4_ADM_CM_ACCUM_ROUND(xv, thr, shift_sub, xv_sq, add_shift_sq, shift_sq, val,
add_shift_cub, shift_cub, accum_inner_v);
I4_ADM_CM_ACCUM_ROUND(xd, thr, shift_sub, xd_sq, add_shift_sq, shift_sq, val,
add_shift_cub, shift_cub, accum_inner_d);
}
/* j = w-1 */
xh = (int32_t)((((int64_t)src->band_h[i * src_stride + w - 1] * rfactor[0]) +
add_bef_shift_dst[scale - 1]) >> shift_dst[scale - 1]);
xv = (int32_t)((((int64_t)src->band_v[i * src_stride + w - 1] * rfactor[1]) +
add_bef_shift_dst[scale - 1]) >> shift_dst[scale - 1]);
xd = (int32_t)((((int64_t)src->band_d[i * src_stride + w - 1] * rfactor[2]) +
add_bef_shift_dst[scale - 1]) >> shift_dst[scale - 1]);
I4_ADM_CM_THRESH_S_I_W_M_1(angles, flt_angles, csf_a_stride, &thr, w, h, i, (w - 1),
add_bef_shift_flt[scale - 1], shift_flt[scale - 1]);
I4_ADM_CM_ACCUM_ROUND(xh, thr, shift_sub, xh_sq, add_shift_sq, shift_sq, val,
add_shift_cub, shift_cub, accum_inner_h);
I4_ADM_CM_ACCUM_ROUND(xv, thr, shift_sub, xv_sq, add_shift_sq, shift_sq, val,
add_shift_cub, shift_cub, accum_inner_v);
I4_ADM_CM_ACCUM_ROUND(xd, thr, shift_sub, xd_sq, add_shift_sq, shift_sq, val,
add_shift_cub, shift_cub, accum_inner_d);
accum_h += (accum_inner_h + add_shift_inner_accum) >> shift_inner_accum;
accum_v += (accum_inner_v + add_shift_inner_accum) >> shift_inner_accum;
accum_d += (accum_inner_d + add_shift_inner_accum) >> shift_inner_accum;
}
}
accum_inner_h = 0;
accum_inner_v = 0;
accum_inner_d = 0;
/* i=h-1,j=0 */
if ((bottom > (h - 1)) && (left <= 0))
{
xh = (int32_t)((((int64_t)src->band_h[(h - 1) * src_stride] * rfactor[0]) +
add_bef_shift_dst[scale - 1]) >> shift_dst[scale - 1]);
xv = (int32_t)((((int64_t)src->band_v[(h - 1) * src_stride] * rfactor[1]) +
add_bef_shift_dst[scale - 1]) >> shift_dst[scale - 1]);
xd = (int32_t)((((int64_t)src->band_d[(h - 1) * src_stride] * rfactor[2]) +
add_bef_shift_dst[scale - 1]) >> shift_dst[scale - 1]);
I4_ADM_CM_THRESH_S_H_M_1_0(angles, flt_angles, csf_a_stride, &thr, w, h, (h - 1), 0,
add_bef_shift_flt[scale - 1], shift_flt[scale - 1]);
I4_ADM_CM_ACCUM_ROUND(xh, thr, shift_sub, xh_sq, add_shift_sq, shift_sq, val,
add_shift_cub, shift_cub, accum_inner_h);
I4_ADM_CM_ACCUM_ROUND(xv, thr, shift_sub, xv_sq, add_shift_sq, shift_sq, val,
add_shift_cub, shift_cub, accum_inner_v);
I4_ADM_CM_ACCUM_ROUND(xd, thr, shift_sub, xd_sq, add_shift_sq, shift_sq, val,
add_shift_cub, shift_cub, accum_inner_d);
}
/* i=h-1,j */
if (bottom > (h - 1))
{
for (j = start_col; j < end_col; ++j)
{
xh = (int32_t)((((int64_t)src->band_h[(h - 1) * src_stride + j] * rfactor[0]) +
add_bef_shift_dst[scale - 1]) >> shift_dst[scale - 1]);
xv = (int32_t)((((int64_t)src->band_v[(h - 1) * src_stride + j] * rfactor[1]) +
add_bef_shift_dst[scale - 1]) >> shift_dst[scale - 1]);
xd = (int32_t)((((int64_t)src->band_d[(h - 1) * src_stride + j] * rfactor[2]) +
add_bef_shift_dst[scale - 1]) >> shift_dst[scale - 1]);
I4_ADM_CM_THRESH_S_H_M_1_J(angles, flt_angles, csf_a_stride, &thr, w, h, (h - 1), j,
add_bef_shift_flt[scale - 1], shift_flt[scale - 1]);
I4_ADM_CM_ACCUM_ROUND(xh, thr, shift_sub, xh_sq, add_shift_sq, shift_sq, val,
add_shift_cub, shift_cub, accum_inner_h);
I4_ADM_CM_ACCUM_ROUND(xv, thr, shift_sub, xv_sq, add_shift_sq, shift_sq, val,
add_shift_cub, shift_cub, accum_inner_v);
I4_ADM_CM_ACCUM_ROUND(xd, thr, shift_sub, xd_sq, add_shift_sq, shift_sq, val,
add_shift_cub, shift_cub, accum_inner_d);
}
}
/* i=h-1,j=w-1 */
if ((bottom > (h - 1)) && (right > (w - 1)))
{
xh = (int32_t)((((int64_t)src->band_h[(h - 1) * src_stride + w - 1] * rfactor[0]) +
add_bef_shift_dst[scale - 1]) >> shift_dst[scale - 1]);
xv = (int32_t)((((int64_t)src->band_v[(h - 1) * src_stride + w - 1] * rfactor[1]) +
add_bef_shift_dst[scale - 1]) >> shift_dst[scale - 1]);
xd = (int32_t)((((int64_t)src->band_d[(h - 1) * src_stride + w - 1] * rfactor[2]) +
add_bef_shift_dst[scale - 1]) >> shift_dst[scale - 1]);
I4_ADM_CM_THRESH_S_H_M_1_W_M_1(angles, flt_angles, csf_a_stride, &thr, w, h, (h - 1),
(w - 1), add_bef_shift_flt[scale - 1], shift_flt[scale - 1]);
I4_ADM_CM_ACCUM_ROUND(xh, thr, shift_sub, xh_sq, add_shift_sq, shift_sq, val,
add_shift_cub, shift_cub, accum_inner_h);
I4_ADM_CM_ACCUM_ROUND(xv, thr, shift_sub, xv_sq, add_shift_sq, shift_sq, val,
add_shift_cub, shift_cub, accum_inner_v);
I4_ADM_CM_ACCUM_ROUND(xd, thr, shift_sub, xd_sq, add_shift_sq, shift_sq, val,
add_shift_cub, shift_cub, accum_inner_d);
}
accum_h += (accum_inner_h + add_shift_inner_accum) >> shift_inner_accum;
accum_v += (accum_inner_v + add_shift_inner_accum) >> shift_inner_accum;
accum_d += (accum_inner_d + add_shift_inner_accum) >> shift_inner_accum;
/**
* Convert to floating point to compute the final scores.
* The final shift is calculated as 3 * (shifts pending from the previous
* stage, i.e. the src coming from the DWT, + 32) minus the total shifts done
* in this function.
*/
float f_accum_h = (float)(accum_h / final_shift[scale - 1]);
float f_accum_v = (float)(accum_v / final_shift[scale - 1]);
float f_accum_d = (float)(accum_d / final_shift[scale - 1]);
float num_scale_h = powf(f_accum_h, 1.0f / 3.0f) + powf((bottom - top) * (right - left) / 32.0f, 1.0f / 3.0f);
float num_scale_v = powf(f_accum_v, 1.0f / 3.0f) + powf((bottom - top) * (right - left) / 32.0f, 1.0f / 3.0f);
float num_scale_d = powf(f_accum_d, 1.0f / 3.0f) + powf((bottom - top) * (right - left) / 32.0f, 1.0f / 3.0f);
return (num_scale_h + num_scale_v + num_scale_d);
}
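/* Widen the 16-bit approximation band (band_a) to 32 bits. Only band_a is
 * copied: scales 1-3 run entirely in the 32-bit pipeline and only need the
 * previous scale's approximation band as input. */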
static void i16_to_i32(adm_dwt_band_t *src, i4_adm_dwt_band_t *dst,
int w, int h, int stride)
{
for (int i = 0; i < (h + 1) / 2; ++i) {
int16_t *src_band_a_addr = &src->band_a[i * stride];
int32_t *dst_band_a_addr = &dst->band_a[i * stride];
for (int j = 0; j < (w + 1) / 2; ++j) {
*(dst_band_a_addr++) = (int32_t)(*(src_band_a_addr++));
}
}
}
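/* One level of the separable 2-D DWT (db2 filter pair) for 8-bit input: a
 * vertical low/high pass into tmplo/tmphi, then a horizontal pass that writes
 * the four sub-bands a/v (from tmplo) and h/d (from tmphi). ind_y/ind_x hold
 * the source indices precomputed by dwt2_src_indices_filt(). */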
static void adm_dwt2_8(const uint8_t *src, const adm_dwt_band_t *dst,
AdmBuffer *buf, int w, int h, int src_stride,
int dst_stride)
{
const int16_t *filter_lo = dwt2_db2_coeffs_lo;
const int16_t *filter_hi = dwt2_db2_coeffs_hi;
const int16_t shift_VP = 8;
const int16_t shift_HP = 16;
const int32_t add_shift_VP = 128;
const int32_t add_shift_HP = 32768;
int **ind_y = buf->ind_y;
int **ind_x = buf->ind_x;
int16_t *tmplo = (int16_t *)buf->tmp_ref;
int16_t *tmphi = tmplo + w;
int32_t accum;
for (int i = 0; i < (h + 1) / 2; ++i) {
/* Vertical pass. */
for (int j = 0; j < w; ++j) {
uint16_t u_s0 = src[ind_y[0][i] * src_stride + j];
uint16_t u_s1 = src[ind_y[1][i] * src_stride + j];
uint16_t u_s2 = src[ind_y[2][i] * src_stride + j];
uint16_t u_s3 = src[ind_y[3][i] * src_stride + j];
accum = 0;
accum += (int32_t)filter_lo[0] * (int32_t)u_s0;
accum += (int32_t)filter_lo[1] * (int32_t)u_s1;
accum += (int32_t)filter_lo[2] * (int32_t)u_s2;
accum += (int32_t)filter_lo[3] * (int32_t)u_s3;
/* normalize the range from (0, N) to (-N/2, N/2) */
accum -= (int32_t)dwt2_db2_coeffs_lo_sum * add_shift_VP;
tmplo[j] = (accum + add_shift_VP) >> shift_VP;
accum = 0;
accum += (int32_t)filter_hi[0] * (int32_t)u_s0;
accum += (int32_t)filter_hi[1] * (int32_t)u_s1;
accum += (int32_t)filter_hi[2] * (int32_t)u_s2;
accum += (int32_t)filter_hi[3] * (int32_t)u_s3;
/* normalize the range from (0, N) to (-N/2, N/2) */
accum -= (int32_t)dwt2_db2_coeffs_hi_sum * add_shift_VP;
tmphi[j] = (accum + add_shift_VP) >> shift_VP;
}
/* Horizontal pass (lo and hi). */
for (int j = 0; j < (w + 1) / 2; ++j) {
int j0 = ind_x[0][j];
int j1 = ind_x[1][j];
int j2 = ind_x[2][j];
int j3 = ind_x[3][j];
int16_t s0 = tmplo[j0];
int16_t s1 = tmplo[j1];
int16_t s2 = tmplo[j2];
int16_t s3 = tmplo[j3];
accum = 0;
accum += (int32_t)filter_lo[0] * s0;
accum += (int32_t)filter_lo[1] * s1;
accum += (int32_t)filter_lo[2] * s2;
accum += (int32_t)filter_lo[3] * s3;
dst->band_a[i * dst_stride + j] = (accum + add_shift_HP) >> shift_HP;
accum = 0;
accum += (int32_t)filter_hi[0] * s0;
accum += (int32_t)filter_hi[1] * s1;
accum += (int32_t)filter_hi[2] * s2;
accum += (int32_t)filter_hi[3] * s3;
dst->band_v[i * dst_stride + j] = (accum + add_shift_HP) >> shift_HP;
s0 = tmphi[j0];
s1 = tmphi[j1];
s2 = tmphi[j2];
s3 = tmphi[j3];
accum = 0;
accum += (int32_t)filter_lo[0] * s0;
accum += (int32_t)filter_lo[1] * s1;
accum += (int32_t)filter_lo[2] * s2;
accum += (int32_t)filter_lo[3] * s3;
dst->band_h[i * dst_stride + j] = (accum + add_shift_HP) >> shift_HP;
accum = 0;
accum += (int32_t)filter_hi[0] * s0;
accum += (int32_t)filter_hi[1] * s1;
accum += (int32_t)filter_hi[2] * s2;
accum += (int32_t)filter_hi[3] * s3;
dst->band_d[i * dst_stride + j] = (accum + add_shift_HP) >> shift_HP;
}
}
}
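/* Same one-level DWT for higher bit depths (bpc > 8); the vertical-pass shift
 * and rounding offset are derived from inp_size_bits instead of the fixed
 * 8-bit values used in adm_dwt2_8(). */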
static void adm_dwt2_16(const uint16_t *src, const adm_dwt_band_t *dst, AdmBuffer *buf, int w, int h,
int src_stride, int dst_stride, int inp_size_bits)
{
const int16_t *filter_lo = dwt2_db2_coeffs_lo;
const int16_t *filter_hi = dwt2_db2_coeffs_hi;
const int16_t shift_VP = inp_size_bits;
const int16_t shift_HP = 16;
const int32_t add_shift_VP = 1 << (inp_size_bits - 1);
const int32_t add_shift_HP = 32768;
int **ind_y = buf->ind_y;
int **ind_x = buf->ind_x;
int16_t *tmplo = (int16_t *)buf->tmp_ref;
int16_t *tmphi = tmplo + w;
int32_t accum;
for (int i = 0; i < (h + 1) / 2; ++i) {
/* Vertical pass. */
for (int j = 0; j < w; ++j) {
uint16_t u_s0 = src[ind_y[0][i] * src_stride + j];
uint16_t u_s1 = src[ind_y[1][i] * src_stride + j];
uint16_t u_s2 = src[ind_y[2][i] * src_stride + j];
uint16_t u_s3 = src[ind_y[3][i] * src_stride + j];
accum = 0;
accum += (int32_t)filter_lo[0] * (int32_t)u_s0;
accum += (int32_t)filter_lo[1] * (int32_t)u_s1;
accum += (int32_t)filter_lo[2] * (int32_t)u_s2;
accum += (int32_t)filter_lo[3] * (int32_t)u_s3;
/* normalize the range from (0, N) to (-N/2, N/2) */
accum -= (int32_t)dwt2_db2_coeffs_lo_sum * add_shift_VP;
tmplo[j] = (accum + add_shift_VP) >> shift_VP;
accum = 0;
accum += (int32_t)filter_hi[0] * (int32_t)u_s0;
accum += (int32_t)filter_hi[1] * (int32_t)u_s1;
accum += (int32_t)filter_hi[2] * (int32_t)u_s2;
accum += (int32_t)filter_hi[3] * (int32_t)u_s3;
/* normalize the range from (0, N) to (-N/2, N/2) */
accum -= (int32_t)dwt2_db2_coeffs_hi_sum * add_shift_VP;
tmphi[j] = (accum + add_shift_VP) >> shift_VP;
}
/* Horizontal pass (lo and hi). */
for (int j = 0; j < (w + 1) / 2; ++j) {
int j0 = ind_x[0][j];
int j1 = ind_x[1][j];
int j2 = ind_x[2][j];
int j3 = ind_x[3][j];
int16_t s0 = tmplo[j0];
int16_t s1 = tmplo[j1];
int16_t s2 = tmplo[j2];
int16_t s3 = tmplo[j3];
accum = 0;
accum += (int32_t)filter_lo[0] * s0;
accum += (int32_t)filter_lo[1] * s1;
accum += (int32_t)filter_lo[2] * s2;
accum += (int32_t)filter_lo[3] * s3;
dst->band_a[i * dst_stride + j] = (accum + add_shift_HP) >> shift_HP;
accum = 0;
accum += (int32_t)filter_hi[0] * s0;
accum += (int32_t)filter_hi[1] * s1;
accum += (int32_t)filter_hi[2] * s2;
accum += (int32_t)filter_hi[3] * s3;
dst->band_v[i * dst_stride + j] = (accum + add_shift_HP) >> shift_HP;
s0 = tmphi[j0];
s1 = tmphi[j1];
s2 = tmphi[j2];
s3 = tmphi[j3];
accum = 0;
accum += (int32_t)filter_lo[0] * s0;
accum += (int32_t)filter_lo[1] * s1;
accum += (int32_t)filter_lo[2] * s2;
accum += (int32_t)filter_lo[3] * s3;
dst->band_h[i * dst_stride + j] = (accum + add_shift_HP) >> shift_HP;
accum = 0;
accum += (int32_t)filter_hi[0] * s0;
accum += (int32_t)filter_hi[1] * s1;
accum += (int32_t)filter_hi[2] * s2;
accum += (int32_t)filter_hi[3] * s3;
dst->band_d[i * dst_stride + j] = (accum + add_shift_HP) >> shift_HP;
}
}
}
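/* One-level DWT for scales 1-3, applied to the 32-bit approximation bands of
 * the reference and distorted frames in a single pass (shared index tables,
 * separate temporary rows); the per-scale rounding offsets and shifts
 * (add_bef_shift_round_*, shift_*Pass) control the intermediate precision. */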
static void adm_dwt2_s123_combined(const int32_t *i4_ref_scale, const int32_t *i4_curr_dis,
AdmBuffer *buf, int w, int h, int ref_stride,
int dis_stride, int dst_stride, int scale)
{
const i4_adm_dwt_band_t *i4_ref_dwt2 = &buf->i4_ref_dwt2;
const i4_adm_dwt_band_t *i4_dis_dwt2 = &buf->i4_dis_dwt2;
int **ind_y = buf->ind_y;
int **ind_x = buf->ind_x;
const int16_t *filter_lo = dwt2_db2_coeffs_lo;
const int16_t *filter_hi = dwt2_db2_coeffs_hi;
const int32_t add_bef_shift_round_VP[3] = { 0, 32768, 32768 };
const int32_t add_bef_shift_round_HP[3] = { 16384, 32768, 16384 };
const int16_t shift_VerticalPass[3] = { 0, 16, 16 };
const int16_t shift_HorizontalPass[3] = { 15, 16, 15 };
int32_t *tmplo_ref = buf->tmp_ref;
int32_t *tmphi_ref = tmplo_ref + w;
int32_t *tmplo_dis = tmphi_ref + w;
int32_t *tmphi_dis = tmplo_dis + w;
int32_t s10, s11, s12, s13;
int64_t accum_ref;
for (int i = 0; i < (h + 1) / 2; ++i)
{
/* Vertical pass. */
for (int j = 0; j < w; ++j)
{
s10 = i4_ref_scale[ind_y[0][i] * ref_stride + j];
s11 = i4_ref_scale[ind_y[1][i] * ref_stride + j];
s12 = i4_ref_scale[ind_y[2][i] * ref_stride + j];
s13 = i4_ref_scale[ind_y[3][i] * ref_stride + j];
accum_ref = 0;
accum_ref += (int64_t)filter_lo[0] * s10;
accum_ref += (int64_t)filter_lo[1] * s11;
accum_ref += (int64_t)filter_lo[2] * s12;
accum_ref += (int64_t)filter_lo[3] * s13;
tmplo_ref[j] = (int32_t)((accum_ref + add_bef_shift_round_VP[scale - 1])
>> shift_VerticalPass[scale - 1]);
accum_ref = 0;
accum_ref += (int64_t)filter_hi[0] * s10;
accum_ref += (int64_t)filter_hi[1] * s11;
accum_ref += (int64_t)filter_hi[2] * s12;
accum_ref += (int64_t)filter_hi[3] * s13;
tmphi_ref[j] = (int32_t)((accum_ref + add_bef_shift_round_VP[scale - 1])
>> shift_VerticalPass[scale - 1]);
s10 = i4_curr_dis[ind_y[0][i] * dis_stride + j];
s11 = i4_curr_dis[ind_y[1][i] * dis_stride + j];
s12 = i4_curr_dis[ind_y[2][i] * dis_stride + j];
s13 = i4_curr_dis[ind_y[3][i] * dis_stride + j];
accum_ref = 0;
accum_ref += (int64_t)filter_lo[0] * s10;
accum_ref += (int64_t)filter_lo[1] * s11;
accum_ref += (int64_t)filter_lo[2] * s12;
accum_ref += (int64_t)filter_lo[3] * s13;
tmplo_dis[j] = (int32_t)((accum_ref + add_bef_shift_round_VP[scale - 1])
>> shift_VerticalPass[scale - 1]);
accum_ref = 0;
accum_ref += (int64_t)filter_hi[0] * s10;
accum_ref += (int64_t)filter_hi[1] * s11;
accum_ref += (int64_t)filter_hi[2] * s12;
accum_ref += (int64_t)filter_hi[3] * s13;
tmphi_dis[j] = (int32_t)((accum_ref + add_bef_shift_round_VP[scale - 1])
>> shift_VerticalPass[scale - 1]);
}
/* Horizontal pass (lo and hi). */
for (int j = 0; j < (w + 1) / 2; ++j)
{
int j0 = ind_x[0][j];
int j1 = ind_x[1][j];
int j2 = ind_x[2][j];
int j3 = ind_x[3][j];
s10 = tmplo_ref[j0];
s11 = tmplo_ref[j1];
s12 = tmplo_ref[j2];
s13 = tmplo_ref[j3];
accum_ref = 0;
accum_ref += (int64_t)filter_lo[0] * s10;
accum_ref += (int64_t)filter_lo[1] * s11;
accum_ref += (int64_t)filter_lo[2] * s12;
accum_ref += (int64_t)filter_lo[3] * s13;
i4_ref_dwt2->band_a[i * dst_stride + j] = (int32_t)((accum_ref +
add_bef_shift_round_HP[scale - 1]) >> shift_HorizontalPass[scale - 1]);
accum_ref = 0;
accum_ref += (int64_t)filter_hi[0] * s10;
accum_ref += (int64_t)filter_hi[1] * s11;
accum_ref += (int64_t)filter_hi[2] * s12;
accum_ref += (int64_t)filter_hi[3] * s13;
i4_ref_dwt2->band_v[i * dst_stride + j] = (int32_t)((accum_ref +
add_bef_shift_round_HP[scale - 1]) >> shift_HorizontalPass[scale - 1]);
s10 = tmphi_ref[j0];
s11 = tmphi_ref[j1];
s12 = tmphi_ref[j2];
s13 = tmphi_ref[j3];
accum_ref = 0;
accum_ref += (int64_t)filter_lo[0] * s10;
accum_ref += (int64_t)filter_lo[1] * s11;
accum_ref += (int64_t)filter_lo[2] * s12;
accum_ref += (int64_t)filter_lo[3] * s13;
i4_ref_dwt2->band_h[i * dst_stride + j] = (int32_t)((accum_ref +
add_bef_shift_round_HP[scale - 1]) >> shift_HorizontalPass[scale - 1]);
accum_ref = 0;
accum_ref += (int64_t)filter_hi[0] * s10;
accum_ref += (int64_t)filter_hi[1] * s11;
accum_ref += (int64_t)filter_hi[2] * s12;
accum_ref += (int64_t)filter_hi[3] * s13;
i4_ref_dwt2->band_d[i * dst_stride + j] = (int32_t)((accum_ref +
add_bef_shift_round_HP[scale - 1]) >> shift_HorizontalPass[scale - 1]);
s10 = tmplo_dis[j0];
s11 = tmplo_dis[j1];
s12 = tmplo_dis[j2];
s13 = tmplo_dis[j3];
accum_ref = 0;
accum_ref += (int64_t)filter_lo[0] * s10;
accum_ref += (int64_t)filter_lo[1] * s11;
accum_ref += (int64_t)filter_lo[2] * s12;
accum_ref += (int64_t)filter_lo[3] * s13;
i4_dis_dwt2->band_a[i * dst_stride + j] = (int32_t)((accum_ref +
add_bef_shift_round_HP[scale - 1]) >> shift_HorizontalPass[scale - 1]);
accum_ref = 0;
accum_ref += (int64_t)filter_hi[0] * s10;
accum_ref += (int64_t)filter_hi[1] * s11;
accum_ref += (int64_t)filter_hi[2] * s12;
accum_ref += (int64_t)filter_hi[3] * s13;
i4_dis_dwt2->band_v[i * dst_stride + j] = (int32_t)((accum_ref +
add_bef_shift_round_HP[scale - 1]) >> shift_HorizontalPass[scale - 1]);
s10 = tmphi_dis[j0];
s11 = tmphi_dis[j1];
s12 = tmphi_dis[j2];
s13 = tmphi_dis[j3];
accum_ref = 0;
accum_ref += (int64_t)filter_lo[0] * s10;
accum_ref += (int64_t)filter_lo[1] * s11;
accum_ref += (int64_t)filter_lo[2] * s12;
accum_ref += (int64_t)filter_lo[3] * s13;
i4_dis_dwt2->band_h[i * dst_stride + j] = (int32_t)((accum_ref +
add_bef_shift_round_HP[scale - 1]) >> shift_HorizontalPass[scale - 1]);
accum_ref = 0;
accum_ref += (int64_t)filter_hi[0] * s10;
accum_ref += (int64_t)filter_hi[1] * s11;
accum_ref += (int64_t)filter_hi[2] * s12;
accum_ref += (int64_t)filter_hi[3] * s13;
i4_dis_dwt2->band_d[i * dst_stride + j] = (int32_t)((accum_ref +
add_bef_shift_round_HP[scale - 1]) >> shift_HorizontalPass[scale - 1]);
}
}
}
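/* Top-level fixed-point ADM: for each of the four scales, run the DWT (16-bit
 * path at scale 0, 32-bit path afterwards), decoupling, CSF and contrast
 * masking; accumulate the per-scale numerators/denominators into num/den and
 * scores[], and report score = num / den, or 1.0 when den falls below the
 * numden_limit clamp. */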
void integer_compute_adm(AdmState *s, VmafPicture *ref_pic, VmafPicture *dis_pic,
double *score, double *score_num, double *score_den, double *scores, AdmBuffer *buf,
double adm_enhn_gain_limit,
double adm_norm_view_dist, int adm_ref_display_height)
{
int w = ref_pic->w[0];
int h = ref_pic->h[0];
const double numden_limit = 1e-10 * (w * h) / (1920.0 * 1080.0);
size_t curr_ref_stride;
size_t curr_dis_stride;
size_t buf_stride = buf->ind_size_x >> 2;
int32_t *i4_curr_ref_scale = NULL;
int32_t *i4_curr_dis_scale = NULL;
if (ref_pic->bpc == 8) {
curr_ref_stride = ref_pic->stride[0];
curr_dis_stride = dis_pic->stride[0];
}
else {
curr_ref_stride = ref_pic->stride[0] >> 1;
curr_dis_stride = dis_pic->stride[0] >> 1;
}
double num = 0;
double den = 0;
for (unsigned scale = 0; scale < 4; ++scale) {
float num_scale = 0.0;
float den_scale = 0.0;
dwt2_src_indices_filt(buf->ind_y, buf->ind_x, w, h);
if(scale==0) {
if (ref_pic->bpc == 8) {
s->dwt2_8(ref_pic->data[0], &buf->ref_dwt2, buf, w, h,
curr_ref_stride, buf_stride);
s->dwt2_8(dis_pic->data[0], &buf->dis_dwt2, buf, w, h,
curr_dis_stride, buf_stride);
}
else {
adm_dwt2_16(ref_pic->data[0], &buf->ref_dwt2, buf, w, h,
curr_ref_stride, buf_stride, ref_pic->bpc);
adm_dwt2_16(dis_pic->data[0], &buf->dis_dwt2, buf, w, h,
curr_dis_stride, buf_stride, dis_pic->bpc);
}
i16_to_i32(&buf->ref_dwt2, &buf->i4_ref_dwt2, w, h, buf_stride);
i16_to_i32(&buf->dis_dwt2, &buf->i4_dis_dwt2, w, h, buf_stride);
w = (w + 1) / 2;
h = (h + 1) / 2;
adm_decouple(buf, w, h, buf_stride, adm_enhn_gain_limit);
den_scale = adm_csf_den_scale(&buf->ref_dwt2, w, h, buf_stride,
adm_norm_view_dist, adm_ref_display_height);
adm_csf(buf, w, h, buf_stride, adm_norm_view_dist, adm_ref_display_height);
num_scale = adm_cm(buf, w, h, buf_stride, buf_stride,
adm_norm_view_dist, adm_ref_display_height);
}
else {
adm_dwt2_s123_combined(i4_curr_ref_scale, i4_curr_dis_scale, buf, w, h, curr_ref_stride,
curr_dis_stride, buf_stride, scale);
w = (w + 1) / 2;
h = (h + 1) / 2;
adm_decouple_s123(buf, w, h, buf_stride, adm_enhn_gain_limit);
den_scale = adm_csf_den_s123(
&buf->i4_ref_dwt2, scale, w, h, buf_stride,
adm_norm_view_dist, adm_ref_display_height);
i4_adm_csf(buf, scale, w, h, buf_stride,
adm_norm_view_dist, adm_ref_display_height);
num_scale = i4_adm_cm(buf, w, h, buf_stride, buf_stride, scale,
adm_norm_view_dist, adm_ref_display_height);
}
num += num_scale;
den += den_scale;
i4_curr_ref_scale = buf->i4_ref_dwt2.band_a;
i4_curr_dis_scale = buf->i4_dis_dwt2.band_a;
curr_ref_stride = buf_stride;
curr_dis_stride = buf_stride;
scores[2 * scale + 0] = num_scale;
scores[2 * scale + 1] = den_scale;
}
num = num < numden_limit ? 0 : num;
den = den < numden_limit ? 0 : den;
if (den == 0.0) {
*score = 1.0f;
}
else {
*score = num / den;
}
*score_num = num;
*score_den = den;
}
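/* Helpers that carve the per-band and index pointers out of one contiguous
 * allocation: each call assigns the next stride-sized slices and returns the
 * advanced data pointer. The *_hvd variants leave band_a NULL since the
 * approximation band is not needed there. */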
static inline void *init_dwt_band(adm_dwt_band_t *band, char *data_top, size_t stride)
{
band->band_a = (int16_t *)data_top; data_top += stride;
band->band_h = (int16_t *)data_top; data_top += stride;
band->band_v = (int16_t *)data_top; data_top += stride;
band->band_d = (int16_t *)data_top; data_top += stride;
return data_top;
}
static inline void *init_index(int32_t **index, char *data_top, size_t stride)
{
index[0] = (int32_t *)data_top; data_top += stride;
index[1] = (int32_t *)data_top; data_top += stride;
index[2] = (int32_t *)data_top; data_top += stride;
index[3] = (int32_t *)data_top; data_top += stride;
return data_top;
}
static inline void *i4_init_dwt_band(i4_adm_dwt_band_t *band, char *data_top, size_t stride)
{
band->band_a = (int32_t *)data_top; data_top += stride;
band->band_h = (int32_t *)data_top; data_top += stride;
band->band_v = (int32_t *)data_top; data_top += stride;
band->band_d = (int32_t *)data_top; data_top += stride;
return data_top;
}
static inline void *init_dwt_band_hvd(adm_dwt_band_t *band, char *data_top, size_t stride)
{
band->band_a = NULL;
band->band_h = (int16_t *)data_top; data_top += stride;
band->band_v = (int16_t *)data_top; data_top += stride;
band->band_d = (int16_t *)data_top; data_top += stride;
return data_top;
}
static inline void *i4_init_dwt_band_hvd(i4_adm_dwt_band_t *band, char *data_top, size_t stride)
{
band->band_a = NULL;
band->band_h = (int32_t *)data_top; data_top += stride;
band->band_v = (int32_t *)data_top; data_top += stride;
band->band_d = (int32_t *)data_top; data_top += stride;
return data_top;
}
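/* Feature extractor init: reject frames whose width or height is 32 or less,
 * pick a SIMD dwt2_8 implementation when available, size the index and band
 * buffers from the rounded-up half resolution, and lay them out inside a
 * single aligned allocation. */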
static int init(VmafFeatureExtractor *fex, enum VmafPixelFormat pix_fmt,
unsigned bpc, unsigned w, unsigned h)
{
AdmState *s = fex->priv;
(void) pix_fmt;
(void) bpc;
if (w <= 32 || h <= 32) {
vmaf_log(VMAF_LOG_LEVEL_ERROR,
"%s: invalid size (%dx%d), "
"width/height must be greater than 32\n",
fex->name, w, h);
return -EINVAL;
}
s->dwt2_8 = adm_dwt2_8;
#if ARCH_X86
unsigned flags = vmaf_get_cpu_flags();
if (flags & VMAF_X86_CPU_FLAG_AVX2) {
if (!(w % 8)) s->dwt2_8 = adm_dwt2_8_avx2;
}
#elif ARCH_AARCH64
unsigned flags = vmaf_get_cpu_flags();
if (flags & VMAF_ARM_CPU_FLAG_NEON) {
if (!(w % 8)) s->dwt2_8 = adm_dwt2_8_neon;
}
#endif
s->integer_stride = ALIGN_CEIL(w * sizeof(int32_t));
s->buf.ind_size_x = ALIGN_CEIL(((w + 1) / 2) * sizeof(int32_t));
s->buf.ind_size_y = ALIGN_CEIL(((h + 1) / 2) * sizeof(int32_t));
size_t buf_sz_one = s->buf.ind_size_x * ((h + 1) / 2);
s->buf.data_buf = aligned_malloc(buf_sz_one * NUM_BUFS_ADM, MAX_ALIGN);
if (!s->buf.data_buf) goto fail;
s->buf.tmp_ref = aligned_malloc(s->integer_stride * 4, MAX_ALIGN);
if (!s->buf.tmp_ref) goto fail;
s->buf.buf_x_orig = aligned_malloc(s->buf.ind_size_x * 4, MAX_ALIGN);
if (!s->buf.buf_x_orig) goto fail;
s->buf.buf_y_orig = aligned_malloc(s->buf.ind_size_y * 4, MAX_ALIGN);
if (!s->buf.buf_y_orig) goto fail;
void *data_top = s->buf.data_buf;
data_top = init_dwt_band(&s->buf.ref_dwt2, data_top, buf_sz_one / 2);
data_top = init_dwt_band(&s->buf.dis_dwt2, data_top, buf_sz_one / 2);
data_top = init_dwt_band_hvd(&s->buf.decouple_r, data_top, buf_sz_one / 2);
data_top = init_dwt_band_hvd(&s->buf.decouple_a, data_top, buf_sz_one / 2);
data_top = init_dwt_band_hvd(&s->buf.csf_a, data_top, buf_sz_one / 2);
data_top = init_dwt_band_hvd(&s->buf.csf_f, data_top, buf_sz_one / 2);
data_top = i4_init_dwt_band(&s->buf.i4_ref_dwt2, data_top, buf_sz_one);
data_top = i4_init_dwt_band(&s->buf.i4_dis_dwt2, data_top, buf_sz_one);
data_top = i4_init_dwt_band_hvd(&s->buf.i4_decouple_r, data_top, buf_sz_one);
data_top = i4_init_dwt_band_hvd(&s->buf.i4_decouple_a, data_top, buf_sz_one);
data_top = i4_init_dwt_band_hvd(&s->buf.i4_csf_a, data_top, buf_sz_one);
data_top = i4_init_dwt_band_hvd(&s->buf.i4_csf_f, data_top, buf_sz_one);
void *ind_buf_y = s->buf.buf_y_orig;
init_index(s->buf.ind_y, ind_buf_y, s->buf.ind_size_y);
void *ind_buf_x = s->buf.buf_x_orig;
init_index(s->buf.ind_x, ind_buf_x, s->buf.ind_size_x);
div_lookup_generator();
s->feature_name_dict =
vmaf_feature_name_dict_from_provided_features(fex->provided_features,
fex->options, s);
if (!s->feature_name_dict) goto fail;
return 0;
fail:
if (s->buf.data_buf) aligned_free(s->buf.data_buf);
if (s->buf.tmp_ref) aligned_free(s->buf.tmp_ref);
if (s->buf.buf_x_orig) aligned_free(s->buf.buf_x_orig);
if (s->buf.buf_y_orig) aligned_free(s->buf.buf_y_orig);
vmaf_dictionary_free(&s->feature_name_dict);
return -ENOMEM;
}
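/* Per-frame extraction: bail out for viewing-distance/display-height
 * combinations below the default product (16-bit pipeline limit), compute the
 * ADM scores, and append the aggregate and per-scale ratios; the extra
 * num/den features are only written in debug mode. */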
static int extract(VmafFeatureExtractor *fex,
VmafPicture *ref_pic, VmafPicture *ref_pic_90,
VmafPicture *dist_pic, VmafPicture *dist_pic_90,
unsigned index, VmafFeatureCollector *feature_collector)
{
AdmState *s = fex->priv;
int err = 0;
(void) ref_pic_90;
(void) dist_pic_90;
double score, score_num, score_den;
double scores[8];
// The current implementation is limited by the 16-bit data pipeline and thus
// cannot handle an angular frequency smaller than that of 1080p viewed at 3H.
if (s->adm_norm_view_dist * s->adm_ref_display_height <
DEFAULT_ADM_NORM_VIEW_DIST * DEFAULT_ADM_REF_DISPLAY_HEIGHT) {
return -EINVAL;
}
integer_compute_adm(s, ref_pic, dist_pic, &score, &score_num, &score_den,
scores, &s->buf,
s->adm_enhn_gain_limit,
s->adm_norm_view_dist, s->adm_ref_display_height);
err |= vmaf_feature_collector_append_with_dict(feature_collector,
s->feature_name_dict, "VMAF_integer_feature_adm2_score", score,
index);
err |= vmaf_feature_collector_append_with_dict(feature_collector,
s->feature_name_dict, "integer_adm_scale0", scores[0] / scores[1],
index);
err |= vmaf_feature_collector_append_with_dict(feature_collector,
s->feature_name_dict, "integer_adm_scale1", scores[2] / scores[3],
index);
err |= vmaf_feature_collector_append_with_dict(feature_collector,
s->feature_name_dict, "integer_adm_scale2", scores[4] / scores[5],
index);
err |= vmaf_feature_collector_append_with_dict(feature_collector,
s->feature_name_dict, "integer_adm_scale3", scores[6] / scores[7],
index);
if (!s->debug) return err;
err |= vmaf_feature_collector_append_with_dict(feature_collector,
s->feature_name_dict, "integer_adm", score, index);
err |= vmaf_feature_collector_append_with_dict(feature_collector,
s->feature_name_dict, "integer_adm_num", score_num, index);
err |= vmaf_feature_collector_append_with_dict(feature_collector,
s->feature_name_dict, "integer_adm_den", score_den, index);
err |= vmaf_feature_collector_append_with_dict(feature_collector,
s->feature_name_dict, "integer_adm_num_scale0", scores[0], index);
err |= vmaf_feature_collector_append_with_dict(feature_collector,
s->feature_name_dict, "integer_adm_den_scale0", scores[1], index);
err |= vmaf_feature_collector_append_with_dict(feature_collector,
s->feature_name_dict, "integer_adm_num_scale1", scores[2], index);
err |= vmaf_feature_collector_append_with_dict(feature_collector,
s->feature_name_dict, "integer_adm_den_scale1", scores[3], index);
err |= vmaf_feature_collector_append_with_dict(feature_collector,
s->feature_name_dict, "integer_adm_num_scale2", scores[4], index);
err |= vmaf_feature_collector_append_with_dict(feature_collector,
s->feature_name_dict, "integer_adm_den_scale2", scores[5], index);
err |= vmaf_feature_collector_append_with_dict(feature_collector,
s->feature_name_dict, "integer_adm_num_scale3", scores[6], index);
err |= vmaf_feature_collector_append_with_dict(feature_collector,
s->feature_name_dict, "integer_adm_den_scale3", scores[7], index);
return err;
}
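/* Release the working buffers and the feature-name dictionary allocated in
 * init(). */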
static int close(VmafFeatureExtractor *fex)
{
AdmState *s = fex->priv;
if (s->buf.data_buf) aligned_free(s->buf.data_buf);
if (s->buf.tmp_ref) aligned_free(s->buf.tmp_ref);
if (s->buf.buf_x_orig) aligned_free(s->buf.buf_x_orig);
if (s->buf.buf_y_orig) aligned_free(s->buf.buf_y_orig);
vmaf_dictionary_free(&s->feature_name_dict);
return 0;
}
static const char *provided_features[] = {
"VMAF_integer_feature_adm2_score", "integer_adm_scale0",
"integer_adm_scale1", "integer_adm_scale2", "integer_adm_scale3",
"integer_adm", "integer_adm_num", "integer_adm_den",
"integer_adm_num_scale0", "integer_adm_den_scale0", "integer_adm_num_scale1",
"integer_adm_den_scale1", "integer_adm_num_scale2", "integer_adm_den_scale2",
"integer_adm_num_scale3", "integer_adm_den_scale3",
NULL
};
VmafFeatureExtractor vmaf_fex_integer_adm = {
.name = "adm",
.init = init,
.extract = extract,
.options = options,
.close = close,
.priv_size = sizeof(AdmState),
.provided_features = provided_features,
};