in libvmaf/src/feature/integer_adm.c [1647:2053]
/**
 * Fixed-point (32-bit band) contrast-masking stage of ADM for scales 1..3.
 *
 * Applies the CSF-derived quantization reciprocals to the decoupled restored
 * bands (H/V/D), thresholds each coefficient against a local masking
 * threshold computed from the CSF-filtered angle bands, cubes the clipped
 * excess, and accumulates per-band sums which are combined into the
 * numerator contribution for this scale.
 *
 * @param buf                    working buffers (decoupled bands, CSF bands)
 * @param w, h                   band width/height at this scale
 * @param src_stride             stride (in elements) of the decoupled bands
 * @param csf_a_stride           stride (in elements) of the CSF angle bands
 * @param scale                  ADM scale index, 1..3 (scale 0 has its own path)
 * @param adm_norm_view_dist     normalized viewing distance for CSF
 * @param adm_ref_display_height reference display height for CSF
 * @return sum of the three per-orientation numerator terms for this scale
 */
static float i4_adm_cm(AdmBuffer *buf, int w, int h, int src_stride, int csf_a_stride, int scale,
double adm_norm_view_dist, int adm_ref_display_height)
{
    const i4_adm_dwt_band_t *src = &buf->i4_decouple_r;
    const i4_adm_dwt_band_t *csf_f = &buf->i4_csf_f;
    const i4_adm_dwt_band_t *csf_a = &buf->i4_csf_a;

    // for ADM: scales goes from 0 to 3 but in noise floor paper, it goes from
    // 1 to 4 (from finest scale to coarsest scale).
    float factor1 = dwt_quant_step(&dwt_7_9_YCbCr_threshold[0], scale, 1, adm_norm_view_dist, adm_ref_display_height);
    float factor2 = dwt_quant_step(&dwt_7_9_YCbCr_threshold[0], scale, 2, adm_norm_view_dist, adm_ref_display_height);
    float rfactor1[3] = { 1.0f / factor1, 1.0f / factor1, 1.0f / factor2 };

    // Q32 fixed-point reciprocals of the quantization step, one per
    // orientation band: [0]=H, [1]=V, [2]=D.
    const uint32_t rfactor[3] = { (uint32_t)(rfactor1[0] * pow(2, 32)),
                                  (uint32_t)(rfactor1[1] * pow(2, 32)),
                                  (uint32_t)(rfactor1[2] * pow(2, 32)) };

    // Per-scale right shifts (with rounding offsets) applied after the Q32
    // multiply (dst) and inside the threshold macros (flt).
    const uint32_t shift_dst[3] = { 28, 28, 28 };
    const uint32_t shift_flt[3] = { 32, 32, 32 };
    int32_t add_bef_shift_dst[3], add_bef_shift_flt[3];

    for (unsigned idx = 0; idx < 3; ++idx) {
        add_bef_shift_dst[idx] = (1u << (shift_dst[idx] - 1));
        // NOTE(review): 1u << 31 converted to int32_t wraps to INT32_MIN on
        // common implementations; presumably the macros rely on the resulting
        // bit pattern — verify against I4_ADM_CM_THRESH_* definitions.
        add_bef_shift_flt[idx] = (1u << (shift_flt[idx] - 1));
    }

    // Rounding shifts sized by the frame dimensions so the cubed values and
    // the per-row accumulations stay within 64-bit range.
    uint32_t shift_cub = (uint32_t)ceil(log2(w));
    uint32_t add_shift_cub = (uint32_t)pow(2, (shift_cub - 1));
    uint32_t shift_inner_accum = (uint32_t)ceil(log2(h));
    uint32_t add_shift_inner_accum = (uint32_t)pow(2, (shift_inner_accum - 1));

    // Final shifts derived from 3*(shifts_from_previous_stage + 32) minus the
    // total shifts performed in this function, per scale.
    float final_shift[3] = { pow(2, (45 - shift_cub - shift_inner_accum)),
                             pow(2, (39 - shift_cub - shift_inner_accum)),
                             pow(2, (36 - shift_cub - shift_inner_accum)) };

    const int32_t shift_sq = 30;
    const int32_t add_shift_sq = 536870912; // 2^29, rounding offset for shift_sq
    const int32_t shift_sub = 0;
    int32_t *angles[3] = { csf_a->band_h, csf_a->band_v, csf_a->band_d };
    int32_t *flt_angles[3] = { csf_f->band_h, csf_f->band_v, csf_f->band_d };

    /* The computation of the scales is not required for the regions which lie
     * outside the frame borders
     */
    const int left = w * ADM_BORDER_FACTOR - 0.5;
    const int top = h * ADM_BORDER_FACTOR - 0.5;
    const int right = w - left;
    const int bottom = h - top;

    // Interior index ranges; rows/columns 0 and w-1/h-1 need dedicated
    // threshold macros because their masking neighborhoods are clamped.
    const int start_col = (left > 1) ? left : 1;
    const int end_col = (right < (w - 1)) ? right : (w - 1);
    const int start_row = (top > 1) ? top : 1;
    const int end_row = (bottom < (h - 1)) ? bottom : (h - 1);

    int i, j;
    int32_t xh, xv, xd, thr;
    int32_t xh_sq, xv_sq, xd_sq;
    int64_t val;
    int64_t accum_h = 0, accum_v = 0, accum_d = 0;
    int64_t accum_inner_h = 0, accum_inner_v = 0, accum_inner_d = 0;

    /* i=0,j=0 */
    if ((top <= 0) && (left <= 0))
    {
        xh = (int32_t)((((int64_t)src->band_h[0] * rfactor[0]) + add_bef_shift_dst[scale - 1])
            >> shift_dst[scale - 1]);
        xv = (int32_t)((((int64_t)src->band_v[0] * rfactor[1]) + add_bef_shift_dst[scale - 1])
            >> shift_dst[scale - 1]);
        xd = (int32_t)((((int64_t)src->band_d[0] * rfactor[2]) + add_bef_shift_dst[scale - 1])
            >> shift_dst[scale - 1]);

        I4_ADM_CM_THRESH_S_0_0(angles, flt_angles, csf_a_stride, &thr, w, h, 0, 0,
                               add_bef_shift_flt[scale - 1], shift_flt[scale - 1]);

        I4_ADM_CM_ACCUM_ROUND(xh, thr, shift_sub, xh_sq, add_shift_sq, shift_sq, val,
                              add_shift_cub, shift_cub, accum_inner_h);
        I4_ADM_CM_ACCUM_ROUND(xv, thr, shift_sub, xv_sq, add_shift_sq, shift_sq, val,
                              add_shift_cub, shift_cub, accum_inner_v);
        I4_ADM_CM_ACCUM_ROUND(xd, thr, shift_sub, xd_sq, add_shift_sq, shift_sq, val,
                              add_shift_cub, shift_cub, accum_inner_d);
    }

    /* i=0, j */
    if (top <= 0)
    {
        for (j = start_col; j < end_col; ++j)
        {
            xh = (int32_t)((((int64_t)src->band_h[j] * rfactor[0]) +
                add_bef_shift_dst[scale - 1]) >> shift_dst[scale - 1]);
            xv = (int32_t)((((int64_t)src->band_v[j] * rfactor[1]) +
                add_bef_shift_dst[scale - 1]) >> shift_dst[scale - 1]);
            xd = (int32_t)((((int64_t)src->band_d[j] * rfactor[2]) +
                add_bef_shift_dst[scale - 1]) >> shift_dst[scale - 1]);

            I4_ADM_CM_THRESH_S_0_J(angles, flt_angles, csf_a_stride, &thr, w, h,
                                   0, j, add_bef_shift_flt[scale - 1], shift_flt[scale - 1]);

            I4_ADM_CM_ACCUM_ROUND(xh, thr, shift_sub, xh_sq, add_shift_sq, shift_sq, val,
                                  add_shift_cub, shift_cub, accum_inner_h);
            I4_ADM_CM_ACCUM_ROUND(xv, thr, shift_sub, xv_sq, add_shift_sq, shift_sq, val,
                                  add_shift_cub, shift_cub, accum_inner_v);
            I4_ADM_CM_ACCUM_ROUND(xd, thr, shift_sub, xd_sq, add_shift_sq, shift_sq, val,
                                  add_shift_cub, shift_cub, accum_inner_d);
        }
    }

    /* i=0,j=w-1 */
    if ((top <= 0) && (right > (w - 1)))
    {
        xh = (int32_t)((((int64_t)src->band_h[w - 1] * rfactor[0]) +
            add_bef_shift_dst[scale - 1]) >> shift_dst[scale - 1]);
        xv = (int32_t)((((int64_t)src->band_v[w - 1] * rfactor[1]) +
            add_bef_shift_dst[scale - 1]) >> shift_dst[scale - 1]);
        xd = (int32_t)((((int64_t)src->band_d[w - 1] * rfactor[2]) +
            add_bef_shift_dst[scale - 1]) >> shift_dst[scale - 1]);

        I4_ADM_CM_THRESH_S_0_W_M_1(angles, flt_angles, csf_a_stride, &thr, w, h, 0, (w - 1),
                                   add_bef_shift_flt[scale - 1], shift_flt[scale - 1]);

        I4_ADM_CM_ACCUM_ROUND(xh, thr, shift_sub, xh_sq, add_shift_sq, shift_sq, val,
                              add_shift_cub, shift_cub, accum_inner_h);
        I4_ADM_CM_ACCUM_ROUND(xv, thr, shift_sub, xv_sq, add_shift_sq, shift_sq, val,
                              add_shift_cub, shift_cub, accum_inner_v);
        I4_ADM_CM_ACCUM_ROUND(xd, thr, shift_sub, xd_sq, add_shift_sq, shift_sq, val,
                              add_shift_cub, shift_cub, accum_inner_d);
    }

    // Fold the first-row partial sums into the frame accumulators with
    // rounding, to bound the magnitude of the running totals.
    accum_h += (accum_inner_h + add_shift_inner_accum) >> shift_inner_accum;
    accum_v += (accum_inner_v + add_shift_inner_accum) >> shift_inner_accum;
    accum_d += (accum_inner_d + add_shift_inner_accum) >> shift_inner_accum;

    if ((left > 0) && (right <= (w - 1))) /* Completely within frame */
    {
        for (i = start_row; i < end_row; ++i)
        {
            accum_inner_h = 0;
            accum_inner_v = 0;
            accum_inner_d = 0;
            for (j = start_col; j < end_col; ++j)
            {
                xh = (int32_t)((((int64_t)src->band_h[i * src_stride + j] * rfactor[0]) +
                    add_bef_shift_dst[scale - 1]) >> shift_dst[scale - 1]);
                xv = (int32_t)((((int64_t)src->band_v[i * src_stride + j] * rfactor[1]) +
                    add_bef_shift_dst[scale - 1]) >> shift_dst[scale - 1]);
                xd = (int32_t)((((int64_t)src->band_d[i * src_stride + j] * rfactor[2]) +
                    add_bef_shift_dst[scale - 1]) >> shift_dst[scale - 1]);

                I4_ADM_CM_THRESH_S_I_J(angles, flt_angles, csf_a_stride, &thr, w, h, i, j,
                                       add_bef_shift_flt[scale - 1], shift_flt[scale - 1]);

                I4_ADM_CM_ACCUM_ROUND(xh, thr, shift_sub, xh_sq, add_shift_sq, shift_sq, val,
                                      add_shift_cub, shift_cub, accum_inner_h);
                I4_ADM_CM_ACCUM_ROUND(xv, thr, shift_sub, xv_sq, add_shift_sq, shift_sq, val,
                                      add_shift_cub, shift_cub, accum_inner_v);
                I4_ADM_CM_ACCUM_ROUND(xd, thr, shift_sub, xd_sq, add_shift_sq, shift_sq, val,
                                      add_shift_cub, shift_cub, accum_inner_d);
            }
            accum_h += (accum_inner_h + add_shift_inner_accum) >> shift_inner_accum;
            accum_v += (accum_inner_v + add_shift_inner_accum) >> shift_inner_accum;
            accum_d += (accum_inner_d + add_shift_inner_accum) >> shift_inner_accum;
        }
    }
    else if ((left <= 0) && (right <= (w - 1))) /* Right border within frame, left outside */
    {
        for (i = start_row; i < end_row; ++i)
        {
            accum_inner_h = 0;
            accum_inner_v = 0;
            accum_inner_d = 0;

            /* j = 0 */
            xh = (int32_t)((((int64_t)src->band_h[i * src_stride] * rfactor[0]) +
                add_bef_shift_dst[scale - 1]) >> shift_dst[scale - 1]);
            xv = (int32_t)((((int64_t)src->band_v[i * src_stride] * rfactor[1]) +
                add_bef_shift_dst[scale - 1]) >> shift_dst[scale - 1]);
            xd = (int32_t)((((int64_t)src->band_d[i * src_stride] * rfactor[2]) +
                add_bef_shift_dst[scale - 1]) >> shift_dst[scale - 1]);

            I4_ADM_CM_THRESH_S_I_0(angles, flt_angles, csf_a_stride, &thr, w, h, i, 0,
                                   add_bef_shift_flt[scale - 1], shift_flt[scale - 1]);

            I4_ADM_CM_ACCUM_ROUND(xh, thr, shift_sub, xh_sq, add_shift_sq, shift_sq, val,
                                  add_shift_cub, shift_cub, accum_inner_h);
            I4_ADM_CM_ACCUM_ROUND(xv, thr, shift_sub, xv_sq, add_shift_sq, shift_sq, val,
                                  add_shift_cub, shift_cub, accum_inner_v);
            I4_ADM_CM_ACCUM_ROUND(xd, thr, shift_sub, xd_sq, add_shift_sq, shift_sq, val,
                                  add_shift_cub, shift_cub, accum_inner_d);

            /* j within frame */
            for (j = start_col; j < end_col; ++j)
            {
                xh = (int32_t)((((int64_t)src->band_h[i * src_stride + j] * rfactor[0]) +
                    add_bef_shift_dst[scale - 1]) >> shift_dst[scale - 1]);
                xv = (int32_t)((((int64_t)src->band_v[i * src_stride + j] * rfactor[1]) +
                    add_bef_shift_dst[scale - 1]) >> shift_dst[scale - 1]);
                xd = (int32_t)((((int64_t)src->band_d[i * src_stride + j] * rfactor[2]) +
                    add_bef_shift_dst[scale - 1]) >> shift_dst[scale - 1]);

                I4_ADM_CM_THRESH_S_I_J(angles, flt_angles, csf_a_stride, &thr, w, h, i, j,
                                       add_bef_shift_flt[scale - 1], shift_flt[scale - 1]);

                I4_ADM_CM_ACCUM_ROUND(xh, thr, shift_sub, xh_sq, add_shift_sq, shift_sq, val,
                                      add_shift_cub, shift_cub, accum_inner_h);
                I4_ADM_CM_ACCUM_ROUND(xv, thr, shift_sub, xv_sq, add_shift_sq, shift_sq, val,
                                      add_shift_cub, shift_cub, accum_inner_v);
                I4_ADM_CM_ACCUM_ROUND(xd, thr, shift_sub, xd_sq, add_shift_sq, shift_sq, val,
                                      add_shift_cub, shift_cub, accum_inner_d);
            }
            accum_h += (accum_inner_h + add_shift_inner_accum) >> shift_inner_accum;
            accum_v += (accum_inner_v + add_shift_inner_accum) >> shift_inner_accum;
            accum_d += (accum_inner_d + add_shift_inner_accum) >> shift_inner_accum;
        }
    }
    else if ((left > 0) && (right > (w - 1))) /* Left border within frame, right outside */
    {
        for (i = start_row; i < end_row; ++i)
        {
            accum_inner_h = 0;
            accum_inner_v = 0;
            accum_inner_d = 0;

            /* j within frame */
            for (j = start_col; j < end_col; ++j)
            {
                xh = (int32_t)((((int64_t)src->band_h[i * src_stride + j] * rfactor[0]) +
                    add_bef_shift_dst[scale - 1]) >> shift_dst[scale - 1]);
                xv = (int32_t)((((int64_t)src->band_v[i * src_stride + j] * rfactor[1]) +
                    add_bef_shift_dst[scale - 1]) >> shift_dst[scale - 1]);
                xd = (int32_t)((((int64_t)src->band_d[i * src_stride + j] * rfactor[2]) +
                    add_bef_shift_dst[scale - 1]) >> shift_dst[scale - 1]);

                I4_ADM_CM_THRESH_S_I_J(angles, flt_angles, csf_a_stride, &thr, w, h, i, j,
                                       add_bef_shift_flt[scale - 1], shift_flt[scale - 1]);

                I4_ADM_CM_ACCUM_ROUND(xh, thr, shift_sub, xh_sq, add_shift_sq, shift_sq, val,
                                      add_shift_cub, shift_cub, accum_inner_h);
                I4_ADM_CM_ACCUM_ROUND(xv, thr, shift_sub, xv_sq, add_shift_sq, shift_sq, val,
                                      add_shift_cub, shift_cub, accum_inner_v);
                I4_ADM_CM_ACCUM_ROUND(xd, thr, shift_sub, xd_sq, add_shift_sq, shift_sq, val,
                                      add_shift_cub, shift_cub, accum_inner_d);
            }

            /* j = w-1 */
            // BUGFIX: rfactor was previously indexed with the pixel offset
            // (i * src_stride + w - 1), reading far out of bounds of the
            // 3-element reciprocal table. Use the per-band indices 0/1/2,
            // consistent with every other border case in this function.
            xh = (int32_t)((((int64_t)src->band_h[i * src_stride + w - 1] * rfactor[0])
                + add_bef_shift_dst[scale - 1]) >> shift_dst[scale - 1]);
            xv = (int32_t)((((int64_t)src->band_v[i * src_stride + w - 1] * rfactor[1])
                + add_bef_shift_dst[scale - 1]) >> shift_dst[scale - 1]);
            xd = (int32_t)((((int64_t)src->band_d[i * src_stride + w - 1] * rfactor[2])
                + add_bef_shift_dst[scale - 1]) >> shift_dst[scale - 1]);

            I4_ADM_CM_THRESH_S_I_W_M_1(angles, flt_angles, csf_a_stride, &thr, w, h, i, (w - 1),
                                       add_bef_shift_flt[scale - 1], shift_flt[scale - 1]);

            I4_ADM_CM_ACCUM_ROUND(xh, thr, shift_sub, xh_sq, add_shift_sq, shift_sq, val,
                                  add_shift_cub, shift_cub, accum_inner_h);
            I4_ADM_CM_ACCUM_ROUND(xv, thr, shift_sub, xv_sq, add_shift_sq, shift_sq, val,
                                  add_shift_cub, shift_cub, accum_inner_v);
            I4_ADM_CM_ACCUM_ROUND(xd, thr, shift_sub, xd_sq, add_shift_sq, shift_sq, val,
                                  add_shift_cub, shift_cub, accum_inner_d);

            accum_h += (accum_inner_h + add_shift_inner_accum) >> shift_inner_accum;
            accum_v += (accum_inner_v + add_shift_inner_accum) >> shift_inner_accum;
            accum_d += (accum_inner_d + add_shift_inner_accum) >> shift_inner_accum;
        }
    }
    else /* Both borders outside frame */
    {
        for (i = start_row; i < end_row; ++i)
        {
            accum_inner_h = 0;
            accum_inner_v = 0;
            accum_inner_d = 0;

            /* j = 0 */
            xh = (int32_t)((((int64_t)src->band_h[i * src_stride] * rfactor[0]) +
                add_bef_shift_dst[scale - 1]) >> shift_dst[scale - 1]);
            xv = (int32_t)((((int64_t)src->band_v[i * src_stride] * rfactor[1]) +
                add_bef_shift_dst[scale - 1]) >> shift_dst[scale - 1]);
            xd = (int32_t)((((int64_t)src->band_d[i * src_stride] * rfactor[2]) +
                add_bef_shift_dst[scale - 1]) >> shift_dst[scale - 1]);

            I4_ADM_CM_THRESH_S_I_0(angles, flt_angles, csf_a_stride, &thr, w, h, i, 0,
                                   add_bef_shift_flt[scale - 1], shift_flt[scale - 1]);

            I4_ADM_CM_ACCUM_ROUND(xh, thr, shift_sub, xh_sq, add_shift_sq, shift_sq, val,
                                  add_shift_cub, shift_cub, accum_inner_h);
            I4_ADM_CM_ACCUM_ROUND(xv, thr, shift_sub, xv_sq, add_shift_sq, shift_sq, val,
                                  add_shift_cub, shift_cub, accum_inner_v);
            I4_ADM_CM_ACCUM_ROUND(xd, thr, shift_sub, xd_sq, add_shift_sq, shift_sq, val,
                                  add_shift_cub, shift_cub, accum_inner_d);

            /* j within frame */
            for (j = start_col; j < end_col; ++j)
            {
                xh = (int32_t)((((int64_t)src->band_h[i * src_stride + j] * rfactor[0]) +
                    add_bef_shift_dst[scale - 1]) >> shift_dst[scale - 1]);
                xv = (int32_t)((((int64_t)src->band_v[i * src_stride + j] * rfactor[1]) +
                    add_bef_shift_dst[scale - 1]) >> shift_dst[scale - 1]);
                xd = (int32_t)((((int64_t)src->band_d[i * src_stride + j] * rfactor[2]) +
                    add_bef_shift_dst[scale - 1]) >> shift_dst[scale - 1]);

                I4_ADM_CM_THRESH_S_I_J(angles, flt_angles, csf_a_stride, &thr, w, h, i, j,
                                       add_bef_shift_flt[scale - 1], shift_flt[scale - 1]);

                I4_ADM_CM_ACCUM_ROUND(xh, thr, shift_sub, xh_sq, add_shift_sq, shift_sq, val,
                                      add_shift_cub, shift_cub, accum_inner_h);
                I4_ADM_CM_ACCUM_ROUND(xv, thr, shift_sub, xv_sq, add_shift_sq, shift_sq, val,
                                      add_shift_cub, shift_cub, accum_inner_v);
                I4_ADM_CM_ACCUM_ROUND(xd, thr, shift_sub, xd_sq, add_shift_sq, shift_sq, val,
                                      add_shift_cub, shift_cub, accum_inner_d);
            }

            /* j = w-1 */
            xh = (int32_t)((((int64_t)src->band_h[i * src_stride + w - 1] * rfactor[0]) +
                add_bef_shift_dst[scale - 1]) >> shift_dst[scale - 1]);
            xv = (int32_t)((((int64_t)src->band_v[i * src_stride + w - 1] * rfactor[1]) +
                add_bef_shift_dst[scale - 1]) >> shift_dst[scale - 1]);
            xd = (int32_t)((((int64_t)src->band_d[i * src_stride + w - 1] * rfactor[2]) +
                add_bef_shift_dst[scale - 1]) >> shift_dst[scale - 1]);

            I4_ADM_CM_THRESH_S_I_W_M_1(angles, flt_angles, csf_a_stride, &thr, w, h, i, (w - 1),
                                       add_bef_shift_flt[scale - 1], shift_flt[scale - 1]);

            I4_ADM_CM_ACCUM_ROUND(xh, thr, shift_sub, xh_sq, add_shift_sq, shift_sq, val,
                                  add_shift_cub, shift_cub, accum_inner_h);
            I4_ADM_CM_ACCUM_ROUND(xv, thr, shift_sub, xv_sq, add_shift_sq, shift_sq, val,
                                  add_shift_cub, shift_cub, accum_inner_v);
            I4_ADM_CM_ACCUM_ROUND(xd, thr, shift_sub, xd_sq, add_shift_sq, shift_sq, val,
                                  add_shift_cub, shift_cub, accum_inner_d);

            accum_h += (accum_inner_h + add_shift_inner_accum) >> shift_inner_accum;
            accum_v += (accum_inner_v + add_shift_inner_accum) >> shift_inner_accum;
            accum_d += (accum_inner_d + add_shift_inner_accum) >> shift_inner_accum;
        }
    }

    accum_inner_h = 0;
    accum_inner_v = 0;
    accum_inner_d = 0;

    /* i=h-1,j=0 */
    if ((bottom > (h - 1)) && (left <= 0))
    {
        xh = (int32_t)((((int64_t)src->band_h[(h - 1) * src_stride] * rfactor[0]) +
            add_bef_shift_dst[scale - 1]) >> shift_dst[scale - 1]);
        xv = (int32_t)((((int64_t)src->band_v[(h - 1) * src_stride] * rfactor[1]) +
            add_bef_shift_dst[scale - 1]) >> shift_dst[scale - 1]);
        xd = (int32_t)((((int64_t)src->band_d[(h - 1) * src_stride] * rfactor[2]) +
            add_bef_shift_dst[scale - 1]) >> shift_dst[scale - 1]);

        I4_ADM_CM_THRESH_S_H_M_1_0(angles, flt_angles, csf_a_stride, &thr, w, h, (h - 1), 0,
                                   add_bef_shift_flt[scale - 1], shift_flt[scale - 1]);

        I4_ADM_CM_ACCUM_ROUND(xh, thr, shift_sub, xh_sq, add_shift_sq, shift_sq, val,
                              add_shift_cub, shift_cub, accum_inner_h);
        I4_ADM_CM_ACCUM_ROUND(xv, thr, shift_sub, xv_sq, add_shift_sq, shift_sq, val,
                              add_shift_cub, shift_cub, accum_inner_v);
        I4_ADM_CM_ACCUM_ROUND(xd, thr, shift_sub, xd_sq, add_shift_sq, shift_sq, val,
                              add_shift_cub, shift_cub, accum_inner_d);
    }

    /* i=h-1,j */
    if (bottom > (h - 1))
    {
        for (j = start_col; j < end_col; ++j)
        {
            xh = (int32_t)((((int64_t)src->band_h[(h - 1) * src_stride + j] * rfactor[0]) +
                add_bef_shift_dst[scale - 1]) >> shift_dst[scale - 1]);
            xv = (int32_t)((((int64_t)src->band_v[(h - 1) * src_stride + j] * rfactor[1]) +
                add_bef_shift_dst[scale - 1]) >> shift_dst[scale - 1]);
            xd = (int32_t)((((int64_t)src->band_d[(h - 1) * src_stride + j] * rfactor[2]) +
                add_bef_shift_dst[scale - 1]) >> shift_dst[scale - 1]);

            I4_ADM_CM_THRESH_S_H_M_1_J(angles, flt_angles, csf_a_stride, &thr, w, h, (h - 1), j,
                                       add_bef_shift_flt[scale - 1], shift_flt[scale - 1]);

            I4_ADM_CM_ACCUM_ROUND(xh, thr, shift_sub, xh_sq, add_shift_sq, shift_sq, val,
                                  add_shift_cub, shift_cub, accum_inner_h);
            I4_ADM_CM_ACCUM_ROUND(xv, thr, shift_sub, xv_sq, add_shift_sq, shift_sq, val,
                                  add_shift_cub, shift_cub, accum_inner_v);
            I4_ADM_CM_ACCUM_ROUND(xd, thr, shift_sub, xd_sq, add_shift_sq, shift_sq, val,
                                  add_shift_cub, shift_cub, accum_inner_d);
        }
    }

    /* i=h-1,j=w-1 */
    if ((bottom > (h - 1)) && (right > (w - 1)))
    {
        xh = (int32_t)((((int64_t)src->band_h[(h - 1) * src_stride + w - 1] * rfactor[0]) +
            add_bef_shift_dst[scale - 1]) >> shift_dst[scale - 1]);
        xv = (int32_t)((((int64_t)src->band_v[(h - 1) * src_stride + w - 1] * rfactor[1]) +
            add_bef_shift_dst[scale - 1]) >> shift_dst[scale - 1]);
        xd = (int32_t)((((int64_t)src->band_d[(h - 1) * src_stride + w - 1] * rfactor[2]) +
            add_bef_shift_dst[scale - 1]) >> shift_dst[scale - 1]);

        I4_ADM_CM_THRESH_S_H_M_1_W_M_1(angles, flt_angles, csf_a_stride, &thr, w, h, (h - 1),
                                       (w - 1), add_bef_shift_flt[scale - 1], shift_flt[scale - 1]);

        I4_ADM_CM_ACCUM_ROUND(xh, thr, shift_sub, xh_sq, add_shift_sq, shift_sq, val,
                              add_shift_cub, shift_cub, accum_inner_h);
        I4_ADM_CM_ACCUM_ROUND(xv, thr, shift_sub, xv_sq, add_shift_sq, shift_sq, val,
                              add_shift_cub, shift_cub, accum_inner_v);
        I4_ADM_CM_ACCUM_ROUND(xd, thr, shift_sub, xd_sq, add_shift_sq, shift_sq, val,
                              add_shift_cub, shift_cub, accum_inner_d);
    }
    accum_h += (accum_inner_h + add_shift_inner_accum) >> shift_inner_accum;
    accum_v += (accum_inner_v + add_shift_inner_accum) >> shift_inner_accum;
    accum_d += (accum_inner_d + add_shift_inner_accum) >> shift_inner_accum;

    /**
     * Converted to floating-point for calculating the final scores
     * Final shifts is calculated from 3*(shifts_from_previous_stage(i.e src comes from dwt)+32)-total_shifts_done_in_this_function
     */
    float f_accum_h = (float)(accum_h / final_shift[scale - 1]);
    float f_accum_v = (float)(accum_v / final_shift[scale - 1]);
    float f_accum_d = (float)(accum_d / final_shift[scale - 1]);

    // Cube-root (L3 norm) of each band sum plus the noise-floor term.
    float num_scale_h = powf(f_accum_h, 1.0f / 3.0f) + powf((bottom - top) * (right - left) / 32.0f, 1.0f / 3.0f);
    float num_scale_v = powf(f_accum_v, 1.0f / 3.0f) + powf((bottom - top) * (right - left) / 32.0f, 1.0f / 3.0f);
    float num_scale_d = powf(f_accum_d, 1.0f / 3.0f) + powf((bottom - top) * (right - left) / 32.0f, 1.0f / 3.0f);

    return (num_scale_h + num_scale_v + num_scale_d);
}