in libvmaf/src/feature/integer_adm.c [2262:2423]
/*
 * One 4-tap filter step: multiply the four samples by taps[0..3], sum the
 * products in 64 bits, add the rounding offset, right-shift and narrow the
 * result to 32 bits.
 */
static inline int32_t adm_dwt2_s123_filter4(const int16_t *taps, int32_t x0, int32_t x1,
                                            int32_t x2, int32_t x3, int32_t round_add,
                                            int shift)
{
    int64_t acc = (int64_t)taps[0] * x0;
    acc += (int64_t)taps[1] * x1;
    acc += (int64_t)taps[2] * x2;
    acc += (int64_t)taps[3] * x3;
    return (int32_t)((acc + round_add) >> shift);
}

/*
 * Runs one two-pass (vertical, then horizontal) 4-tap decomposition on the
 * reference and distorted planes together, writing four bands per image into
 * buf->i4_ref_dwt2 and buf->i4_dis_dwt2.
 *
 * For every output row the vertical pass filters all w input columns with the
 * low and high taps into four scratch rows carved out of buf->tmp_ref (so that
 * buffer must hold at least 4 * w int32_t values). The horizontal pass then
 * filters those scratch rows into (w + 1) / 2 output columns:
 *   band_a = vertical lo, horizontal lo
 *   band_v = vertical lo, horizontal hi
 *   band_h = vertical hi, horizontal lo
 *   band_d = vertical hi, horizontal hi
 *
 * i4_ref_scale / i4_curr_dis : input planes (reference / distorted).
 * buf                        : supplies the tap index tables ind_y / ind_x,
 *                              the scratch row and the output bands.
 * w, h                       : input size; the output is (w+1)/2 x (h+1)/2.
 * ref_stride, dis_stride     : input row pitch, counted in int32_t elements.
 * dst_stride                 : output row pitch, counted in band elements.
 * scale                      : selects rounding/shift via index scale - 1 into
 *                              3-entry tables, so it is expected to be 1..3.
 */
static void adm_dwt2_s123_combined(const int32_t *i4_ref_scale, const int32_t *i4_curr_dis,
                                   AdmBuffer *buf, int w, int h, int ref_stride,
                                   int dis_stride, int dst_stride, int scale)
{
    /* Rounding offsets and shift amounts for each pass, indexed by scale - 1. */
    const int32_t vp_round[3] = { 0, 32768, 32768 };
    const int32_t hp_round[3] = { 16384, 32768, 16384 };
    const int16_t vp_shift[3] = { 0, 16, 16 };
    const int16_t hp_shift[3] = { 15, 16, 15 };
    const int lvl = scale - 1;

    const int16_t *lo = dwt2_db2_coeffs_lo;
    const int16_t *hi = dwt2_db2_coeffs_hi;

    const i4_adm_dwt_band_t *ref_out = &buf->i4_ref_dwt2;
    const i4_adm_dwt_band_t *dis_out = &buf->i4_dis_dwt2;
    int **ind_y = buf->ind_y;
    int **ind_x = buf->ind_x;

    /* Four consecutive scratch rows of w entries each. */
    int32_t *ref_lo_row = buf->tmp_ref;
    int32_t *ref_hi_row = ref_lo_row + w;
    int32_t *dis_lo_row = ref_hi_row + w;
    int32_t *dis_hi_row = dis_lo_row + w;

    const int out_rows = (h + 1) / 2;
    const int out_cols = (w + 1) / 2;

    for (int row = 0; row < out_rows; ++row)
    {
        /* Source rows feeding the four vertical taps of this output row. */
        const int y0 = ind_y[0][row];
        const int y1 = ind_y[1][row];
        const int y2 = ind_y[2][row];
        const int y3 = ind_y[3][row];

        /* Vertical pass: one lo and one hi sample per input column, per image. */
        for (int col = 0; col < w; ++col)
        {
            int32_t x0 = i4_ref_scale[y0 * ref_stride + col];
            int32_t x1 = i4_ref_scale[y1 * ref_stride + col];
            int32_t x2 = i4_ref_scale[y2 * ref_stride + col];
            int32_t x3 = i4_ref_scale[y3 * ref_stride + col];
            ref_lo_row[col] = adm_dwt2_s123_filter4(lo, x0, x1, x2, x3,
                                                    vp_round[lvl], vp_shift[lvl]);
            ref_hi_row[col] = adm_dwt2_s123_filter4(hi, x0, x1, x2, x3,
                                                    vp_round[lvl], vp_shift[lvl]);

            x0 = i4_curr_dis[y0 * dis_stride + col];
            x1 = i4_curr_dis[y1 * dis_stride + col];
            x2 = i4_curr_dis[y2 * dis_stride + col];
            x3 = i4_curr_dis[y3 * dis_stride + col];
            dis_lo_row[col] = adm_dwt2_s123_filter4(lo, x0, x1, x2, x3,
                                                    vp_round[lvl], vp_shift[lvl]);
            dis_hi_row[col] = adm_dwt2_s123_filter4(hi, x0, x1, x2, x3,
                                                    vp_round[lvl], vp_shift[lvl]);
        }

        /* Horizontal pass: lo and hi filtering of each scratch row. */
        for (int col = 0; col < out_cols; ++col)
        {
            const int c0 = ind_x[0][col];
            const int c1 = ind_x[1][col];
            const int c2 = ind_x[2][col];
            const int c3 = ind_x[3][col];
            const int dst = row * dst_stride + col;
            int32_t x0, x1, x2, x3;

            /* Reference, vertical-lo row -> band_a / band_v. */
            x0 = ref_lo_row[c0];
            x1 = ref_lo_row[c1];
            x2 = ref_lo_row[c2];
            x3 = ref_lo_row[c3];
            ref_out->band_a[dst] = adm_dwt2_s123_filter4(lo, x0, x1, x2, x3,
                                                         hp_round[lvl], hp_shift[lvl]);
            ref_out->band_v[dst] = adm_dwt2_s123_filter4(hi, x0, x1, x2, x3,
                                                         hp_round[lvl], hp_shift[lvl]);

            /* Reference, vertical-hi row -> band_h / band_d. */
            x0 = ref_hi_row[c0];
            x1 = ref_hi_row[c1];
            x2 = ref_hi_row[c2];
            x3 = ref_hi_row[c3];
            ref_out->band_h[dst] = adm_dwt2_s123_filter4(lo, x0, x1, x2, x3,
                                                         hp_round[lvl], hp_shift[lvl]);
            ref_out->band_d[dst] = adm_dwt2_s123_filter4(hi, x0, x1, x2, x3,
                                                         hp_round[lvl], hp_shift[lvl]);

            /* Distorted, vertical-lo row -> band_a / band_v. */
            x0 = dis_lo_row[c0];
            x1 = dis_lo_row[c1];
            x2 = dis_lo_row[c2];
            x3 = dis_lo_row[c3];
            dis_out->band_a[dst] = adm_dwt2_s123_filter4(lo, x0, x1, x2, x3,
                                                         hp_round[lvl], hp_shift[lvl]);
            dis_out->band_v[dst] = adm_dwt2_s123_filter4(hi, x0, x1, x2, x3,
                                                         hp_round[lvl], hp_shift[lvl]);

            /* Distorted, vertical-hi row -> band_h / band_d. */
            x0 = dis_hi_row[c0];
            x1 = dis_hi_row[c1];
            x2 = dis_hi_row[c2];
            x3 = dis_hi_row[c3];
            dis_out->band_h[dst] = adm_dwt2_s123_filter4(lo, x0, x1, x2, x3,
                                                         hp_round[lvl], hp_shift[lvl]);
            dis_out->band_d[dst] = adm_dwt2_s123_filter4(hi, x0, x1, x2, x3,
                                                         hp_round[lvl], hp_shift[lvl]);
        }
    }
}