in libvmaf/src/feature/integer_vif.c [345:486]
void vif_statistic_16(struct VifPublicState *s, float *num, float *den, unsigned w, unsigned h, int bpc, int scale) {
const unsigned fwidth = vif_filter1d_width[scale];
const uint16_t *vif_filt = vif_filter1d_table[scale];
VifBuffer buf = s->buf;
int64_t accum_num_log = 0.0;
int64_t accum_den_log = 0.0;
int64_t accum_num_non_log = 0;
int64_t accum_den_non_log = 0;
static const int32_t sigma_nsq = 65536 << 1;
uint16_t *log2_table = s->log2_table;
double vif_enhn_gain_limit = s->vif_enhn_gain_limit;
int32_t add_shift_round_HP, shift_HP;
int32_t add_shift_round_VP, shift_VP;
int32_t add_shift_round_VP_sq, shift_VP_sq;
if (scale == 0) {
shift_HP = 16;
add_shift_round_HP = 32768;
shift_VP = bpc;
add_shift_round_VP = 1 << (bpc - 1);
shift_VP_sq = (bpc - 8) * 2;
add_shift_round_VP_sq = (bpc == 8) ? 0 : 1 << (shift_VP_sq - 1);
}
else {
shift_HP = 16;
add_shift_round_HP = 32768;
shift_VP = 16;
add_shift_round_VP = 32768;
shift_VP_sq = 16;
add_shift_round_VP_sq = 32768;
}
for (unsigned i = 0; i < h; ++i) {
//VERTICAL
for (unsigned j = 0; j < w; ++j) {
uint32_t accum_mu1 = 0;
uint32_t accum_mu2 = 0;
uint64_t accum_ref = 0;
uint64_t accum_dis = 0;
uint64_t accum_ref_dis = 0;
for (unsigned fi = 0; fi < fwidth; ++fi) {
int ii = i - fwidth / 2;
int ii_check = ii + fi;
const uint16_t fcoeff = vif_filt[fi];
const ptrdiff_t stride = buf.stride / sizeof(uint16_t);
uint16_t *ref = buf.ref;
uint16_t *dis = buf.dis;
uint16_t imgcoeff_ref = ref[ii_check * stride + j];
uint16_t imgcoeff_dis = dis[ii_check * stride + j];
uint32_t img_coeff_ref = fcoeff * (uint32_t)imgcoeff_ref;
uint32_t img_coeff_dis = fcoeff * (uint32_t)imgcoeff_dis;
accum_mu1 += img_coeff_ref;
accum_mu2 += img_coeff_dis;
accum_ref += img_coeff_ref * (uint64_t)imgcoeff_ref;
accum_dis += img_coeff_dis * (uint64_t)imgcoeff_dis;
accum_ref_dis += img_coeff_ref * (uint64_t)imgcoeff_dis;
}
buf.tmp.mu1[j] = (uint16_t)((accum_mu1 + add_shift_round_VP) >> shift_VP);
buf.tmp.mu2[j] = (uint16_t)((accum_mu2 + add_shift_round_VP) >> shift_VP);
buf.tmp.ref[j] = (uint32_t)((accum_ref + add_shift_round_VP_sq) >> shift_VP_sq);
buf.tmp.dis[j] = (uint32_t)((accum_dis + add_shift_round_VP_sq) >> shift_VP_sq);
buf.tmp.ref_dis[j] = (uint32_t)((accum_ref_dis + add_shift_round_VP_sq) >> shift_VP_sq);
}
PADDING_SQ_DATA(buf, w, fwidth / 2);
//HORIZONTAL
for (unsigned j = 0; j < w; ++j) {
uint32_t accum_mu1 = 0;
uint32_t accum_mu2 = 0;
uint64_t accum_ref = 0;
uint64_t accum_dis = 0;
uint64_t accum_ref_dis = 0;
for (unsigned fj = 0; fj < fwidth; ++fj) {
int jj = j - fwidth / 2;
int jj_check = jj + fj;
const uint16_t fcoeff = vif_filt[fj];
accum_mu1 += fcoeff * ((uint32_t)buf.tmp.mu1[jj_check]);
accum_mu2 += fcoeff * ((uint32_t)buf.tmp.mu2[jj_check]);
accum_ref += fcoeff * ((uint64_t)buf.tmp.ref[jj_check]);
accum_dis += fcoeff * ((uint64_t)buf.tmp.dis[jj_check]);
accum_ref_dis += fcoeff * ((uint64_t)buf.tmp.ref_dis[jj_check]);
}
uint32_t mu1_val = accum_mu1;
uint32_t mu2_val = accum_mu2;
uint32_t mu1_sq_val = (uint32_t)((((uint64_t)mu1_val * mu1_val)
+ 2147483648) >> 32);
uint32_t mu2_sq_val = (uint32_t)((((uint64_t)mu2_val * mu2_val)
+ 2147483648) >> 32);
uint32_t mu1_mu2_val = (uint32_t)((((uint64_t)mu1_val * mu2_val)
+ 2147483648) >> 32);
uint32_t xx_filt_val = (uint32_t)((accum_ref + add_shift_round_HP) >> shift_HP);
uint32_t yy_filt_val = (uint32_t)((accum_dis + add_shift_round_HP) >> shift_HP);
uint32_t xy_filt_val = (uint32_t)((accum_ref_dis + add_shift_round_HP) >> shift_HP);
int32_t sigma1_sq = (int32_t)(xx_filt_val - mu1_sq_val);
int32_t sigma2_sq = (int32_t)(yy_filt_val - mu2_sq_val);
int32_t sigma12 = (int32_t)(xy_filt_val - mu1_mu2_val);
sigma2_sq = MAX(sigma2_sq, 0);
if (sigma1_sq >= sigma_nsq) {
/**
* log values are taken from the look-up table generated by
* log_generate() function which is called in integer_combo_threadfunc
* den_val in float is log2(1 + sigma1_sq/2)
* here it is converted to equivalent of log2(2+sigma1_sq) - log2(2) i.e log2(2*65536+sigma1_sq) - 17
* multiplied by 2048 as log_value = log2(i)*2048 i=16384 to 65535 generated using log_value
* x because best 16 bits are taken
*/
accum_den_log += log2_32(log2_table, sigma_nsq + sigma1_sq) - 2048 * 17;
if (sigma12 > 0 && sigma2_sq > 0) {
/**
* In floating-point numerator = log2((1.0f + (g * g * sigma1_sq)/(sv_sq + sigma_nsq))
*
* In Fixed-point the above is converted to
* numerator = log2((sv_sq + sigma_nsq)+(g * g * sigma1_sq))- log2(sv_sq + sigma_nsq)
*/
const double eps = 65536 * 1.0e-10;
double g = sigma12 / (sigma1_sq + eps); // this epsilon can go away
int32_t sv_sq = sigma2_sq - g * sigma12;
sv_sq = (uint32_t)(MAX(sv_sq, 0));
g = MIN(g, vif_enhn_gain_limit);
uint32_t numer1 = (sv_sq + sigma_nsq);
int64_t numer1_tmp = (int64_t)((g * g * sigma1_sq)) + numer1; //numerator
accum_num_log += log2_64(log2_table, numer1_tmp) - log2_64(log2_table, numer1);
}
}
else {
accum_num_non_log += sigma2_sq;
accum_den_non_log++;
}
}
}
num[0] = accum_num_log / 2048.0 + (accum_den_non_log - ((accum_num_non_log) / 16384.0) / (65025.0));
den[0] = accum_den_log / 2048.0 + accum_den_non_log;
}