pdq/cpp/downscaling/downscaling.cpp (294 lines of code) (raw):

// ================================================================ // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved // ================================================================ #include "downscaling.h" #include <math.h> #include <stdio.h> #include <stdlib.h> #include <chrono> namespace facebook { namespace pdq { namespace downscaling { // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - // From Wikipedia: standard RGB to luminance (the 'Y' in 'YUV'). static const float luma_from_R_coeff = 0.299; static const float luma_from_G_coeff = 0.587; static const float luma_from_B_coeff = 0.114; // ---------------------------------------------------------------- void fillFloatRGB( const uint8_t* pRbase, const uint8_t* pGbase, const uint8_t* pBbase, int numRows, int numCols, int rowStride, int colStride, float* pFloatR, // matrix as num_rows x num_cols in row-major order float* pFloatG, // matrix as num_rows x num_cols in row-major order float* pFloatB // matrix as num_rows x num_cols in row-major order ) { const uint8_t* pRrow = pRbase; const uint8_t* pGrow = pGbase; const uint8_t* pBrow = pBbase; for (int i = 0; i < numRows; i++) { const uint8_t* pR = pRrow; const uint8_t* pG = pGrow; const uint8_t* pB = pBrow; for (int j = 0; j < numCols; j++) { pFloatR[i * numCols + j] = (float)*pR; pFloatG[i * numCols + j] = (float)*pG; pFloatB[i * numCols + j] = (float)*pB; pR += colStride; pG += colStride; pB += colStride; } pRrow += rowStride; pGrow += rowStride; pBrow += rowStride; } } // ---------------------------------------------------------------- void fillFloatRGBFromGrey( const uint8_t* pbase, int numRows, int numCols, int rowStride, int colStride, float* pFloatR, // matrix as num_rows x num_cols in row-major order float* pFloatG, // matrix as num_rows x num_cols in row-major order float* pFloatB // matrix as num_rows x num_cols in row-major order ) { const uint8_t* prow = pbase; for (int i = 0; i < numRows; i++) { const uint8_t* p = prow; for (int j = 0; j < numCols; j++) { pFloatR[i * numCols + j] = (float)*p; pFloatG[i * numCols + j] = (float)*p; pFloatB[i * numCols + j] = (float)*p; p += colStride; } prow += rowStride; } } // ---------------------------------------------------------------- void fillFloatLumaFromRGB( const uint8_t* pRbase, const uint8_t* pGbase, const uint8_t* pBbase, int numRows, int numCols, int rowStride, int colStride, float* luma // matrix as num_rows x num_cols in row-major order ) { const uint8_t* pRrow = pRbase; const uint8_t* pGrow = pGbase; const uint8_t* pBrow = pBbase; for (int i = 0; i < numRows; i++) { const uint8_t* pR = pRrow; const uint8_t* pG = pGrow; const uint8_t* pB = pBrow; for (int j = 0; j < numCols; j++) { float yval = luma_from_R_coeff * (*pR) + luma_from_G_coeff * (*pG) + luma_from_B_coeff * (*pB); luma[i * numCols + j] = yval; pR += colStride; pG += colStride; pB += colStride; } pRrow += rowStride; pGrow += rowStride; pBrow += rowStride; } } // ---------------------------------------------------------------- void fillFloatLumaFromGrey( const uint8_t* pbase, int numRows, int numCols, int rowStride, int colStride, float* luma // matrix as num_rows x num_cols in row-major order ) { const uint8_t* prow = pbase; for (int i = 0; i < numRows; i++) { const uint8_t* p = prow; for (int j = 0; j < numCols; j++) { luma[i * numCols + j] = (float)(*p); p += colStride; } prow += rowStride; } } // ---------------------------------------------------------------- void decimateFloat( const float* in, // matrix as in_num_rows x in_num_cols in row-major order int inNumRows, int inNumCols, float* out, // matrix as out_num_rows x out_num_cols in row-major order int outNumRows, int outNumCols) { // target centers not corners: for (int outi = 0; outi < outNumRows; outi++) { int ini = (int)(((outi + 0.5) * inNumRows) / outNumRows); for (int outj = 0; outj < outNumCols; outj++) { int inj = (int)(((outj + 0.5) * inNumCols) / outNumCols); out[outi * outNumCols + outj] = in[ini * inNumCols + inj]; } } } // ---------------------------------------------------------------- void scaleFloatLuma( float* fullBuffer1, // matrix as num_rows x num_cols in row-major order float* fullBuffer2, // matrix as num_rows x num_cols in row-major order int oldNumRows, int oldNumCols, int numJaroszXYPasses, float* scaledLuma, // matrix as num_rows x num_cols in row-major order int newNumRows, int newNumCols) { // Downsample (blur and decimate) int windowSizeAlongRows = computeJaroszFilterWindowSize(oldNumCols, newNumCols); int windowSizeAlongCols = computeJaroszFilterWindowSize(oldNumRows, newNumRows); jaroszFilterFloat( fullBuffer1, fullBuffer2, oldNumRows, oldNumCols, windowSizeAlongRows, windowSizeAlongCols, numJaroszXYPasses); decimateFloat( fullBuffer1, oldNumRows, oldNumCols, scaledLuma, newNumRows, newNumCols); } // ---------------------------------------------------------------- void scaleFloatRGB( float* fullBufferR1, // matrix as num_rows x num_cols in row-major order float* fullBufferG1, // matrix as num_rows x num_cols in row-major order float* fullBufferB1, // matrix as num_rows x num_cols in row-major order float* fullBufferR2, // matrix as num_rows x num_cols in row-major order float* fullBufferG2, // matrix as num_rows x num_cols in row-major order float* fullBufferB2, // matrix as num_rows x num_cols in row-major order int oldNumRows, int oldNumCols, int numJaroszXYPasses, float* scaledR, // matrix as num_rows x num_cols in row-major order float* scaledG, // matrix as num_rows x num_cols in row-major order float* scaledB, // matrix as num_rows x num_cols in row-major order int newNumRows, int newNumCols) { if (newNumRows == oldNumRows && newNumCols == oldNumCols) { // E.g. for video-frame processing when we've already used ffmpeg // to downsample for us. int n = oldNumRows * oldNumCols; for (int i = 0; i < n; i++) { scaledR[i] = fullBufferR1[i]; scaledG[i] = fullBufferG1[i]; scaledB[i] = fullBufferB1[i]; } } else { // Downsample (blur and decimate) int windowSizeAlongRows = computeJaroszFilterWindowSize(oldNumCols, newNumCols); int windowSizeAlongCols = computeJaroszFilterWindowSize(oldNumRows, newNumRows); jaroszFilterFloat( fullBufferR1, fullBufferR2, oldNumRows, oldNumCols, windowSizeAlongRows, windowSizeAlongCols, numJaroszXYPasses); jaroszFilterFloat( fullBufferG1, fullBufferG2, oldNumRows, oldNumCols, windowSizeAlongRows, windowSizeAlongCols, numJaroszXYPasses); jaroszFilterFloat( fullBufferB1, fullBufferB2, oldNumRows, oldNumCols, windowSizeAlongRows, windowSizeAlongCols, numJaroszXYPasses); decimateFloat( fullBufferR1, oldNumRows, oldNumCols, scaledR, newNumRows, newNumCols); decimateFloat( fullBufferG1, oldNumRows, oldNumCols, scaledG, newNumRows, newNumCols); decimateFloat( fullBufferB1, oldNumRows, oldNumCols, scaledB, newNumRows, newNumCols); } } // ================================================================ // Round up. See comments at top of file for details. // // Since PDQ uses 64x64 blocks, 1/64th of the image height/width respectively // is a full block. But since we use two passes, we want half that window size // per pass. Example: 1024x1024 full-resolution input. PDQ downsamples to // 64x64. Each 16x16 block of the input produces a single downsample pixel. // X,Y passes with window size 8 (= 1024/128) average pixels with 8x8 // neighbors. The second X,Y pair of 1D box-filter passes accumulate data from // all 16x16. // // Generalizing beyond PDQ's 64x64 downsample to MxN, the desired value is 2M // or 2N. int computeJaroszFilterWindowSize(int oldDimension, int newDimension) { return (oldDimension + 2 * newDimension - 1) / (2 * newDimension); } // ---------------------------------------------------------------- void jaroszFilterFloat( float* buffer1, // matrix as num_rows x num_cols in row-major order float* buffer2, // matrix as num_rows x num_cols in row-major order int numRows, int numCols, int windowSizeAlongRows, int windowSizeAlongCols, int nreps) { for (int i = 0; i < nreps; i++) { boxAlongRowsFloat(buffer1, buffer2, numRows, numCols, windowSizeAlongRows); boxAlongColsFloat(buffer2, buffer1, numRows, numCols, windowSizeAlongCols); } } // ---------------------------------------------------------------- // 7 and 4 // // 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 // 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 // // . PHASE 1: ONLY ADD, NO WRITE, NO SUBTRACT // . . // . . . // // 0 * . . . PHASE 2: ADD, WRITE, WITH NO SUBTRACTS // 1 . * . . . // 2 . . * . . . // 3 . . . * . . . // // 4 . . . * . . . PHASE 3: WRITES WITH ADD & SUBTRACT // 5 . . . * . . . // 6 . . . * . . . // 7 . . . * . . . // 8 . . . * . . . // 9 . . . * . . . // 10 . . . * . . . // 11 . . . * . . . // 12 . . . * . . . // // 13 . . . * . . PHASE 4: FINAL WRITES WITH NO ADDS // 14 . . . * . // 15 . . . * // // = 0 = 0 PHASE 1 // = 0+1 = 1 // = 0+1+2 = 3 // // out[ 0] = 0+1+2+3 = 6 PHASE 2 // out[ 1] = 0+1+2+3+4 = 10 // out[ 2] = 0+1+2+3+4+5 = 15 // out[ 3] = 0+1+2+3+4+5+6 = 21 // // out[ 4] = 1+2+3+4+5+6+7 = 28 PHASE 3 // out[ 5] = 2+3+4+5+6+7+8 = 35 // out[ 6] = 3+4+5+6+7+8+9 = 42 // out[ 7] = 4+5+6+7+8+9+10 = 49 // out[ 8] = 5+6+7+8+9+10+11 = 56 // out[ 9] = 6+7+8+9+10+11+12 = 63 // out[10] = 7+8+9+10+11+12+13 = 70 // out[11] = 8+9+10+11+12+13+14 = 77 // out[12] = 9+10+11+12+13+14+15 = 84 // // out[13] = 10+11+12+13+14+15 = 75 PHASE 4 // out[14] = 11+12+13+14+15 = 65 // out[15] = 12+13+14+15 = 54 // ---------------------------------------------------------------- // ---------------------------------------------------------------- // 8 and 5 // // 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 // 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 // // . PHASE 1: ONLY ADD, NO WRITE, NO SUBTRACT // . . // . . . // . . . . // // 0 * . . . . PHASE 2: ADD, WRITE, WITH NO SUBTRACTS // 1 . * . . . . // 2 . . * . . . . // 3 . . . * . . . . // // 4 . . . * . . . . PHASE 3: WRITES WITH ADD & SUBTRACT // 5 . . . * . . . . // 6 . . . * . . . . // 7 . . . * . . . . // 8 . . . * . . . . // 9 . . . * . . . . // 10 . . . * . . . . // 11 . . . * . . . . // // 12 . . . * . . . PHASE 4: FINAL WRITES WITH NO ADDS // 13 . . . * . . // 14 . . . * . // 15 . . . * // // = 0 = 0 PHASE 1 // = 0+1 = 1 // = 0+1+2 = 3 // = 0+1+2+3 = 6 // // out[ 0] = 0+1+2+3+4 = 10 // out[ 1] = 0+1+2+3+4+5 = 15 // out[ 2] = 0+1+2+3+4+5+6 = 21 // out[ 3] = 0+1+2+3+4+5+6+7 = 28 // // out[ 4] = 1+2+3+4+5+6+7+8 = 36 PHASE 3 // out[ 5] = 2+3+4+5+6+7+8+9 = 44 // out[ 6] = 3+4+5+6+7+8+9+10 = 52 // out[ 7] = 4+5+6+7+8+9+10+11 = 60 // out[ 8] = 5+6+7+8+9+10+11+12 = 68 // out[ 9] = 6+7+8+9+10+11+12+13 = 76 // out[10] = 7+8+9+10+11+12+13+14 = 84 // out[11] = 8+9+10+11+12+13+14+15 = 92 // // out[12] = 9+10+11+12+13+14+15 = 84 PHASE 4 // out[13] = 10+11+12+13+14+15 = 75 PHASE 4 // out[14] = 11+12+13+14+15 = 65 // out[15] = 12+13+14+15 = 54 // ---------------------------------------------------------------- void box1DFloat( const float* invec, float* outvec, int vector_length, int stride, int full_window_size) { int half_window_size = (full_window_size + 2) / 2; // 7->4, 8->5 int phase_1_nreps = half_window_size - 1; int phase_2_nreps = full_window_size - half_window_size + 1; int phase_3_nreps = vector_length - full_window_size; int phase_4_nreps = half_window_size - 1; int li = 0; // Index of left edge of read window, for subtracts int ri = 0; // Index of right edge of read windows, for adds int oi = 0; // Index into output vector float sum = 0.0; int current_window_size = 0; // PHASE 1: ACCUMULATE FIRST SUM NO WRITES for (int i = 0; i < phase_1_nreps; i++) { sum += invec[ri]; current_window_size++; ri += stride; } // PHASE 2: INITIAL WRITES WITH SMALL WINDOW for (int i = 0; i < phase_2_nreps; i++) { sum += invec[ri]; current_window_size++; outvec[oi] = sum / current_window_size; ri += stride; oi += stride; } // PHASE 3: WRITES WITH FULL WINDOW for (int i = 0; i < phase_3_nreps; i++) { sum += invec[ri]; sum -= invec[li]; outvec[oi] = sum / current_window_size; li += stride; ri += stride; oi += stride; } // PHASE 4: FINAL WRITES WITH SMALL WINDOW for (int i = 0; i < phase_4_nreps; i++) { sum -= invec[li]; current_window_size--; outvec[oi] = sum / current_window_size; li += stride; oi += stride; } } // ---------------------------------------------------------------- void boxAlongRowsFloat( const float* in, // matrix as num_rows x num_cols in row-major order float* out, // matrix as num_rows x num_cols in row-major order int numRows, int numCols, int window_size) { for (int i = 0; i < numRows; i++) { box1DFloat(&in[i * numCols], &out[i * numCols], numCols, 1, window_size); } } // ---------------------------------------------------------------- void boxAlongColsFloat( const float* in, // matrix as num_rows x num_cols in row-major order float* out, // matrix as num_rows x num_cols in row-major order int numRows, int numCols, int window_size) { for (int j = 0; j < numCols; j++) { box1DFloat(&in[j], &out[j], numRows, numCols, window_size); } } } // namespace downscaling } // namespace pdq } // namespace facebook