pdq/php/pdqhasher.php (647 lines of code) (raw):
<?php
// ================================================================
// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
// ================================================================
// ================================================================
// This file is bottom-up: more detailed methods at the top, main
// entry points at the bottom. Namely:
//
// * computeHashAndQualityFromFilename:
// Returns an array of PDQHash object and integer 0-100 quality.
//
// * computeHashesAndQualityFromFilename:
// Returns an array of [array of eight PDQHash objects keyed by rotation/flip
// name] and integer 0-100 quality.
//
// These first two are pure-PHP. The downsample phase is the most expensive
// part of PDQ, so these use the GD library to downsample first.
//
// * computeStringHashAndQualityFromFilenameUsingExtension:
// Returns an array of hex-string hash and integer 0-100 quality.
//
// * computeStringHashesAndQualityFromFilenameUsingExtension
// Returns array of [array of eight hex-string hashes keyed by rotation/flip
// name] and integer 0-100 quality.
//
// These last two use a C-language Zend extension to do most of the work.
// ================================================================
require 'pdqhash.php';
class PDQHasher {
const LUMA_FROM_R_COEFF = 0.299;
const LUMA_FROM_G_COEFF = 0.587;
const LUMA_FROM_B_COEFF = 0.114;
const PDQ_JAROSZ_WINDOW_SIZE_DIVISOR = 128;
const PDQ_NUM_JAROSZ_XY_PASSES = 2;
// Hashes for various dihedral transformations of the image.
// Note, you can also transform the image and hash that.
const DIH_ORIGINAL = 0x01;
const DIH_ROTATE_90 = 0x02;
const DIH_ROTATE_180 = 0x04;
const DIH_ROTATE_270 = 0x08;
const DIH_FLIP_X = 0x10;
const DIH_FLIP_Y = 0x20;
const DIH_FLIP_PLUS_1 = 0x40;
const DIH_FLIP_MINUS_1 = 0x80;
const DIH_ALL = 0xff;
// ----------------------------------------------------------------
// Handles greyscale or RGB.
//
// It's not obvious (to me) which is which: for JPEG the channel count is
// available from getimagesize() while for PNG it isn't.
//
// But it doesn't matter due to how PHP handles pixels: for RGB the triples
// are OR'ed into a 24-bit value while for greyscale they're the lower 8
// bits. So we get greyscale as 'blue', multiplied by an arbitrary scaling
// coefficient which doesn't affect the median property of the DCT output.
static function imageToLumaMatrix(
$image, // resource
$num_rows,
$num_cols
) {
$luma_matrix = array();
for ($i = 0; $i < $num_rows; $i++) {
$row = array();
for ($j = 0; $j < $num_cols; $j++) {
$pixel = imagecolorat($image, $j, $i);
$r = $pixel >> 16;
$g = ($pixel >> 8) & 0xff;
$b = $pixel & 0xff;
$y = self::LUMA_FROM_R_COEFF * $r
+ self::LUMA_FROM_G_COEFF * $g
+ self::LUMA_FROM_B_COEFF * $b;
$row[$j] = $y;
}
$luma_matrix[$i] = $row;
}
return $luma_matrix;
}
// ================================================================
// Wojciech Jarosz 'Fast Image Convolutions' ACM SIGGRAPH 2001:
// X,Y,X,Y passes of 1-D box filters produces a 2D tent filter.
//
// Since PDQ uses 64x64 blocks, 1/64th of the image height/width respectively is
// a full block. But since we use two passes, we want half that window size per
// pass. Example: 1024x1024 full-resolution input. PDQ downsamples to 64x64.
// Each 16x16 block of the input produces a single downsample pixel. X,Y passes
// with window size 8 (= 1024/128) average pixels with 8x8 neighbors. The second
// X,Y pair of 1D box-filter passes accumulate data from all 16x16.
// ----------------------------------------------------------------
static function computeJaroszFilterWindowSize(
$dimension
) {
return (int)(($dimension + self::PDQ_JAROSZ_WINDOW_SIZE_DIVISOR - 1)
/ self::PDQ_JAROSZ_WINDOW_SIZE_DIVISOR);
}
// ----------------------------------------------------------------
// 7 and 4
//
// 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1
// 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5
//
// . PHASE 1: ONLY ADD, NO WRITE, NO SUBTRACT
// . .
// . . .
//
// 0 * . . . PHASE 2: ADD, WRITE, WITH NO SUBTRACTS
// 1 . * . . .
// 2 . . * . . .
// 3 . . . * . . .
//
// 4 . . . * . . . PHASE 3: WRITES WITH ADD & SUBTRACT
// 5 . . . * . . .
// 6 . . . * . . .
// 7 . . . * . . .
// 8 . . . * . . .
// 9 . . . * . . .
// 10 . . . * . . .
// 11 . . . * . . .
// 12 . . . * . . .
//
// 13 . . . * . . PHASE 4: FINAL WRITES WITH NO ADDS
// 14 . . . * .
// 15 . . . *
//
// = 0 = 0 PHASE 1
// = 0+1 = 1
// = 0+1+2 = 3
//
// out[ 0] = 0+1+2+3 = 6 PHASE 2
// out[ 1] = 0+1+2+3+4 = 10
// out[ 2] = 0+1+2+3+4+5 = 15
// out[ 3] = 0+1+2+3+4+5+6 = 21
//
// out[ 4] = 1+2+3+4+5+6+7 = 28 PHASE 3
// out[ 5] = 2+3+4+5+6+7+8 = 35
// out[ 6] = 3+4+5+6+7+8+9 = 42
// out[ 7] = 4+5+6+7+8+9+10 = 49
// out[ 8] = 5+6+7+8+9+10+11 = 56
// out[ 9] = 6+7+8+9+10+11+12 = 63
// out[10] = 7+8+9+10+11+12+13 = 70
// out[11] = 8+9+10+11+12+13+14 = 77
// out[12] = 9+10+11+12+13+14+15 = 84
//
// out[13] = 10+11+12+13+14+15 = 75 PHASE 4
// out[14] = 11+12+13+14+15 = 65
// out[15] = 12+13+14+15 = 54
// ----------------------------------------------------------------
// ----------------------------------------------------------------
// 8 and 5
//
// 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1
// 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5
//
// . PHASE 1: ONLY ADD, NO WRITE, NO SUBTRACT
// . .
// . . .
// . . . .
//
// 0 * . . . . PHASE 2: ADD, WRITE, WITH NO SUBTRACTS
// 1 . * . . . .
// 2 . . * . . . .
// 3 . . . * . . . .
//
// 4 . . . * . . . . PHASE 3: WRITES WITH ADD & SUBTRACT
// 5 . . . * . . . .
// 6 . . . * . . . .
// 7 . . . * . . . .
// 8 . . . * . . . .
// 9 . . . * . . . .
// 10 . . . * . . . .
// 11 . . . * . . . .
//
// 12 . . . * . . . PHASE 4: FINAL WRITES WITH NO ADDS
// 13 . . . * . .
// 14 . . . * .
// 15 . . . *
//
// = 0 = 0 PHASE 1
// = 0+1 = 1
// = 0+1+2 = 3
// = 0+1+2+3 = 6
//
// out[ 0] = 0+1+2+3+4 = 10
// out[ 1] = 0+1+2+3+4+5 = 15
// out[ 2] = 0+1+2+3+4+5+6 = 21
// out[ 3] = 0+1+2+3+4+5+6+7 = 28
//
// out[ 4] = 1+2+3+4+5+6+7+8 = 36 PHASE 3
// out[ 5] = 2+3+4+5+6+7+8+9 = 44
// out[ 6] = 3+4+5+6+7+8+9+10 = 52
// out[ 7] = 4+5+6+7+8+9+10+11 = 60
// out[ 8] = 5+6+7+8+9+10+11+12 = 68
// out[ 9] = 6+7+8+9+10+11+12+13 = 76
// out[10] = 7+8+9+10+11+12+13+14 = 84
// out[11] = 8+9+10+11+12+13+14+15 = 92
//
// out[12] = 9+10+11+12+13+14+15 = 84 PHASE 4
// out[13] = 10+11+12+13+14+15 = 75 PHASE 4
// out[14] = 11+12+13+14+15 = 65
// out[15] = 12+13+14+15 = 54
// ----------------------------------------------------------------
// ----------------------------------------------------------------
static function boxAlongRows(
&$in_image, // 2D array of float
&$out_image, // 2D array of float
$num_rows,
$num_cols,
$window_size
) {
for ($j = 0; $j < $num_cols; $j++) {
$half_window_size = (int)(($window_size + 2) / 2); // 7->4, 8->5
$phase_1_nreps = $half_window_size - 1;
$phase_2_nreps = $window_size - $half_window_size + 1;
$phase_3_nreps = $num_rows - $window_size;
$phase_4_nreps = $half_window_size - 1;
$li = 0; // Index of left edge of read window, for subtracts
$ri = 0; // Index of right edge of read windows, for adds
$oi = 0; // Index into output vector
$sum = 0.0;
$current_window_size = 0;
// PHASE 1: ACCUMULATE FIRST SUM NO WRITES
for ($k = 0; $k < $phase_1_nreps; $k++) {
$sum += $in_image[$ri][$j];
$current_window_size++;
$ri++;
}
// PHASE 2: INITIAL WRITES WITH SMALL WINDOW
for ($k = 0; $k < $phase_2_nreps; $k++) {
$sum += $in_image[$ri][$j];
$current_window_size++;
$out_image[$oi][$j] = $sum / $current_window_size;
$ri++;
$oi++;
}
// PHASE 3: WRITES WITH FULL WINDOW
for ($k = 0; $k < $phase_3_nreps; $k++) {
$sum += $in_image[$ri][$j];
$sum -= $in_image[$li][$j];
$out_image[$oi][$j] = $sum / $current_window_size;
$li++;
$ri++;
$oi++;
}
// PHASE 4: FINAL WRITES WITH SMALL WINDOW
for ($k = 0; $k < $phase_4_nreps; $k++) {
$sum -= $in_image[$li][$j];
$current_window_size--;
$out_image[$oi][$j] = $sum / $current_window_size;
$li++;
$oi++;
}
}
}
static function boxAlongCols(
&$in_image, // 2D array of float
&$out_image, // 2D array of float
$num_rows,
$num_cols,
$window_size
) {
for ($i = 0; $i < $num_rows; $i++) {
$half_window_size = (int)(($window_size + 2) / 2); // 7->4, 8->5
$phase_1_nreps = $half_window_size - 1;
$phase_2_nreps = $window_size - $half_window_size + 1;
$phase_3_nreps = $num_cols - $window_size;
$phase_4_nreps = $half_window_size - 1;
$li = 0; // Index of left edge of read window, for subtracts
$ri = 0; // Index of right edge of read windows, for adds
$oi = 0; // Index into output vector
$sum = 0.0;
$current_window_size = 0;
// PHASE 1: ACCUMULATE FIRST SUM NO WRITES
for ($k = 0; $k < $phase_1_nreps; $k++) {
$sum += $in_image[$i][$ri];
$current_window_size++;
$ri++;
}
// PHASE 2: INITIAL WRITES WITH SMALL WINDOW
for ($k = 0; $k < $phase_2_nreps; $k++) {
$sum += $in_image[$i][$ri];
$current_window_size++;
$out_image[$i][$oi] = $sum / $current_window_size;
$ri++;
$oi++;
}
// PHASE 3: WRITES WITH FULL WINDOW
for ($k = 0; $k < $phase_3_nreps; $k++) {
$sum += $in_image[$i][$ri];
$sum -= $in_image[$i][$li];
$out_image[$i][$oi] = $sum / $current_window_size;
$li++;
$ri++;
$oi++;
}
// PHASE 4: FINAL WRITES WITH SMALL WINDOW
for ($k = 0; $k < $phase_4_nreps; $k++) {
$sum -= $in_image[$i][$li];
$current_window_size--;
$out_image[$i][$oi] = $sum / $current_window_size;
$li++;
$oi++;
}
}
}
// ----------------------------------------------------------------
static function jaroszFilter(
&$luma_matrix, // 2D array of float
$num_rows,
$num_cols,
$window_size_along_rows,
$window_size_along_cols
) {
$other_matrix = array();
for ($i = 0; $i < $num_rows; $i++) {
$row = array();
for ($j = 0; $j < $num_cols; $j++) {
$row[$j] = 0;
}
$other_matrix[$i] = $row;
}
for ($k = 0; $k < self::PDQ_NUM_JAROSZ_XY_PASSES; $k++) {
self::boxAlongRows($luma_matrix, $other_matrix, $num_rows, $num_cols, $window_size_along_rows);
self::boxAlongCols($other_matrix, $luma_matrix, $num_rows, $num_cols, $window_size_along_cols);
}
}
// ================================================================
// This is all heuristic (see the PDQ hashing doc). Quantization matters since
// we want to count *significant* gradients, not just the some of many small
// ones. The constants are all manually selected, and tuned as described in the
// document.
static function computeImageDomainQualityMetric(
&$buffer_64x64
) {
$int_gradient_sum = 0;
for ($i = 0; $i < 63; $i++) {
for ($j = 0; $j < 64; $j++) {
$u = $buffer_64x64[$i][$j];
$v = $buffer_64x64[$i+1][$j];
$d = (int)((($u - $v) * 100) / 255);
$int_gradient_sum += (int)abs($d);
}
}
for ($i = 0; $i < 64; $i++) {
for ($j = 0; $j < 63; $j++) {
$u = $buffer_64x64[$i][$j];
$v = $buffer_64x64[$i][$j+1];
$$d = (int)((($u - $v) * 100) / 255);
$int_gradient_sum += (int)abs($d);
}
}
// Heuristic scaling factor.
$quality = (int)($int_gradient_sum / 90);
if ($quality > 100) {
$quality = 100;
}
return $quality;
}
// ================================================================
// Full 64x64 to 64x64 can be optimized e.g. the Lee algorithm. But here we
// only want slots (1-16)x(1-16) of the full 64x64 output. Careful experiments
// showed that using Lee along all 64 slots in one dimension, then Lee along 16
// slots in the second, followed by extracting slots 1-16 of the output, was
// actually slower than the current implementation which is completely
// non-clever/non-Lee but computes only what is needed.
static function computeDCT64To16(
&$buffer_64x64,
&$buffer_16x64,
&$buffer_16x16,
&$dct_16x64
) {
// A = buffer_64x64
// T = buffer_16x64
// B = buffer_16x16
// D = DCT matrix
// 2D DCT:
// B = D A Dt
// Split out into first product and second:
// B = (D A) Dt ; T = D A
for ($i = 0; $i < 16; $i++) {
for ($j = 0; $j < 64; $j++) {
$sumk = 0.0;
for ($k = 0; $k < 64; $k++) {
$sumk += $dct_16x64[$i][$k] * $buffer_64x64[$k][$j];
}
$buffer_16x64[$i][$j] = $sumk;
}
}
for ($i = 0; $i < 16; $i++) {
for ($j = 0; $j < 16; $j++) {
$sumk = 0.0;
// sumk += T[i][k] * D[j][k];
for ($k = 0; $k < 64; $k++) {
$sumk += $buffer_16x64[$i][$k] * $dct_16x64[$j][$k];
}
$buffer_16x16[$i][$j] = $sumk;
}
}
}
// ----------------------------------------------------------------
// orig rot90 rot180 rot270
// noxpose xpose noxpose xpose
// + + + + - + - + + - + - - - - -
// + + + + - + - + - + - + + + + +
// + + + + - + - + + - + - - - - -
// + + + + - + - + - + - + + + + +
//
// flipx flipy flipplus flipminus
// noxpose noxpose xpose xpose
// - - - - - + - + + + + + + - + -
// + + + + - + - + + + + + - + - +
// - - - - - + - + + + + + + - + -
// + + + + - + - + + + + + - + - +
static function dct16OriginalToRotate90(&$A, &$B) {
for ($i = 0; $i < 16; $i++) {
for ($j = 0; $j < 16; $j++) {
if ($j & 1) {
$B[$j][$i] = $A[$i][$j];
} else {
$B[$j][$i] = -$A[$i][$j];
}
}
}
}
static function dct16OriginalToRotate180(&$A, &$B) {
for ($i = 0; $i < 16; $i++) {
for ($j = 0; $j < 16; $j++) {
if (($i+$j) & 1) {
$B[$i][$j] = -$A[$i][$j];
} else {
$B[$i][$j] = $A[$i][$j];
}
}
}
}
static function dct16OriginalToRotate270(&$A, &$B) {
for ($i = 0; $i < 16; $i++) {
for ($j = 0; $j < 16; $j++) {
if ($i & 1) {
$B[$j][$i] = $A[$i][$j];
} else {
$B[$j][$i] = -$A[$i][$j];
}
}
}
}
static function dct16OriginalToFlipX(&$A, &$B) {
for ($i = 0; $i < 16; $i++) {
for ($j = 0; $j < 16; $j++) {
if ($i & 1) {
$B[$i][$j] = $A[$i][$j];
} else {
$B[$i][$j] = -$A[$i][$j];
}
}
}
}
static function dct16OriginalToFlipY(&$A, &$B) {
for ($i = 0; $i < 16; $i++) {
for ($j = 0; $j < 16; $j++) {
if ($j & 1) {
$B[$i][$j] = $A[$i][$j];
} else {
$B[$i][$j] = -$A[$i][$j];
}
}
}
}
static function dct16OriginalToFlipPlus1(&$A, &$B) {
for ($i = 0; $i < 16; $i++) {
for ($j = 0; $j < 16; $j++) {
$B[$j][$i] = $A[$i][$j];
}
}
}
static function dct16OriginalToFlipMinus1(&$A, &$B) {
for ($i = 0; $i < 16; $i++) {
for ($j = 0; $j < 16; $j++) {
if (($i+$j) & 1) {
$B[$j][$i] = -$A[$i][$j];
} else {
$B[$j][$i] = $A[$i][$j];
}
}
}
}
// ----------------------------------------------------------------
static function computeHashFromDCTOutput(
&$buffer_16x16
) {
$flat_matrix = array();
for ($k = 0, $i = 0; $i < 16; $i++) {
for ($j = 0; $j < 16; $j++, $k++) {
$flat_matrix[$k] = $buffer_16x16[$i][$j];
}
}
sort($flat_matrix);
//print_r($flat_matrix);
$median = $flat_matrix[127];
//echo "median=$median\n";
$hash = PDQHash::makeZeroesHash();
for ($k = 0, $i = 0; $i < 16; $i++) {
for ($j = 0; $j < 16; $j++, $k++) {
$value = $buffer_16x16[$i][$j];
if ($value > $median) {
$hash->setBit($k);
}
}
}
return $hash;
}
// ================================================================
static function readImageFromFilename($filename, $downsample_first) {
$is_jpeg = false;
if (substr_compare($filename,'.jpg', -strlen('.jpg')) === 0) {
$orig_image = imagecreatefromjpeg($filename);
$is_jpeg = true;
} else if (substr_compare($filename,'.jpeg', -strlen('.jpeg')) === 0) {
$orig_image = imagecreatefromjpeg($filename);
$is_jpeg = true;
} else if (substr_compare($filename,'.png', -strlen('.png')) === 0) {
$orig_image = imagecreatefrompng($filename);
} else {
throw new Exception('PDQHasher: could not handle filetype of '.$filename);
}
// The pure-PHP hasher is *really* slow in pure PHP for megapixel images.
// So, downsample first. Don't worry about aspect ratio since PDQ will
// squarify anyway. For extension use, don't downsample here as it's
// redundant.
if ($downsample_first) {
$orig_height = imagesy($orig_image);
$orig_width = imagesx($orig_image);
if ($orig_height > 128 || $orig_width > 128) {
$image = imagecreatetruecolor(128, 128);
imagecopyresampled($image, $orig_image, 0, 0, 0, 0, 128, 128, $orig_width, $orig_height);
} else {
$image = $orig_image;
}
} else {
$image = $orig_image;
}
// NOTE: the PDQ hashes within ThreatExchange aren't respecting EXIF rotation tags
// so we should likewise ignore them.
//
// if ($is_jpeg) {
// $exif = exif_read_data($filename);
// if (!empty($exif['Orientation'])) {
// switch ($exif['Orientation']) {
// case 3:
// $image = imagerotate($image, 180, 0);
// break;
// case 6:
// $image = imagerotate($image, -90, 0);
// break;
// case 8:
// $image = imagerotate($image, 90, 0);
// break;
// }
// }
// }
return $image;
}
// ================================================================
static function computeDCTAndQualityFromImage(
/*resource*/&$image,
/*bool*/ $show_timings,
/*bool*/ $dump
) {
$num_rows = imagesy($image);
$num_cols = imagesx($image);
if ($dump) {
echo "num_rows=$num_rows\n";
echo "num_cols=$num_cols\n";
}
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
// RGB to luma
$t1 = microtime(true);
$luma_matrix = self::imageToLumaMatrix($image, $num_rows, $num_cols);
$t2 = microtime(true);
if ($show_timings) {
printf("X010-LUMA %.6f\n", $t2-$t1);
}
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
// Downsample (blur and decimate)
$t1 = microtime(true);
$window_size_along_rows = self::computeJaroszFilterWindowSize($num_cols);
$window_size_along_cols = self::computeJaroszFilterWindowSize($num_rows);
self::jaroszFilter($luma_matrix, $num_rows, $num_cols, $window_size_along_rows, $window_size_along_cols);
$t2 = microtime(true);
if ($show_timings) {
printf("X020-JRSZ %.6f\n", $t2-$t1);
}
// Decimation per se. Target centers not corners.
$buffer_64x64 = array();
for ($i = 0; $i < 64; $i++) {
$row = array();
for ($j = 0; $j < 64; $j++) {
$row[$j] = 0;
}
$buffer_64x64[$i] = $row;
}
$t1 = microtime(true);
for ($i = 0; $i < 64; $i++) {
$ini = (int)((($i + 0.5) * $num_rows) / 64);
for ($j = 0; $j < 64; $j++) {
$inj = (int)((($j + 0.5) * $num_cols) / 64);
$buffer_64x64[$i][$j] = $luma_matrix[$ini][$inj];
}
}
$t2 = microtime(true);
if ($dump) {
echo "DOWNSAMPLE IMAGE:\n";
print_r($buffer_64x64);
}
if ($show_timings) {
printf("X030-DSMP %.6f\n", $t2-$t1);
}
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
// Quality metric. Reuse the 64x64 image-domain downsample
// since we already have it.
$t1 = microtime(true);
$quality = self::computeImageDomainQualityMetric($buffer_64x64);
$t2 = microtime(true);
if ($show_timings) {
printf("X040-QMTC %.6f\n", $t2-$t1);
}
if ($dump) {
echo "QUALITY:$quality\n";
}
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
$t1 = microtime(true);
$buffer_16x64 = array();
for ($i = 0; $i < 16; $i++) {
$row = array();
for ($j = 0; $j < 64; $j++) {
$row[$j] = 0;
}
$buffer_16x64[$i] = $row;
}
$buffer_16x16 = array();
for ($i = 0; $i < 16; $i++) {
$row = array();
for ($j = 0; $j < 16; $j++) {
$row[$j] = 0;
}
$buffer_16x16[$i] = $row;
}
$dct_16x64 = array();
for ($i = 0; $i < 16; $i++) {
$row = array();
for ($j = 0; $j < 64; $j++) {
$row[$j] = 0;
}
$dct_16x64[$i] = $row;
}
// See comments on dct64To16. Input is (0..63)x(0..63); output is
// (1..16)x(1..16) with the latter indexed as (0..15)x(0..15).
$matrix_scale_factor = sqrt(2.0 / 64.0);
$pi = 3.141592653589793;
for ($i = 0; $i < 16; $i++) {
for ($j = 0; $j < 64; $j++) {
$dct_16x64[$i][$j] = $matrix_scale_factor *
cos(($pi / 2 / 64.0) * ($i+1) * (2 * $j + 1));
}
}
$t2 = microtime(true);
if ($show_timings) {
printf("X050-DMTX %.6f\n", $t2-$t1);
}
if ($dump) {
echo "DCT MATRIX:\n";
print_r($dct_16x64);
}
// 2D DCT
$t1 = microtime(true);
self::computeDCT64To16($buffer_64x64, $buffer_16x64, $buffer_16x16, $dct_16x64);
$t2 = microtime(true);
if ($show_timings) {
printf("X060-CDCT %.6f\n", $t2-$t1);
}
if ($dump) {
echo "DCT OUTPUT:\n";
print_r($buffer_16x16);
}
return array($buffer_16x16, $quality);
}
// ----------------------------------------------------------------
static function computeHashAndQualityFromImage(
/*resource*/&$image,
/*bool*/ $show_timings,
/*bool*/ $dump
) {
$t01 = microtime(true);
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
list ($buffer_16x16, $quality) = self:: computeDCTAndQualityFromImage(
$image, $show_timings, $dump
);
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
$hash = self::computeHashFromDCTOutput($buffer_16x16);
if ($dump) {
echo "HASH:".$hash->toHexString()."\n";
}
$t02 = microtime(true);
if ($show_timings) {
printf("X999-OVRL %.6f\n", $t02-$t01);
}
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
return array($hash, $quality);
}
// ----------------------------------------------------------------
static function computeHashesAndQualityFromImage(
/*resource*/&$image,
/*int*/ $which_flags = self::DIH_ALL,
/*bool*/ $show_timings,
/*bool*/ $dump
) {
$t01 = microtime(true);
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
list ($buffer_16x16, $quality) = self:: computeDCTAndQualityFromImage(
$image, $show_timings, $dump
);
$buffer_16x16_aux = array();
for ($i = 0; $i < 16; $i++) {
$row = array();
for ($j = 0; $j < 16; $j++) {
$row[$j] = 0;
}
$buffer_16x16_aux[$i] = $row;
}
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
$hashes = array();
if ($which_flags & self::DIH_ORIGINAL) {
$hashes['orig'] = self::computeHashFromDCTOutput($buffer_16x16);
}
if ($which_flags & self::DIH_ROTATE_90) {
self::dct16OriginalToRotate90($buffer_16x16, $buffer_16x16Aux);
$hashes['r090'] = self::computeHashFromDCTOutput($buffer_16x16Aux);
}
if ($which_flags & self::DIH_ROTATE_180) {
self::dct16OriginalToRotate180($buffer_16x16, $buffer_16x16Aux);
$hashes['r180'] = self::computeHashFromDCTOutput($buffer_16x16Aux);
}
if ($which_flags & self::DIH_ROTATE_270) {
self::dct16OriginalToRotate270($buffer_16x16, $buffer_16x16Aux);
$hashes['r270'] = self::computeHashFromDCTOutput($buffer_16x16Aux);
}
if ($which_flags & self::DIH_FLIP_X) {
self::dct16OriginalToFlipX($buffer_16x16, $buffer_16x16Aux);
$hashes['flpx'] = self::computeHashFromDCTOutput($buffer_16x16Aux);
}
if ($which_flags & self::DIH_FLIP_Y) {
self::dct16OriginalToFlipY($buffer_16x16, $buffer_16x16Aux);
$hashes['flpy'] = self::computeHashFromDCTOutput($buffer_16x16Aux);
}
if ($which_flags & self::DIH_FLIP_PLUS_1) {
self::dct16OriginalToFlipPlus1($buffer_16x16, $buffer_16x16Aux);
$hashes['flpp'] = self::computeHashFromDCTOutput($buffer_16x16Aux);
}
if ($which_flags & self::DIH_FLIP_MINUS_1) {
self::dct16OriginalToFlipMinus1($buffer_16x16, $buffer_16x16Aux);
$hashes['flpm'] = self::computeHashFromDCTOutput($buffer_16x16Aux);
}
$t02 = microtime(true);
if ($show_timings) {
printf("X999-OVRL %.6f\n", $t02-$t01);
}
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
return array($hashes, $quality);
}
// ================================================================
static function computeHashAndQualityFromFilename(
$filename,
$show_timings = false,
$dump = false
) {
$t01 = microtime(true);
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
$info = getimagesize($filename);
if ($dump) {
echo "IMAGE INFO:\n";
print_r($info);
}
$num_rows = $info[1]; // height
$num_cols = $info[0]; // width
if ($dump) {
echo "num_rows=$num_rows\n";
echo "num_cols=$num_cols\n";
}
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
$t1 = microtime(true);
$image = self::readImageFromFilename($filename, true);
$t2 = microtime(true);
if ($show_timings) {
printf("X000-READ %.6f\n", $t2-$t1);
}
return self::computeHashAndQualityFromImage($image, $show_timings, $dump);
}
// ----------------------------------------------------------------
static function computeHashesAndQualityFromFilename(
$filename,
$which_flags = self::DIH_ALL,
$show_timings = false,
$dump = false
) {
$t01 = microtime(true);
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
$info = getimagesize($filename);
if ($dump) {
echo "IMAGE INFO:\n";
print_r($info);
}
$num_rows = $info[1]; // height
$num_cols = $info[0]; // width
if ($dump) {
echo "num_rows=$num_rows\n";
echo "num_cols=$num_cols\n";
}
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
$t1 = microtime(true);
$image = self::readImageFromFilename($filename, true);
$t2 = microtime(true);
if ($show_timings) {
printf("X000-READ %.6f\n", $t2-$t1);
}
return self::computeHashesAndQualityFromImage(
$image, $which_flags, $show_timings, $dump
);
}
// ================================================================
// Array of hash and quality.
// The hash is a hex-string, not a PDQHash object.
static function computeStringHashAndQualityFromFilenameUsingExtension(
$filename,
$show_timings = false,
$dump = false
) {
$t01 = microtime(true);
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
$info = getimagesize($filename);
if ($dump) {
echo "IMAGE INFO:\n";
print_r($info);
}
$num_rows = $info[1]; // height
$num_cols = $info[0]; // width
if ($dump) {
echo "num_rows=$num_rows\n";
echo "num_cols=$num_cols\n";
}
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
$t1 = microtime(true);
$image = self::readImageFromFilename($filename, false);
$t2 = microtime(true);
if ($show_timings) {
printf("X000-READ %.6f\n", $t2-$t1);
}
// Uses the PDQ Zend-PHP extension
$t1 = microtime(true);
$retval = pdq_compute_string_hash_and_quality_from_image_resource($image);
$t2 = microtime(true);
if ($show_timings) {
printf("X000-EXTN %.6f\n", $t2-$t1);
}
return array($retval['hash'], $retval['quality']);
}
// ----------------------------------------------------------------
static function computeStringHashesAndQualityFromFilenameUsingExtension(
$filename,
$which_flags = self::DIH_ALL,
$show_timings = false,
$dump = false
) {
$t01 = microtime(true);
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
$info = getimagesize($filename);
if ($dump) {
echo "IMAGE INFO:\n";
print_r($info);
}
$num_rows = $info[1]; // height
$num_cols = $info[0]; // width
if ($dump) {
echo "num_rows=$num_rows\n";
echo "num_cols=$num_cols\n";
}
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
$t1 = microtime(true);
$image = self::readImageFromFilename($filename, false);
$t2 = microtime(true);
if ($show_timings) {
printf("X000-READ %.6f\n", $t2-$t1);
}
// Uses the PDQ Zend-PHP extension
$t1 = microtime(true);
$retval = pdq_compute_string_hashes_and_quality_from_image_resource($image);
$t2 = microtime(true);
if ($show_timings) {
printf("X000-EXTN %.6f\n", $t2-$t1);
}
return array(
array(
'orig' => $retval['orig'],
'r090' => $retval['r090'],
'r180' => $retval['r180'],
'r270' => $retval['r270'],
'flpx' => $retval['flpx'],
'flpy' => $retval['flpy'],
'flpp' => $retval['flpp'],
'flpm' => $retval['flpm'],
),
$retval['quality']
);
}
} // class PDQHasher