in pdq/java/src/main/java/pdqhashing/tools/MIHQueryTool.java [63:245]
/**
 * Command-line entry point: looks up every "needle" hash in an MIH index
 * built from the "haystack" hashes, and prints the results to stdout.
 *
 * <p>Flags (must precede the positional arguments):
 * <ul>
 *   <li>{@code -h} / {@code --help}: calls {@code usage(0)}.</li>
 *   <li>{@code -b} / {@code --brute-force-query}: use the index's
 *       brute-force query methods instead of the regular ones.</li>
 *   <li>{@code -u} / {@code --unified-output}: one output line per
 *       needle/match pair, including needle metadata.</li>
 *   <li>{@code -d N}: distance threshold passed to the queries; defaults to
 *       {@code DEFAULT_PDQ_DISTANCE_THRESHOLD}.</li>
 *   <li>{@code -v}: inverted output -- print only needles for which no
 *       match was found. Takes precedence over {@code -u}.</li>
 * </ul>
 *
 * <p>Exactly two positional arguments are required after the flags: the
 * needles filename and the haystack filename. Any other flag or argument
 * count results in {@code usage(1)}.
 *
 * <p>Timing lines ({@code read_seconds}, {@code build_seconds},
 * {@code query_seconds}) are written to stdout alongside the results; the
 * query timing includes the time spent printing matches.
 *
 * <p>NOTE(review): this code relies on {@code usage(...)} not returning
 * (presumably it calls {@code System.exit}) -- confirm against its
 * definition elsewhere in this file.
 *
 * @param args command-line arguments as described above
 */
public static void main(String[] args) {
  boolean doBruteForceQuery = false;
  int distanceThreshold = DEFAULT_PDQ_DISTANCE_THRESHOLD;
  boolean doUnifiedOutput = false;
  boolean doInvert = false;

  // Parse command-line flags. I'm explicitly not using gflags or other such
  // libraries, to minimize the number of external dependencies for this
  // project.
  int argi = 0;
  int argc = args.length;
  while (argi < argc) {
    if (!args[argi].startsWith("-")) {
      // First non-flag argument: the rest are positional.
      break;
    }
    if (args[argi].equals("-h") || args[argi].equals("--help")) {
      usage(0);
    } else if (args[argi].equals("-b") || args[argi].equals("--brute-force-query")) {
      doBruteForceQuery = true;
      argi++;
    } else if (args[argi].equals("-u") || args[argi].equals("--unified-output")) {
      doUnifiedOutput = true;
      argi++;
    } else if (args[argi].equals("-d")) {
      // -d needs a value after it.
      if ((argc - argi) < 2) {
        usage(1);
      }
      try {
        distanceThreshold = Integer.parseInt(args[argi+1]);
      } catch (NumberFormatException e) {
        usage(1);
      }
      argi += 2;
    } else if (args[argi].equals("-v")) {
      doInvert = true;
      argi++;
    } else {
      usage(1);
    }
  }
  if ((argc - argi) != 2) {
    usage(1);
  }
  String needlesFilename = args[argi];
  String haystackFilename = args[argi+1];

  // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  long t1, t2;
  double duration; // in seconds

  // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  // Load hashes+metadata.
  Vector<Hash256AndMetadata<String>> needles = new Vector<Hash256AndMetadata<String>>();
  Vector<Hash256AndMetadata<String>> haystack = new Vector<Hash256AndMetadata<String>>();
  t1 = System.nanoTime();
  HashReaderUtil.loadHashesAndMetadataFromFileOrDie(PROGNAME, needlesFilename, needles);
  HashReaderUtil.loadHashesAndMetadataFromFileOrDie(PROGNAME, haystackFilename, haystack);
  t2 = System.nanoTime();
  duration = (t2 - t1) / 1e9;
  System.out.printf("read_seconds=%.3e\n", duration);

  // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  // Build the MIH data structure.
  t1 = System.nanoTime();
  MIH256<String> mih = new MIH256<String>();
  for (Hash256AndMetadata<String> pair : haystack) {
    mih.insert(pair.hash, pair.metadata);
  }
  t2 = System.nanoTime();
  duration = (t2 - t1) / 1e9;
  System.out.printf("build_seconds=%.3e\n", duration);

  // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  // Do the lookups. Inverted output wins over unified output when both
  // flags are given.
  t1 = System.nanoTime();
  if (doInvert) {
    printNeedlesWithoutMatches(mih, needles, distanceThreshold, doBruteForceQuery);
  } else if (doUnifiedOutput) {
    printUnifiedMatches(mih, needles, distanceThreshold, doBruteForceQuery);
  } else {
    printGroupedMatches(mih, needles, distanceThreshold, doBruteForceQuery);
  }
  t2 = System.nanoTime();
  duration = (t2 - t1) / 1e9;
  System.out.printf("query_seconds=%.3e\n", duration);
}

/**
 * Inverted mode: prints a {@code hash=...} line for each needle for which
 * the any-match query returned null.
 */
private static void printNeedlesWithoutMatches(
    MIH256<String> mih,
    Vector<Hash256AndMetadata<String>> needles,
    int distanceThreshold,
    boolean doBruteForceQuery) {
  for (Hash256AndMetadata<String> needlePair : needles) {
    Hash256AndMetadata<String> matchPair =
      queryAnyOrDie(mih, needlePair, distanceThreshold, doBruteForceQuery);
    if (matchPair == null) {
      System.out.printf("hash=%s,%s\n",
        needlePair.hash.toString(), needlePair.metadata);
    }
  }
}

/**
 * Unified mode: prints one self-contained line per needle/match pair, or a
 * single {@code matches=none} line for a needle with no matches.
 */
private static void printUnifiedMatches(
    MIH256<String> mih,
    Vector<Hash256AndMetadata<String>> needles,
    int distanceThreshold,
    boolean doBruteForceQuery) {
  // Reused across needles; queryAllOrDie clears it before each query.
  Vector<Hash256AndMetadata<String>> matches = new Vector<Hash256AndMetadata<String>>();
  for (Hash256AndMetadata<String> needlePair : needles) {
    queryAllOrDie(mih, needlePair, distanceThreshold, doBruteForceQuery, matches);
    if (matches.isEmpty()) {
      System.out.printf("needle=%s,%s,matches=none\n", needlePair.hash.toString(), needlePair.metadata);
    } else {
      for (Hash256AndMetadata<String> matchPair : matches) {
        System.out.printf("d=%d,needle=%s,%s,match=%s,%s\n",
          matchPair.hash.hammingDistance(needlePair.hash),
          needlePair.hash.toString(),
          needlePair.metadata,
          matchPair.hash.toString(),
          matchPair.metadata);
      }
    }
  }
}

/**
 * Default mode: prints a {@code needle=...} header per needle followed by
 * its matches, with a blank line separating consecutive needles.
 */
private static void printGroupedMatches(
    MIH256<String> mih,
    Vector<Hash256AndMetadata<String>> needles,
    int distanceThreshold,
    boolean doBruteForceQuery) {
  // Reused across needles; queryAllOrDie clears it before each query.
  Vector<Hash256AndMetadata<String>> matches = new Vector<Hash256AndMetadata<String>>();
  boolean first = true;
  for (Hash256AndMetadata<String> needlePair : needles) {
    // Blank line between groups, but not before the first one.
    if (!first) {
      System.out.printf("\n");
    }
    first = false;
    // The header is printed before the query runs, so it is already on
    // stdout if the query fails.
    System.out.printf("needle=%s\n", needlePair.hash.toString());
    queryAllOrDie(mih, needlePair, distanceThreshold, doBruteForceQuery, matches);
    for (Hash256AndMetadata<String> matchPair : matches) {
      System.out.printf("d=%d,match=%s,%s\n",
        matchPair.hash.hammingDistance(needlePair.hash),
        matchPair.hash.toString(),
        matchPair.metadata);
    }
  }
}

/**
 * Runs the any-match query (brute-force or regular) for one needle and
 * returns its result; on MIHDimensionExceededException, prints the error to
 * stderr and exits with status 1.
 */
private static Hash256AndMetadata<String> queryAnyOrDie(
    MIH256<String> mih,
    Hash256AndMetadata<String> needlePair,
    int distanceThreshold,
    boolean doBruteForceQuery) {
  try {
    return doBruteForceQuery
      ? mih.bruteForceQueryAny(needlePair.hash, distanceThreshold)
      : mih.queryAny(needlePair.hash, distanceThreshold);
  } catch (MIHDimensionExceededException e) {
    System.err.printf("%s: %s\n", PROGNAME, e.getErrorMessage());
    System.exit(1);
    // System.exit does not return normally; this only satisfies the
    // compiler's requirement that every path returns a value.
    return null;
  }
}

/**
 * Clears {@code matches}, then fills it via the all-matches query
 * (brute-force or regular) for one needle; on
 * MIHDimensionExceededException, prints the error to stderr and exits with
 * status 1.
 */
private static void queryAllOrDie(
    MIH256<String> mih,
    Hash256AndMetadata<String> needlePair,
    int distanceThreshold,
    boolean doBruteForceQuery,
    Vector<Hash256AndMetadata<String>> matches) {
  matches.clear();
  try {
    if (doBruteForceQuery) {
      mih.bruteForceQueryAll(needlePair.hash, distanceThreshold, matches);
    } else {
      mih.queryAll(needlePair.hash, distanceThreshold, matches);
    }
  } catch (MIHDimensionExceededException e) {
    System.err.printf("%s: %s\n", PROGNAME, e.getErrorMessage());
    System.exit(1);
  }
}