in pdq/java/src/main/java/pdqhashing/tools/Clusterize256Tool.java [80:190]
/**
 * Command-line entry point. Parses the leading option flags, loads hashes
 * with their metadata via HashReaderUtil from the remaining arguments,
 * inserts them into an MIH256 index, and finally runs either the snowball
 * or the radial clusterizer over the loaded data.
 *
 * @param args leading option flags (arguments beginning with "-") followed
 *     by the non-flag arguments, which are handed unchanged to
 *     HashReaderUtil.loadHashesAndMetadataFromFilesOrDie.
 */
public static void main(String[] args) {
  // Option defaults; snowball clustering is on unless --non-snowball is given.
  boolean verbose = false;
  boolean separateClusters = false;
  boolean snowball = true;
  boolean doBruteForceQuery = false;
  int distanceThreshold = DEFAULT_PDQ_DISTANCE_THRESHOLD;
  int traceCount = 0;

  // Flags are parsed by hand rather than with gflags or a similar library,
  // so that this project keeps its external dependencies to a minimum.
  // NOTE(review): usage(...) is presumably terminal (exits the process);
  // the branches that call it without advancing argi rely on that -- confirm.
  final int argc = args.length;
  int argi = 0;
  // Flag parsing stops at the first argument that does not start with "-".
  while (argi < argc && args[argi].startsWith("-")) {
    final String flag = args[argi];
    if ("-h".equals(flag) || "--help".equals(flag)) {
      usage(0);
    } else if ("-v".equals(flag) || "--verbose".equals(flag)) {
      verbose = true;
      ++argi;
    } else if ("-s".equals(flag) || "--separate-clusters".equals(flag)) {
      separateClusters = true;
      ++argi;
    } else if ("--snowball".equals(flag)) {
      snowball = true;
      ++argi;
    } else if ("--non-snowball".equals(flag)) {
      snowball = false;
      ++argi;
    } else if ("-b".equals(flag) || "--brute-force-query".equals(flag)) {
      doBruteForceQuery = true;
      ++argi;
    } else if ("-d".equals(flag)) {
      // -d takes one integer value: the distance threshold.
      if (argc - argi < 2) {
        usage(1);
      }
      try {
        distanceThreshold = Integer.parseInt(args[argi + 1]);
      } catch (NumberFormatException e) {
        usage(1);
      }
      argi += 2;
    } else if ("--trace".equals(flag)) {
      // --trace takes one integer value: the progress-report interval.
      if (argc - argi < 2) {
        usage(1);
      }
      try {
        traceCount = Integer.parseInt(args[argi + 1]);
      } catch (NumberFormatException e) {
        usage(1);
      }
      argi += 2;
    } else {
      // Unrecognized flag.
      usage(1);
    }
  }
  // Whatever follows the flags is passed on to the hash loader.
  final String[] inputArgs = Arrays.copyOfRange(args, argi, argc);

  // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  // Read the hashes and their metadata.
  Vector<Hash256AndMetadata<String>> vectorOfPairs = new Vector<Hash256AndMetadata<String>>();
  HashReaderUtil.loadHashesAndMetadataFromFilesOrDie(PROGNAME, inputArgs, vectorOfPairs);

  if (verbose) {
    System.out.printf("ORIGINAL VECTOR OF PAIRS:\n");
    for (Hash256AndMetadata<String> entry : vectorOfPairs) {
      System.out.printf("%s,%s\n", entry.hash.toString(), entry.metadata);
    }
    System.out.printf("\n");
  }

  // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  // Populate the MIH index. Entries are inserted one at a time (rather
  // than through a bulk insert) so that progress can be traced to stderr
  // every traceCount insertions.
  MIH256<String> mih = new MIH256<String>();
  final int pairCount = vectorOfPairs.size();
  for (int i = 0; i < pairCount; i++) {
    if (traceCount > 0 && i % traceCount == 0) {
      System.err.printf("i %d\n", i);
    }
    Hash256AndMetadata<String> entry = vectorOfPairs.get(i);
    mih.insert(entry.hash, entry.metadata);
  }

  if (verbose) {
    System.out.printf("MIH:\n");
    mih.dump(System.out);
    System.out.printf("\n");
  }

  // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  // Run the selected clustering strategy.
  if (!snowball) {
    radiallyClusterize(vectorOfPairs, mih,
      separateClusters, traceCount, doBruteForceQuery, distanceThreshold);
  } else {
    snowballClusterize(vectorOfPairs, mih,
      separateClusters, traceCount, doBruteForceQuery, distanceThreshold);
  }
}