public static void main()

in pdq/java/src/main/java/pdqhashing/tools/Clusterize256Tool.java [80:190]


  /**
   * Command-line entry point: parses flags, loads hashes with metadata from
   * the remaining arguments, inserts them into an {@code MIH256} index, and
   * then runs one of the two clusterizers.
   *
   * <p>Recognized flags (parsing stops at the first argument that does not
   * start with {@code "-"}):
   * <ul>
   *   <li>{@code -h}, {@code --help}: calls {@code usage(0)}.</li>
   *   <li>{@code -v}, {@code --verbose}: print the loaded pairs and the index dump.</li>
   *   <li>{@code -s}, {@code --separate-clusters}: forwarded to the clusterizer.</li>
   *   <li>{@code --snowball} / {@code --non-snowball}: pick the clusterizer
   *       (snowball is the default).</li>
   *   <li>{@code -b}, {@code --brute-force-query}: forwarded to the clusterizer.</li>
   *   <li>{@code -d N}: distance threshold (default
   *       {@code DEFAULT_PDQ_DISTANCE_THRESHOLD}).</li>
   *   <li>{@code --trace N}: when positive, report progress to stderr every
   *       {@code N} insertions; also forwarded to the clusterizer.</li>
   * </ul>
   * Anything else starting with {@code "-"} results in {@code usage(1)}.
   *
   * <p>NOTE(review): the parser presumably relies on {@code usage(...)} not
   * returning (e.g. by exiting the process) -- confirm against its definition.
   *
   * @param args command-line arguments: flags followed by the non-flag
   *     arguments handed to the hash loader.
   */
  public static void main(String[] args) {
    int distanceThreshold = DEFAULT_PDQ_DISTANCE_THRESHOLD;
    int traceCount = 0;
    boolean verbose = false;
    boolean separateClusters = false;
    boolean snowball = true;
    boolean doBruteForceQuery = false;

    // Flags are parsed by hand rather than with gflags or a similar library,
    // to keep the number of external dependencies of this project minimal.
    final int argc = args.length;
    int argi = 0;
    while (argi < argc && args[argi].startsWith("-")) {
      final String flag = args[argi];

      if (flag.equals("-h") || flag.equals("--help")) {
        // argi is deliberately left untouched here.
        usage(0);
        continue;
      }

      if (flag.equals("-v") || flag.equals("--verbose")) {
        verbose = true;
        argi++;
        continue;
      }

      if (flag.equals("-s") || flag.equals("--separate-clusters")) {
        separateClusters = true;
        argi++;
        continue;
      }

      if (flag.equals("--snowball")) {
        snowball = true;
        argi++;
        continue;
      }

      if (flag.equals("--non-snowball")) {
        snowball = false;
        argi++;
        continue;
      }

      if (flag.equals("-b") || flag.equals("--brute-force-query")) {
        doBruteForceQuery = true;
        argi++;
        continue;
      }

      if (flag.equals("-d")) {
        // The flag needs a value after it.
        if ((argc - argi) < 2) {
          usage(1);
        }
        try {
          distanceThreshold = Integer.parseInt(args[argi + 1]);
        } catch (NumberFormatException e) {
          usage(1);
        }
        argi += 2;
        continue;
      }

      if (flag.equals("--trace")) {
        // The flag needs a value after it.
        if ((argc - argi) < 2) {
          usage(1);
        }
        try {
          traceCount = Integer.parseInt(args[argi + 1]);
        } catch (NumberFormatException e) {
          usage(1);
        }
        argi += 2;
        continue;
      }

      // Unrecognized flag.
      usage(1);
    }

    // Everything after the flags is passed on to the hash loader.
    final String[] nonFlagArgs = Arrays.copyOfRange(args, argi, argc);

    //  - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    // Load input hashes+metadata

    Vector<Hash256AndMetadata<String>> vectorOfPairs = new Vector<Hash256AndMetadata<String>>();
    HashReaderUtil.loadHashesAndMetadataFromFilesOrDie(PROGNAME, nonFlagArgs, vectorOfPairs);

    if (verbose) {
      System.out.printf("ORIGINAL VECTOR OF PAIRS:\n");
      for (Hash256AndMetadata<String> entry : vectorOfPairs) {
        System.out.printf("%s,%s\n", entry.hash.toString(), entry.metadata);
      }
      System.out.printf("\n");
    }

    //  - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    // Build the mutually-indexed hash

    MIH256<String> mih = new MIH256<String>();
    // Insert one pair at a time (rather than via insertAll) so that progress
    // can be traced.
    final int numPairs = vectorOfPairs.size();
    for (int inserted = 0; inserted < numPairs; inserted++) {
      if (traceCount > 0 && (inserted % traceCount) == 0) {
        System.err.printf("i %d\n", inserted);
      }
      Hash256AndMetadata<String> entry = vectorOfPairs.get(inserted);
      mih.insert(entry.hash, entry.metadata);
    }

    if (verbose) {
      System.out.printf("MIH:\n");
      mih.dump(System.out);
      System.out.printf("\n");
    }

    //  - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    // Clusterize
    if (!snowball) {
      radiallyClusterize(vectorOfPairs, mih,
        separateClusters, traceCount, doBruteForceQuery, distanceThreshold);
    } else {
      snowballClusterize(vectorOfPairs, mih,
        separateClusters, traceCount, doBruteForceQuery, distanceThreshold);
    }
  }