in metron-analytics/metron-statistics/src/main/java/org/apache/metron/statistics/approximation/HLLPMeasurement.java [40:150]
/**
 * Command-line entry point that measures HyperLogLogPlus estimation error, run time and
 * serialized size across a grid of cardinalities, sparse precisions (sp) and normal
 * precisions (p), printing one chart row per (card, sp, p) combination.
 *
 * <p>For every combination, {@code numTrials} trials are run. Each trial builds a fresh trial
 * set, counts its distinct values exactly with a {@link HashSet}, then times adding the set to
 * a new estimator and reading back its cardinality estimate.
 *
 * <p>Exit codes: {@code 0} after printing help for {@code -h}; {@code -1} when the command line
 * cannot be parsed or any exception (including an invalid option value) is raised.
 *
 * @param args command-line arguments, parsed by {@code ParserOptions.parse}
 */
public static void main(String[] args) {
  // NOTE(review): this Options instance is never populated in this method, so the usage text
  // printed below lists no options -- confirm whether ParserOptions can supply the real set.
  Options options = new Options();
  try {
    final HelpFormatter usageFormatter = new HelpFormatter();
    CommandLineParser parser = new PosixParser();
    CommandLine cmd = null;
    try {
      cmd = ParserOptions.parse(parser, args);
    } catch (ParseException pe) {
      pe.printStackTrace();
      usageFormatter.printHelp("HLLPMeasurement", null, options, null, true);
      System.exit(-1);
    }
    if (cmd.hasOption("h")) {
      usageFormatter.printHelp("HLLPMeasurement", null, options, null, true);
      System.exit(0);
    }

    final String chartDelim = ParserOptions.CHART_DELIM.get(cmd, "|");
    final int numTrials = Integer.parseInt(ParserOptions.NUM_TRIALS.get(cmd, "5000"));
    final int cardMin = Integer.parseInt(ParserOptions.CARD_MIN.get(cmd, "200"));
    final int cardMax = Integer.parseInt(ParserOptions.CARD_MAX.get(cmd, "1000"));
    final int cardStep = Integer.parseInt(ParserOptions.CARD_STEP.get(cmd, "200"));
    // A zero step would divide by zero just below, and a non-positive step keeps the
    // cardinality loop from advancing towards cardMax; reject it up front.
    if (cardStep <= 0) {
      throw new IllegalArgumentException("card step must be > 0 but was " + cardStep);
    }
    // For cardMin >= 1 this rounds cardMin up to the nearest multiple of cardStep.
    final int cardStart = (((cardMin - 1) / cardStep) * cardStep) + cardStep;
    final int spMin = Integer.parseInt(ParserOptions.SP_MIN.get(cmd, "4"));
    final int spMax = Integer.parseInt(ParserOptions.SP_MAX.get(cmd, "32"));
    final int spStep = Integer.parseInt(ParserOptions.SP_STEP.get(cmd, "4"));
    final int pMin = Integer.parseInt(ParserOptions.P_MIN.get(cmd, "4"));
    final int pMax = Integer.parseInt(ParserOptions.P_MAX.get(cmd, "32"));
    final int pStep = Integer.parseInt(ParserOptions.P_STEP.get(cmd, "4"));
    final double errorPercentile = Double.parseDouble(ParserOptions.ERR_PERCENTILE.get(cmd, "50"));
    final double timePercentile = Double.parseDouble(ParserOptions.TIME_PERCENTILE.get(cmd, "50"));
    final double sizePercentile = Double.parseDouble(ParserOptions.SIZE_PERCENTILE.get(cmd, "50"));
    final boolean formatErrPercent = Boolean.parseBoolean(ParserOptions.ERR_FORMAT_PERCENT.get(cmd, "true"));
    final int errMultiplier = formatErrPercent ? 100 : 1;
    final Function<Double, String> errorFormatter = (v -> ERR_FORMAT.format(v * errMultiplier));
    final Function<Double, String> timeFormatter = (v -> TIME_FORMAT.format(v / NANO_TO_MILLIS));
    final Function<Double, String> sizeFormatter = (v -> SIZE_FORMAT.format(v));
    final String[] chartKey = new String[]{
        "card: cardinality",
        "sp: sparse precision value",
        "p: normal precision value",
        "err: error as a percent of the expected cardinality; ",
        "time: total time to add all values to the hllp estimator and calculate a cardinality estimate",
        "size: size of the hllp set in bytes once all values have been added for the specified cardinality",
        "l=low, m=mid(based on percentile chosen), h=high, std=standard deviation"};
    final String[] chartHeader = new String[]{"card", "sp", "p", "err l/m/h/std (% of actual)", "time l/m/h/std (ms)", "size l/m/h/std (b)"};
    final int[] chartPadding = new int[]{10, 10, 10, 40, 40, 30};

    // NOTE(review): these checks only compare the range endpoints; individual (sp, p) pairs
    // visited by the loops below can still have p > sp -- confirm the estimator accepts that.
    if (spMin < pMin) {
      throw new IllegalArgumentException("p must be <= sp");
    }
    if (spMax < pMax) {
      throw new IllegalArgumentException("p must be <= sp");
    }
    // Non-positive steps would make the sp / p loops below spin forever.
    if (spStep <= 0) {
      throw new IllegalArgumentException("sp step must be > 0 but was " + spStep);
    }
    if (pStep <= 0) {
      throw new IllegalArgumentException("p step must be > 0 but was " + pStep);
    }

    println("Options Used");
    println("------------");
    println("num trials: " + numTrials);
    println("card min: " + cardMin);
    println("card max: " + cardMax);
    println("card step: " + cardStep);
    println("card start: " + cardStart);
    println("sp min: " + spMin);
    println("sp max: " + spMax);
    println("sp step: " + spStep);
    println("p min: " + pMin);
    println("p max: " + pMax);
    println("p step: " + pStep);
    println("error percentile: " + errorPercentile);
    println("time percentile: " + timePercentile);
    println("size percentile: " + sizePercentile);
    println("format err as %: " + formatErrPercent);
    println("");
    printHeading(chartKey, chartHeader, chartPadding, chartDelim);

    for (int c = cardStart; c <= cardMax; c += cardStep) {
      for (int sp = spMin; sp <= spMax; sp += spStep) {
        for (int p = pMin; p <= pMax; p += pStep) {
          DescriptiveStatistics errorStats = new DescriptiveStatistics();
          DescriptiveStatistics timeStats = new DescriptiveStatistics();
          DescriptiveStatistics sizeStats = new DescriptiveStatistics();
          for (int i = 0; i < numTrials; i++) {
            List<Object> trialSet = buildTrialSet(c);
            // Exact distinct count, used as the reference for the estimate's relative error.
            Set<Object> unique = new HashSet<>(trialSet);
            long distinctVals = unique.size();
            HyperLogLogPlus hllp = new HyperLogLogPlus(p, sp);
            // The timed region covers both loading the values and computing the estimate.
            long timeStart = System.nanoTime();
            hllp.addAll(trialSet);
            long dvEstimate = hllp.cardinality();
            long timeEnd = System.nanoTime();
            long timeElapsed = timeEnd - timeStart;
            double rawError = Math.abs(dvEstimate - distinctVals) / (double) distinctVals;
            errorStats.addValue(rawError);
            timeStats.addValue(timeElapsed);
            sizeStats.addValue(SerDeUtils.toBytes(hllp).length);
          }
          MeasureResultFormatter errorRF = new MeasureResultFormatter(errorStats, errorFormatter, errorPercentile);
          MeasureResultFormatter timeRF = new MeasureResultFormatter(timeStats, timeFormatter, timePercentile);
          MeasureResultFormatter sizeRF = new MeasureResultFormatter(sizeStats, sizeFormatter, sizePercentile);
          println(formatWithPadding(new String[]{"" + c, "" + sp, "" + p, errorRF.getFormattedResults(), timeRF.getFormattedResults(), sizeRF.getFormattedResults()}, chartPadding, chartDelim));
        }
      }
    }
  } catch (Exception e) {
    e.printStackTrace();
    System.exit(-1);
  }
}