in wayang-profiler/code/main/java/org/apache/wayang/profiler/spark/Main.java [45:176]
/**
 * Command-line entry point: profiles one Spark operator and prints the results as CSV.
 *
 * <p>Recognized operator names are {@code textsource}, {@code collectionsource}, {@code map},
 * {@code filter}, {@code flatmap}, {@code reduce}, {@code globalreduce}, {@code distinct},
 * {@code distinct-string}, {@code distinct-integer}, {@code sort}, {@code sort-string},
 * {@code sort-integer}, {@code count}, {@code groupby}, {@code join}, {@code union},
 * {@code cartesian} and {@code callbacksink}.
 *
 * <p>On success an empty line, the CSV header (taken from one of the results) and one CSV row
 * per result are written to standard output. Usage errors, malformed cardinalities and unknown
 * operator names are reported on standard error and terminate the JVM with exit status 1, so
 * that diagnostics never end up in the CSV stream.
 *
 * @param args {@code args[0]} is the operator name; every further argument is a
 *             comma-separated list of cardinalities, each parseable as a {@code long}
 */
public static void main(String[] args) {
    if (args.length < 2) {
        // Use the class name: formatting the Class object itself would print "class <name>",
        // which makes the suggested command line wrong.
        System.err.printf("Usage: java %s <operator to profile> [<cardinality n>[,<cardinality n>]*]+ \n", Main.class.getName());
        System.exit(1);
    }
    String operator = args[0];

    // Each argument after the operator name is turned into one list of cardinalities.
    List<List<Long>> allCardinalities = new LinkedList<>();
    for (int i = 1; i < args.length; i++) {
        List<Long> cardinalities;
        try {
            cardinalities = Arrays.stream(args[i].split(","))
                    .map(String::trim)
                    .map(Long::valueOf)
                    .collect(Collectors.toList());
        } catch (NumberFormatException e) {
            System.err.printf("Invalid cardinality specification \"%s\": %s\n", args[i], e.getMessage());
            System.exit(1);
            return;
        }
        // String.split drops trailing empty strings, so an argument such as "," yields no tokens.
        if (cardinalities.isEmpty()) {
            System.err.printf("Invalid cardinality specification \"%s\": no cardinalities given\n", args[i]);
            System.exit(1);
            return;
        }
        allCardinalities.add(cardinalities);
    }

    List<SparkOperatorProfiler.Result> results;
    switch (operator) {
        case "textsource":
            results = profile(OperatorProfilers.createSparkTextFileSourceProfiler(), allCardinalities);
            break;
        case "collectionsource":
            results = profile(OperatorProfilers.createSparkCollectionSourceProfiler(), allCardinalities);
            break;
        case "map":
            results = profile(OperatorProfilers.createSparkMapProfiler(), allCardinalities);
            break;
        case "filter":
            results = profile(OperatorProfilers.createSparkFilterProfiler(), allCardinalities);
            break;
        case "flatmap":
            results = profile(OperatorProfilers.createSparkFlatMapProfiler(), allCardinalities);
            break;
        case "reduce":
            results = profile(OperatorProfilers.createSparkReduceByProfiler(), allCardinalities);
            break;
        case "globalreduce":
            results = profile(OperatorProfilers.createSparkGlobalReduceProfiler(), allCardinalities);
            break;
        case "distinct":
        case "distinct-string":
            results = profile(OperatorProfilers.createSparkDistinctProfiler(), allCardinalities);
            break;
        case "distinct-integer":
            // Fixed seed (42) so that the generated integers are the same on every run.
            results = profile(OperatorProfilers.createSparkDistinctProfiler(
                    DataGenerators.createReservoirBasedIntegerSupplier(new ArrayList<>(), 0.7d, new Random(42)),
                    Integer.class,
                    new Configuration()
            ), allCardinalities);
            break;
        case "sort":
        case "sort-string":
            results = profile(OperatorProfilers.createSparkSortProfiler(), allCardinalities);
            break;
        case "sort-integer":
            // Fixed seed (42) so that the generated integers are the same on every run.
            results = profile(OperatorProfilers.createSparkSortProfiler(
                    DataGenerators.createReservoirBasedIntegerSupplier(new ArrayList<>(), 0.7d, new Random(42)),
                    Integer.class,
                    new Configuration()
            ), allCardinalities);
            break;
        case "count":
            results = profile(OperatorProfilers.createSparkCountProfiler(), allCardinalities);
            break;
        case "groupby":
            results = profile(OperatorProfilers.createSparkMaterializedGroupByProfiler(), allCardinalities);
            break;
        case "join":
            results = profile(OperatorProfilers.createSparkJoinProfiler(), allCardinalities);
            break;
        case "union":
            results = profile(OperatorProfilers.createSparkUnionProfiler(), allCardinalities);
            break;
        case "cartesian":
            results = profile(OperatorProfilers.createSparkCartesianProfiler(), allCardinalities);
            break;
        case "callbacksink":
            results = profile(OperatorProfilers.createSparkLocalCallbackSinkProfiler(), allCardinalities);
            break;
        // NOTE(review): the disabled "word-count-*" cases below use the Java-platform profilers
        // and reference `cardinalities`, which is scoped to the parsing loop above and not
        // visible here; they need adapting before they can be re-enabled.
//            case "word-count-split": {
//                final Supplier<String> randomStringSupplier = DataGenerators.createRandomStringSupplier(2, 10, new Random(42));
//                results = profile(
//                        org.apache.wayang.profiler.java.OperatorProfilers.createJavaFlatMapProfiler(
//                                () -> String.format("%s %s %s %s %s %s %s %s %s",
//                                        randomStringSupplier.get(), randomStringSupplier.get(),
//                                        randomStringSupplier.get(), randomStringSupplier.get(),
//                                        randomStringSupplier.get(), randomStringSupplier.get(),
//                                        randomStringSupplier.get(), randomStringSupplier.get(),
//                                        randomStringSupplier.get()),
//                                str -> Arrays.asList(str.split(" ")),
//                                String.class,
//                                String.class
//                        ),
//                        cardinalities);
//                break;
//            }
//            case "word-count-canonicalize": {
//                final Supplier<String> randomStringSupplier = DataGenerators.createRandomStringSupplier(2, 10, new Random(42));
//                results = profile(
//                        org.apache.wayang.profiler.java.OperatorProfilers.createJavaMapProfiler(
//                                randomStringSupplier,
//                                word -> new Tuple2<>(word.toLowerCase(), 1),
//                                String.class,
//                                Tuple2.class
//                        ),
//                        cardinalities
//                );
//                break;
//            }
//            case "word-count-count": {
//                final Supplier<String> stringSupplier = DataGenerators.createReservoirBasedStringSupplier(new ArrayList<>(), 0.7, new Random(42), 2, 10);
//                results = profile(
//                        org.apache.wayang.profiler.java.OperatorProfilers.createJavaReduceByProfiler(
//                                () -> new Tuple2<>(stringSupplier.get(), 1),
//                                pair -> pair.field0,
//                                (p1, p2) -> {
//                                    p1.field1 += p2.field1;
//                                    return p1;
//                                },
//                                cast(Tuple2.class),
//                                String.class
//                        ),
//                        cardinalities
//                );
//                break;
//            }
        default:
            // Report on stderr and fail, matching the usage-error path; stdout carries the CSV.
            System.err.println("Unknown operator: " + operator);
            System.exit(1);
            // Explicit return: the compiler does not treat System.exit as terminating, and
            // `results` must be definitely assigned after the switch.
            return;
    }

    // Emit the results as CSV: blank separator line, header, then one row per result.
    System.out.println();
    System.out.println(WayangCollections.getAny(results).getCsvHeader());
    results.forEach(result -> System.out.println(result.toCsvString()));
}