in wayang-profiler/code/main/java/org/apache/wayang/profiler/log/GeneticOptimizerApp.java [104:203]
/**
 * Creates a new instance by loading, filtering, grouping, and instrumenting the
 * {@link PartialExecution}s found in the {@link ExecutionLog}.
 * <p>The following configuration properties are read (defaults in parentheses):</p>
 * <ul>
 * <li>{@code wayang.profiler.ga.sampling} (1.0): sampling threshold; an execution is dropped when a
 * fresh {@link Random#nextDouble()} draw exceeds it</li>
 * <li>{@code wayang.profiler.ga.max-cardinality-spread} (1.0): threshold handed to the spread check</li>
 * <li>{@code wayang.profiler.ga.min-cardinality-confidence} (1.0): threshold handed to the confidence check</li>
 * <li>{@code wayang.profiler.ga.min-exec-time} (1): executions whose measured execution time is below
 * this value are dropped</li>
 * <li>{@code wayang.profiler.ga.binning} (1.1): binning is applied only if this value is greater than 1</li>
 * </ul>
 *
 * @param configuration provides the above properties and is used to open the {@link ExecutionLog}
 * @throws WayangException if opening or evaluating the {@link ExecutionLog} fails
 */
public GeneticOptimizerApp(Configuration configuration) {
    this.configuration = configuration;

    // Initialize platforms.
    Java.platform();
    Spark.platform();
    Sqlite3.platform();
    Postgres.platform();
    //TODO: add dynamically Graphchi, if the version of scala is 2.11

    // Read the filter settings.
    final double samplingFactor = this.configuration.getDoubleProperty("wayang.profiler.ga.sampling", 1d);
    final double maxCardinalitySpread = this.configuration.getDoubleProperty("wayang.profiler.ga.max-cardinality-spread", 1d);
    final double minCardinalityConfidence = this.configuration.getDoubleProperty("wayang.profiler.ga.min-cardinality-confidence", 1d);
    final long minExecutionTime = this.configuration.getLongProperty("wayang.profiler.ga.min-exec-time", 1);

    // Load the ExecutionLog and apply the filters one after another, reporting how many
    // executions each individual filter removed.
    try (ExecutionLog executionLog = ExecutionLog.open(configuration)) {
        this.partialExecutions = executionLog.stream().collect(Collectors.toList());
        int lastSize = this.partialExecutions.size();

        this.partialExecutions.removeIf(partialExecution -> !this.checkEstimatorTemplates(partialExecution));
        int newSize = this.partialExecutions.size();
        System.out.printf("Removed %d executions with no template-based estimators.\n", lastSize - newSize);
        lastSize = newSize;

        this.partialExecutions.removeIf(partialExecution -> !this.checkSpread(partialExecution, maxCardinalitySpread));
        newSize = this.partialExecutions.size();
        // Report the threshold that was actually applied above, i.e., the maximum spread.
        System.out.printf("Removed %d executions with a too large cardinality spread (> %.2f).\n", lastSize - newSize, maxCardinalitySpread);
        lastSize = newSize;

        this.partialExecutions.removeIf(partialExecution -> !this.checkNonEmptyCardinalities(partialExecution));
        newSize = this.partialExecutions.size();
        System.out.printf("Removed %d executions with zero cardinalities.\n", lastSize - newSize);
        lastSize = newSize;

        this.partialExecutions.removeIf(partialExecution -> !this.checkConfidence(partialExecution, minCardinalityConfidence));
        newSize = this.partialExecutions.size();
        System.out.printf("Removed %d executions with a too low cardinality confidence (< %.2f).\n", lastSize - newSize, minCardinalityConfidence);
        lastSize = newSize;

        this.partialExecutions.removeIf(partialExecution -> partialExecution.getMeasuredExecutionTime() < minExecutionTime);
        newSize = this.partialExecutions.size();
        System.out.printf("Removed %d executions with a too short runtime (< %,d ms).\n", lastSize - newSize, minExecutionTime);
        lastSize = newSize;

        // Random sampling: one shared generator for the whole pass instead of one per element.
        // nextDouble() is in [0, 1), so a sampling factor >= 1 removes nothing.
        final Random samplingRandom = new Random();
        this.partialExecutions.removeIf(partialExecution -> samplingRandom.nextDouble() > samplingFactor);
        newSize = this.partialExecutions.size();
        System.out.printf("Removed %d executions due to sampling.\n", lastSize - newSize);
    } catch (Exception e) {
        throw new WayangException("Could not evaluate execution log.", e);
    }

    // Group the PartialExecutions; groups are ordered ascendingly by the size of their grouping key.
    this.partialExecutionGroups = this.groupPartialExecutions(this.partialExecutions).entrySet().stream()
            .sorted(Comparator.comparingInt(e -> e.getKey().size()))
            .map(Map.Entry::getValue)
            .collect(Collectors.toList());

    // Apply binning if requested.
    final double binningStretch = this.configuration.getDoubleProperty("wayang.profiler.ga.binning", 1.1d);
    if (binningStretch > 1d) {
        System.out.print("Applying binning... ");
        final int numOriginalPartialExecutions = this.partialExecutions.size();
        // Rebuild the flat list from the reduced groups so that it stays in sync with the groups.
        this.partialExecutions.clear();
        for (List<PartialExecution> group : this.partialExecutionGroups) {
            final Collection<PartialExecution> reducedGroup = this.binByExecutionTime(group, binningStretch);
            group.retainAll(reducedGroup);
            this.partialExecutions.addAll(reducedGroup);
        }
        System.out.printf(
                "reduced the number of partial executions from %d to %d.\n",
                numOriginalPartialExecutions, this.partialExecutions.size()
        );
    }

    // Initialize the optimization space with its LoadProfileEstimators and associated Variables.
    this.optimizationSpace = new OptimizationSpace();
    this.estimators = new HashMap<>();
    this.platformOverheads = new HashMap<>();
    for (PartialExecution partialExecution : this.partialExecutions) {
        // Instrument the partial executions.
        for (AtomicExecutionGroup executionGroup : partialExecution.getAtomicExecutionGroups()) {
            for (AtomicExecution atomicExecution : executionGroup.getAtomicExecutions()) {
                this.instrument(atomicExecution);
            }
        }

        // Register one overhead variable per initialized platform, named after the platform class.
        for (Platform platform : partialExecution.getInitializedPlatforms()) {
            this.platformOverheads.computeIfAbsent(
                    platform,
                    key -> this.optimizationSpace.getOrCreateVariable(key.getClass().getCanonicalName() + "->overhead")
            );
        }
    }

    System.out.printf(
            "Loaded %d execution records with %d template-based estimators types and %d platform overheads.\n",
            this.partialExecutions.size(), this.estimators.size(), this.platformOverheads.size()
    );
}