in Java/parkservices/src/main/java/com/amazon/randomcutforest/parkservices/preprocessor/Preprocessor.java [149:225]
public Preprocessor(Builder<?> builder) {
checkArgument(builder.transformMethod != null, "transform required");
checkArgument(builder.forestMode != null, " forest mode is required");
checkArgument(builder.inputLength > 0, "incorrect input length");
checkArgument(builder.shingleSize > 0, "incorrect shingle size");
checkArgument(builder.dimensions > 0, "incorrect dimensions");
checkArgument(builder.shingleSize == 1 || builder.dimensions % builder.shingleSize == 0,
" shingle size should divide the dimensions");
checkArgument(builder.forestMode == ForestMode.TIME_AUGMENTED || builder.inputLength == builder.dimensions
|| builder.inputLength * builder.shingleSize == builder.dimensions, "incorrect input size");
checkArgument(
builder.forestMode != ForestMode.TIME_AUGMENTED
|| (builder.inputLength + 1) * builder.shingleSize == builder.dimensions,
"incorrect input size");
checkArgument(builder.startNormalization <= builder.stopNormalization, "incorrect normalization parameters");
checkArgument(builder.startNormalization > 0 || !builder.normalizeTime, " start of normalization cannot be 0");
checkArgument(
builder.startNormalization > 0 || !(builder.transformMethod == TransformMethod.NORMALIZE
|| builder.transformMethod == TransformMethod.NORMALIZE_DIFFERENCE),
" start of normalization cannot be 0 for these transformations");
checkArgument(builder.weights == null || builder.weights.length >= builder.inputLength, " incorrect weights");
inputLength = builder.inputLength;
dimension = builder.dimensions;
shingleSize = builder.shingleSize;
mode = builder.forestMode;
lastShingledPoint = new double[dimension];
this.transformMethod = builder.transformMethod;
this.startNormalization = builder.startNormalization;
this.stopNormalization = builder.stopNormalization;
this.normalizeTime = builder.normalizeTime;
this.weights = new double[inputLength + 1];
Arrays.fill(weights, 1);
if (builder.weights != null) {
if (builder.weights.length == inputLength) {
System.arraycopy(builder.weights, 0, weights, 0, inputLength);
weights[inputLength] = builder.weightTime;
} else {
System.arraycopy(builder.weights, 0, weights, 0, inputLength + 1);
}
} else {
weights[inputLength] = builder.weightTime;
}
previousTimeStamps = new long[shingleSize];
if (inputLength == dimension) {
lastShingledInput = new double[dimension];
} else {
lastShingledInput = new double[shingleSize * inputLength];
}
double discount = builder.timeDecay;
dataQuality = builder.dataQuality.orElse(new Deviation(discount));
if (this.transformMethod != TransformMethod.NONE && this.transformMethod != TransformMethod.DIFFERENCE) {
if (builder.deviations.isPresent()) {
deviationList = builder.deviations.get();
} else {
deviationList = new Deviation[inputLength];
for (int i = 0; i < inputLength; i++) {
deviationList[i] = new Deviation(discount);
}
}
}
timeStampDeviation = builder.timeDeviation.orElse(new Deviation(discount));
if (mode == ForestMode.STREAMING_IMPUTE) {
imputationMethod = builder.imputationMethod;
normalizeTime = true;
if (imputationMethod == FIXED_VALUES) {
int baseDimension = builder.dimensions / builder.shingleSize;
// shingling will be performed in this layer and not in forest
// so that we control admittance of imputed shingles
checkArgument(builder.fillValues != null && builder.fillValues.length == baseDimension,
" the number of values should match the shingled input");
this.defaultFill = Arrays.copyOf(builder.fillValues, builder.fillValues.length);
}
this.useImputedFraction = builder.useImputedFraction.orElse(0.5);
}
}