in Java/parkservices/src/main/java/com/amazon/randomcutforest/parkservices/preprocessor/ImputePreprocessor.java [91:153]
/**
 * Imputes the missing entries of the stored initial values in place and then
 * truncates every row to {@code inputLength}.
 *
 * <p>
 * On entry each row of {@code initialValues} holds {@code inputLength} values,
 * optionally followed by extra entries; every extra entry is the column index
 * (stored as a double) of a value in that row that is missing. Missing entries
 * are replaced according to the configured strategy:
 * <ul>
 * <li>{@code ZERO}: missing entries become 0;</li>
 * <li>{@code FIXED_VALUES}, or any other method when {@code defaultFill} is
 * non-null: missing entries become {@code defaultFill[column]};</li>
 * <li>otherwise: each column is linearly interpolated over
 * {@code initialTimeStamps} between the nearest present values; entries before
 * the first present value take that value, and a column with no present value
 * at all is set to 0 in every row.</li>
 * </ul>
 * Observed (non-missing) entries are never modified.
 */
void prepareInitialInput() {
    final int rows = initialValues.length;

    // Decode the missing-index entries appended after the first inputLength
    // values of each row. A new boolean array is all false, i.e. "present".
    boolean[][] missing = new boolean[rows][inputLength];
    for (int i = 0; i < rows; i++) {
        int extra = initialValues[i].length - inputLength;
        for (int k = 0; k < extra; k++) {
            missing[i][(int) initialValues[i][inputLength + k]] = true;
        }
    }

    if (imputationMethod == ZERO) {
        // NOTE(review): the final row is deliberately left untouched, exactly as
        // before; presumably it is always fully observed -- confirm with caller.
        for (int i = 0; i < rows - 1; i++) {
            for (int j = 0; j < inputLength; j++) {
                // only missing entries are overwritten; observed values are kept
                if (missing[i][j]) {
                    initialValues[i][j] = 0;
                }
            }
        }
    } else if (imputationMethod == FIXED_VALUES || defaultFill != null) {
        // NOTE(review): FIXED_VALUES with a null defaultFill would throw a
        // NullPointerException below; presumably validated elsewhere -- confirm.
        for (int i = 0; i < rows - 1; i++) {
            for (int j = 0; j < inputLength; j++) {
                if (missing[i][j]) {
                    initialValues[i][j] = defaultFill[j];
                }
            }
        }
    } else { // no simple alternative other than linear interpolation
        for (int j = 0; j < inputLength; j++) {
            // locate the first row in which column j is present
            int first = 0;
            while (first < rows && missing[first][j]) {
                ++first;
            }
            if (first == rows) {
                // there is no value in the entire column; set it to 0
                for (int y = 0; y < rows; y++) {
                    initialValues[y][j] = 0;
                }
                continue;
            }

            // Seed row 0 with the first present value (a no-op when first == 0)
            // so that any leading gap is filled with that constant below.
            initialValues[0][j] = initialValues[first][j];
            missing[0][j] = false;

            int start = 0;
            while (start < rows - 1) {
                // next row after start in which column j is present
                int end = start + 1;
                while (end < rows && missing[end][j]) {
                    ++end;
                }
                if (end < rows) {
                    double span = 1.0 * initialTimeStamps[end] - initialTimeStamps[start];
                    for (int y = start + 1; y < end; y++) {
                        // fraction of the way from start (0) to end (1); fall back
                        // to index spacing when both timestamps coincide, which
                        // would otherwise yield NaN or Infinity
                        double factor = (span != 0)
                                ? (1.0 * initialTimeStamps[y] - initialTimeStamps[start]) / span
                                : (1.0 * (y - start)) / (end - start);
                        initialValues[y][j] = (1 - factor) * initialValues[start][j]
                                + factor * initialValues[end][j];
                    }
                }
                // when end == rows the trailing gap has no right-hand anchor and
                // is left as stored
                start = end;
            }
        }
    }

    // truncate to input length, since the missing indices were stored as well
    for (int i = 0; i < rows; i++) {
        initialValues[i] = Arrays.copyOf(initialValues[i], inputLength);
    }
}