in archived/ModernizeYourDatabases2019/ModernizeSQL2019/Module 5 Activity - Modern Development Platform/java/Ngram.java [44:83]
/**
 * Generates n-grams for every input row and publishes them through the class's
 * output fields.
 *
 * <p>Pairs {@code inputDataCol1[i]} with {@code inputDataCol2[i]} into an
 * {@code InputRow}, calls {@code ngrams(param1, inputRow.text)} for each row, and
 * emits one {@code OutputRow} per returned n-gram carrying that row's {@code id}.
 * The results are written to:
 * <ul>
 *   <li>{@code numberOfOutputCols} - always 2</li>
 *   <li>{@code numberOfRows} - total number of n-grams produced</li>
 *   <li>{@code outputDataCol1} - id of the originating row, per n-gram</li>
 *   <li>{@code outputDataCol2} - the n-gram text</li>
 *   <li>{@code outputNullMap} - 2 x numberOfRows, all {@code false} (no nulls emitted)</li>
 * </ul>
 *
 * <p>When there are no input rows an explicit empty result (zero rows, two columns)
 * is published, so the output fields never keep values from a previous invocation.
 *
 * <p>NOTE(review): assumes {@code inputDataCol2} has at least as many entries as
 * {@code inputDataCol1}; confirm against the caller that fills these fields.
 */
public static void getNGrams() {
    System.out.println("inputDataCol1.length= "+ inputDataCol1.length);

    if (inputDataCol1.length == 0) {
        // Nothing to process: still describe a well-formed, zero-row result set
        // instead of leaving the output fields unassigned or stale.
        numberOfOutputCols = 2;
        numberOfRows = 0;
        outputDataCol1 = new int[0];
        outputDataCol2 = new String[0];
        outputNullMap = new boolean[2][0];
        return;
    }

    // Zip the two parallel input columns into row objects.
    final List<InputRow> inputDataSet = IntStream.range(0, inputDataCol1.length)
            .mapToObj(i -> new InputRow(inputDataCol1[i], inputDataCol2[i]))
            .collect(Collectors.toList());

    // Each call to ngrams(...) yields a list for one row; flatMap merges the
    // per-row lists into a single list of (id, ngram) output rows.
    final List<OutputRow> outputDataSet = inputDataSet.stream()
            .flatMap(inputRow -> ngrams(param1, inputRow.text).stream()
                    .map(s -> new OutputRow(inputRow.id, s)))
            .collect(Collectors.toList());

    // Print the outputDataSet
    System.out.println(outputDataSet);

    // Describe the shape of the result: two columns, one row per n-gram.
    numberOfOutputCols = 2;
    numberOfRows = outputDataSet.size();
    outputDataCol1 = new int[numberOfRows];       // ID column
    outputDataCol2 = new String[numberOfRows];    // n-gram column
    // A new boolean array is all false, which is exactly "no null values".
    outputNullMap = new boolean[2][numberOfRows];

    // Copy the row objects into the column arrays.
    for (int i = 0; i < numberOfRows; i++) {
        final OutputRow outputRow = outputDataSet.get(i);
        outputDataCol1[i] = outputRow.id;
        outputDataCol2[i] = outputRow.ngram;
    }
}