in src/main/java/org/apache/sysds/runtime/iogen/codegen/CodeGenTrie.java [213:410]
/**
 * Tries to emit a compact "split and loop" reader for a linear chain of trie nodes.
 *
 * <p>Starting at {@code node}, the method follows single-child links and collects the visited
 * nodes. If the chain has more than one node, all keys after the first are identical, and more
 * than one column index was collected, Java source is appended to {@code src} that splits the
 * remaining text once (via {@code IOUtilFunctions.splitCSV}) and assigns the parts to the target
 * columns in a loop:
 * <ul>
 * <li>Case 1 - single key, consecutive column indexes: the column is computed as
 * {@code i + baseIndex}.</li>
 * <li>Case 2 - single key, irregular column indexes: the columns are looked up in a generated
 * {@code int[]}.</li>
 * <li>Cases 3 and 4 - multiple keys: not handled, {@code null} is returned.</li>
 * </ul>
 *
 * <p>When the chain carries exactly one column index, only a comment is appended to {@code src}
 * (and, if a conflict token exists, it is registered in {@code properties.endWithValueStrings()}),
 * and {@code null} is returned.
 *
 * @param node    trie node at which the chain is expected to start
 * @param src     builder receiving the generated Java source
 * @param currPos expression (in the generated code) denoting the current position in {@code str}
 * @return the node at which the chain walk stopped (the last chain node, or the first node with
 *         more than one child) when loop code was emitted; {@code null} when this method did not
 *         emit a loop
 */
private CodeGenTrieNode getJavaCodeRegular(CodeGenTrieNode node, StringBuilder src, String currPos) {
	// Only a node with exactly one child can start a linear chain.
	if(node.getChildren().size() != 1)
		return null;

	// Collect the start node plus every descendant reachable through nodes with at most one child.
	ArrayList<CodeGenTrieNode> nodes = new ArrayList<>();
	nodes.add(node);
	CodeGenTrieNode cn = node.getChildren().get(node.getChildren().keySet().iterator().next());
	while(cn.getChildren().size() <= 1) {
		nodes.add(cn);
		if(cn.getChildren().size() != 1)
			break;
		cn = cn.getChildren().get(cn.getChildren().keySet().iterator().next());
	}
	if(nodes.size() <= 1)
		return null;

	// Extract the keys and the column indexes of the nodes that end a condition.
	ArrayList<String> keys = new ArrayList<>();
	ArrayList<String> colIndexes = new ArrayList<>();
	int firstNonEndPosition = -1; // position in the chain of the first node that is not end-of-condition
	int position = 0;
	for(CodeGenTrieNode n : nodes) {
		keys.add(n.getKey());
		if(n.isEndOfCondition())
			colIndexes.add(n.getColIndex());
		else if(firstNonEndPosition < 0)
			firstNonEndPosition = position;
		position++;
	}
	// Either every node carries a column, or only the very first node of the chain does not.
	if(keys.size() != colIndexes.size() &&
		!(keys.size() == colIndexes.size() + 1 && firstNonEndPosition == 0))
		return null;

	// Are the keys single? The first key is intentionally left out of the comparison.
	HashSet<String> keysSet = new HashSet<>();
	for(int i = 1; i < keys.size(); i++)
		keysSet.add(keys.get(i));
	boolean isKeySingle = keysSet.size() == 1;

	// Parse the column indexes once; they are reused for the sequence check and the format checks.
	int[] cols = new int[colIndexes.size()];
	for(int i = 0; i < cols.length; i++)
		cols[i] = Integer.parseInt(colIndexes.get(i));
	boolean isIndexSequence = true;
	for(int i = 1; i < cols.length && isIndexSequence; i++)
		isIndexSequence = cols[i] - cols[i - 1] == 1;

	String tmpDest = destination.split("\\.")[0];

	// The conflict token is only looked up for frames.
	String conflict = !isMatrix ? formatIdentifyer.getConflictToken(cols) : null;

	// With a single column there is nothing to loop over: only record the conflict token.
	// No loop code is emitted here and null is returned.
	if(colIndexes.size() == 1) {
		if(conflict != null) {
			src.append("// conflict token : " + conflict + " appended to end of value token list \n");
			properties.endWithValueStrings()[Integer.parseInt(colIndexes.get(0))].add(conflict);
		}
		else
			src.append("// conflict token for find end of array was NULL \n");
		return null;
	}

	// #Case 3: key = multi and index = sequence
	// #Case 4: key = multi and index = irregular
	if(!isKeySingle)
		return null;

	boolean isDelimAndSuffixesSame = false;

	// #Case 1: key = single and index = sequence
	if(isIndexSequence) {
		String baseIndex = colIndexes.get(0);
		String key = keysSet.iterator().next();
		String mKey = refineKeyForSearch(key);
		String colIndex = getRandomName("colIndex");
		// Only the frame-specific checks are guarded; the split and the loop below are emitted
		// for matrices as well (previously they were accidentally nested in this guard, so a
		// matrix chain produced no code at all).
		if(!isMatrix) {
			isDelimAndSuffixesSame = formatIdentifyer.isDelimAndSuffixesSame(key, cols, conflict);
			if(conflict != null) {
				src.append("indexConflict=")
					.append("str.indexOf(" + refineKeyForSearch(conflict) + "," + currPos + "); \n");
				src.append("if (indexConflict != -1) \n");
				src.append("parts = IOUtilFunctions.splitCSV(str.substring(" + currPos + ", indexConflict)," +
					mKey + "); \n");
				src.append("else \n");
			}
		}
		src.append("parts=IOUtilFunctions.splitCSV(str.substring(" + currPos + "), " + mKey + "); \n");
		src.append("int ").append(colIndex).append("; \n");
		src.append("for (int i=0; i< Math.min(parts.length, " + colIndexes.size() + "); i++) {\n");
		src.append(colIndex).append(" = i+").append(baseIndex).append("; \n");
		if(isDelimAndSuffixesSame) {
			if(!isMatrix)
				src.append(destination).append(
					"(row," + colIndex + ",UtilFunctions.stringToObject(" + tmpDest +
						".getSchema()[" + colIndex + "], parts[i])); \n");
			else
				src.append(destination).append(
					"(row," + colIndex + ",UtilFunctions.parseToDouble(parts[i], null)); \n");
		}
		else {
			src.append("endPos=TemplateUtil.getEndPos(parts[i], parts[i].length(),0,endWithValueString[" +
				colIndex + "]); \n");
			if(!isMatrix)
				src.append(destination).append(
					"(row," + colIndex + ",UtilFunctions.stringToObject(" + tmpDest +
						".getSchema()[" + colIndex + "], parts[i].substring(0,endPos))); \n");
			else
				src.append(destination).append("(row," + colIndex +
					",UtilFunctions.parseToDouble(parts[i].substring(0,endPos), null)); \n");
		}
		src.append("} \n");
		if(conflict != null) {
			src.append("if (indexConflict !=-1) \n");
			src.append("index = indexConflict; \n");
		}
		return cn;
	}

	// #Case 2: key = single and index = irregular
	String srcColIndexes = "new int[]{" + String.join(",", colIndexes) + "}";
	String colIndexName = getRandomName("targetColIndex");
	src.append("int[] ").append(colIndexName).append("=").append(srcColIndexes).append("; \n");
	String key = keysSet.iterator().next();
	String mKey = refineKeyForSearch(key);
	if(!isMatrix) {
		isDelimAndSuffixesSame = formatIdentifyer.isDelimAndSuffixesSame(key, cols, conflict);
		if(conflict != null) {
			src.append("indexConflict = ")
				.append("str.indexOf(" + refineKeyForSearch(conflict) + "," + currPos + "); \n");
			src.append("if (indexConflict != -1) \n");
			src.append("parts = IOUtilFunctions.splitCSV(str.substring(" + currPos + ", indexConflict), " +
				mKey + "); \n");
			src.append("else \n");
		}
	}
	src.append("parts = IOUtilFunctions.splitCSV(str.substring(" + currPos + "), " + mKey + "); \n");
	src.append("for (int i=0; i< Math.min(parts.length, " + colIndexes.size() + "); i++) {\n");
	if(isDelimAndSuffixesSame) {
		if(!isMatrix)
			src.append(destination).append(
				"(row," + colIndexName + "[i],UtilFunctions.stringToObject(" + tmpDest +
					".getSchema()[" + colIndexName + "[i]], parts[i])); \n");
		else
			src.append(destination).append(
				"(row," + colIndexName + "[i],UtilFunctions.parseToDouble(parts[i], null)); \n");
	}
	else {
		// endPos must be computed for both targets: the matrix variant below also reads it
		// (previously it was only emitted for frames, leaving a stale endPos for matrices).
		src.append("endPos = TemplateUtil.getEndPos(parts[i], parts[i].length(), 0, endWithValueString[" +
			colIndexName + "[i]]); \n");
		if(!isMatrix)
			src.append(destination).append(
				"(row," + colIndexName + "[i],UtilFunctions.stringToObject(" + tmpDest +
					".getSchema()[" + colIndexName + "[i]], parts[i].substring(0, endPos))); \n");
		else
			src.append(destination).append("(row," + colIndexName +
				"[i],UtilFunctions.parseToDouble(parts[i].substring(0, endPos), null)); \n");
	}
	src.append("} \n");
	if(conflict != null) {
		src.append("if (indexConflict !=-1) \n");
		src.append("index = indexConflict; \n");
	}
	return cn;
}