public Object call()

in src/main/java/org/apache/sysds/runtime/iogen/FormatIdentifyer.java [1242:1436]


		/**
		 * Builds, for every column index in {@code colIndexes}, the key-token list ({@code keys[c]})
		 * and the suffix set ({@code colSuffixes[c]}), and marks the column in
		 * {@code staticColIndexes}.
		 *
		 * The work is done in three passes over {@code colIndexes}:
		 * <ol>
		 * <li>normalise the entries of {@code prefixesRemovedReverse[c]} (digits replaced by
		 * {@code Lop.OPERAND_DELIMITOR}), de-duplicate them and rebuild the sorted pattern lists;</li>
		 * <li>derive the key tokens from the sorted patterns;</li>
		 * <li>reduce the key list via {@code cleanUPKey}, set the static-column flag and collect
		 * the column suffixes.</li>
		 * </ol>
		 *
		 * @return a {@code Pair} holding {@code keys} and {@code colSuffixes}
		 * @throws Exception declared by the signature; nothing is thrown explicitly in this method
		 */
		public Object call() throws Exception {
			// ---- Pass 1: normalise, de-duplicate and sort the reversed patterns ----
			for(int c : colIndexes) {
				keys[c] = new ArrayList<>();
				// normalised pattern -> positions in prefixesRemovedReverse[c] that produced it
				Map<String, ArrayList<Integer>> positionsByPattern = new HashMap<>();
				for(int pos = 0; pos < prefixesRemovedReverse[c].size(); pos++) {
					// replace every digit by the operand delimiter
					String masked = prefixesRemovedReverse[c].get(pos).replaceAll("\\d", Lop.OPERAND_DELIMITOR);
					StringBuilder collapsed = new StringBuilder();
					for(int k = 0; k < masked.length(); k++) {
						String symbol = String.valueOf(masked.charAt(k));
						if(!symbol.equals(Lop.OPERAND_DELIMITOR)) {
							collapsed.append(symbol);
							continue;
						}
						// a delimiter is appended only if the buffer does not already end with one
						boolean endsWithDelimiter = collapsed.length() > 0
							&& String.valueOf(collapsed.charAt(collapsed.length() - 1)).equals(Lop.OPERAND_DELIMITOR);
						if(!endsWithDelimiter)
							collapsed.append(Lop.OPERAND_DELIMITOR);
					}
					String pattern = collapsed.toString();
					ArrayList<Integer> positions = positionsByPattern.get(pattern);
					if(positions == null) {
						positions = new ArrayList<>();
						positionsByPattern.put(pattern, positions);
					}
					positions.add(pos);
				}

				// the three per-column lists are rebuilt from the de-duplicated patterns
				prefixesRemovedReverse[c] = new ArrayList<>();
				prefixesRemoved[c] = new ArrayList<>();
				prefixesRemovedReverseSort[c] = new ArrayList<>();

				// each pattern is paired with the first position at which it was seen
				for(Map.Entry<String, ArrayList<Integer>> group : positionsByPattern.entrySet())
					prefixesRemovedReverseSort[c].add(new Pair<>(group.getKey(), group.getValue().get(0)));

				prefixesRemovedReverseSort[c].sort(AscendingPairStringComparator);
				for(Pair<String, Integer> entry : prefixesRemovedReverseSort[c]) {
					String reversedPattern = entry.getKey();
					prefixesRemovedReverse[c].add(reversedPattern);
					prefixesRemoved[c].add(new StringBuilder(reversedPattern).reverse().toString());
				}
			}

			// ---- Pass 2: derive the key tokens of every column ----
			for(int c : colIndexes) {
				// Only one distinct pattern: its non-empty delimiter-separated parts are the keys.
				if(prefixesRemoved[c].size() == 1) {
					String only = prefixesRemoved[c].get(0);
					keys[c] = new ArrayList<>();
					if(only.length() == 0 || only.equals(Lop.OPERAND_DELIMITOR))
						keys[c].add("");
					for(String token : only.split(Lop.OPERAND_DELIMITOR))
						if(token.length() > 0)
							keys[c].add(token);
					continue;
				}

				// STEP 1: first key = trailing characters of the first pattern that all other
				// patterns share at the same distance from their end.
				// NOTE(review): charAt below throws if the first pattern is empty or if another
				// pattern is shorter than the offset being compared; presumably the sort order
				// rules this out - confirm against AscendingPairStringComparator.
				final String reference = prefixesRemoved[c].get(0);
				final int refLen = reference.length();
				StringBuilder tail = new StringBuilder();
				tail.append(reference.charAt(refLen - 1));
				boolean shared = true;
				int back = 2;
				// the scan stops before offset refLen, so the first character of the reference is never visited
				while(shared && back < refLen) {
					char expected = reference.charAt(refLen - back);
					for(int k = 1; shared && k < prefixesRemoved[c].size(); k++) {
						String other = prefixesRemoved[c].get(k);
						shared = other.charAt(other.length() - back) == expected;
					}
					if(shared)
						tail.append(expected);
					back++;
				}
				// characters were collected back-to-front
				String firstKey = tail.reverse().toString();

				ArrayList<String> firstKeyTokens = new ArrayList<>();
				for(String token : firstKey.split(Lop.OPERAND_DELIMITOR))
					if(token.length() > 0)
						firstKeyTokens.add(token);

				// The first key alone is accepted when, for every prefix, the helper returns the prefix length.
				boolean coversAll = true;
				for(int p = 0; coversAll && p < prefixes[c].size(); p++) {
					String prefix = prefixes[c].get(p);
					coversAll = getIndexOfKeyPatternOnString(prefix, firstKeyTokens, 0) == prefix.length();
				}
				if(coversAll) {
					keys[c] = firstKeyTokens;
					continue;
				}

				// STEP 2: look for additional keys. The two cursors keep their values across
				// iterations of the outer loop, so the pair search resumes where it stopped.
				// NOTE(review): termination of the outer loop depends on the helpers eventually
				// returning a non-empty set - confirm.
				int left = 0;
				int right = 0;
				Set<String> refined = new HashSet<>();
				do {
					// search pattern pairs until one pair yields refine keys
					for(; left < prefixesRemovedReverseSort[c].size() - 1 && refined.isEmpty(); left++) {
						String patternA = prefixesRemoved[c].get(left);
						String prefixA = prefixes[c].get(prefixesRemovedReverseSort[c].get(left).getValue());
						for(right = left + 1;
							right < prefixesRemovedReverseSort[c].size() && refined.isEmpty();
							right++) {
							String patternB = prefixesRemoved[c].get(right);
							String prefixB = prefixes[c].get(prefixesRemovedReverseSort[c].get(right).getValue());
							refined = getRefineKeysStep(lcs, patternA, patternB, prefixA, prefixB, firstKey);
						}
					}
					// both cursors still inside the list: leave the search without further refinement
					if(left < prefixesRemovedReverse[c].size() - 1 && right < prefixesRemovedReverse[c].size())
						break;

					// keep refining until the helper reports no additional keys
					while(true) {
						Pair<Set<String>, Set<String>> step = getNewRefineKeys(lcs, firstKey, prefixesRemoved[c], prefixes[c], refined);
						refined = step.getKey();
						if(step.getValue().isEmpty())
							break;
						refined.addAll(step.getValue());
					}
				} while(refined.isEmpty());

				if(refined.isEmpty()) {
					// TODO: we have to apply tokenizer
					continue;
				}

				// One candidate: use it. Several: use the last one after sorting with
				// AscendingStringLengthComparator (presumably the longest - confirm).
				String leadingKey;
				if(refined.size() == 1)
					leadingKey = refined.iterator().next();
				else {
					ArrayList<String> byLength = new ArrayList<>(refined);
					Collections.sort(byLength, AscendingStringLengthComparator);
					leadingKey = byLength.get(byLength.size() - 1);
				}
				// keys = non-empty parts of "<leadingKey><delimiter><firstKey>"
				String[] refinedTokens = (leadingKey + Lop.OPERAND_DELIMITOR + firstKey).split(Lop.OPERAND_DELIMITOR);
				keys[c] = new ArrayList<>();
				for(String token : refinedTokens)
					if(token.length() > 0)
						keys[c].add(token);
			}

			// ---- Pass 3: reduce the keys, flag static columns, collect suffixes ----
			for(int c : colIndexes) {
				ArrayList<String> reducedKeys = cleanUPKey(keys[c], prefixes[c]);

				// The flag stays true unless the reduced keys fail (helper returns -1) on some sampled
				// raw row; the rows are only checked when prefixes[c].size() differs from nrows.
				boolean matchesEveryRow = true;
				for(int r = 0; r < nrows && matchesEveryRow && prefixes[c].size() != nrows; r++) {
					String rawRow = sampleRawIndexes[r].getRaw();
					matchesEveryRow = getIndexOfKeyPatternOnString(rawRow, reducedKeys, 0) != -1;
				}
				staticColIndexes.set(c, matchesEveryRow);

				// Default to the reduced keys; for a non-static column, try to keep one extra
				// key (the one just before the last reducedKeys.size() entries of keys[c]).
				ArrayList<String> finalKeys = reducedKeys;
				if(!matchesEveryRow && reducedKeys.size() < keys[c].size()) {
					String extraKey = keys[c].get(keys[c].size() - reducedKeys.size() - 1);
					if(checkExtraKeyForCol(reducedKeys, extraKey, prefixes[c])) {
						finalKeys = new ArrayList<>();
						finalKeys.add(extraKey);
						finalKeys.addAll(reducedKeys);
					}
				}
				keys[c] = finalKeys;

				// Build suffixes: take the second delimiter-separated part of every suffix
				// string, truncated to suffixStringLength characters.
				Set<String> distinctSuffixes = new HashSet<>();
				TextTrie suffixTrie = new TextTrie();
				for(String su : suffixes[c]) {
					String[] parts = su.split(Lop.OPERAND_DELIMITOR, -1);
					if(parts.length == 0)
						continue;
					if(parts.length == 1 && parts[0].length() == 0)
						continue;
					// NOTE(review): parts[1] throws if the suffix is non-empty but contains no
					// delimiter - presumably never the case; confirm against the producer of suffixes.
					String head = parts[1];
					distinctSuffixes.add(head.length() < suffixStringLength ? head : head.substring(0, suffixStringLength));
				}
				if(distinctSuffixes.isEmpty()) {
					colSuffixes[c] = new HashSet<>();
					continue;
				}

				// every distinct suffix is inserted under its own running id
				int nextId = 0;
				for(String suffix : distinctSuffixes)
					suffixTrie.insert(suffix, nextId++);

				HashSet<String> chosen = new HashSet<>();
				ArrayList<Pair<String, Set<Integer>>> allSuffixes = suffixTrie.getAllKeys();
				Pair<String, Set<Integer>> first = allSuffixes.get(0);
				if(first.getValue().size() == distinctSuffixes.size())
					// the first trie key is associated with as many ids as there are distinct suffixes
					chosen.add(first.getKey());
				else {
					// otherwise keep every trie key that contributes at least one id not seen so far
					Set<Integer> covered = new HashSet<>();
					for(Pair<String, Set<Integer>> candidate : allSuffixes)
						if(covered.addAll(candidate.getValue()))
							chosen.add(candidate.getKey());
				}
				colSuffixes[c] = chosen;
			}
			return new Pair<>(keys, colSuffixes);
		}