in lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Dictionary.java [332:510]
/**
 * Reads the affix file line by line, dispatching on the first whitespace-delimited word of each
 * non-empty line, and then builds the prefix/suffix FSTs, the second-stage continuation flag
 * arrays and the packed strip data from what was collected.
 *
 * <p>Lines whose first word is not one of the directives handled here are silently ignored.
 *
 * @param affixStream stream positioned at the start of the affix file
 * @param decoder decoder used to turn the stream into characters; its charset is also handed to
 *     the SET and FLAG consistency checks
 * @param flags flag enumerator passed through to the affix rule parser
 * @throws IOException if reading from the stream fails
 * @throws ParseException if a directive is malformed, MAXDIFF is out of range, or the file ends
 *     before all entries announced by a REP, MAP or CHECKCOMPOUNDPATTERN header were read
 */
private void readAffixFile(InputStream affixStream, CharsetDecoder decoder, FlagEnumerator flags)
    throws IOException, ParseException {
  TreeMap<String, IntArrayList> prefixes = new TreeMap<>();
  TreeMap<String, IntArrayList> suffixes = new TreeMap<>();
  CharHashSet prefixContFlags = new CharHashSet();
  CharHashSet suffixContFlags = new CharHashSet();

  Map<String, Integer> seenPatterns = new HashMap<>();
  // zero condition -> 0 ord
  seenPatterns.put(AffixCondition.ALWAYS_TRUE_KEY, 0);
  patterns.add(null);

  // zero strip -> 0 ord; insertion order is relied upon when the strips are packed below
  Map<String, Integer> seenStrips = new LinkedHashMap<>();
  seenStrips.put("", 0);

  LineNumberReader reader = new LineNumberReader(new InputStreamReader(affixStream, decoder));
  String line;
  while ((line = reader.readLine()) != null) {
    // ignore any BOM marker on first line
    if (reader.getLineNumber() == 1 && line.startsWith("\uFEFF")) {
      line = line.substring(1);
    }
    line = line.trim();
    if (line.isEmpty()) {
      continue;
    }

    String firstWord = line.split("\\s")[0];
    switch (firstWord) {
      case "AF":
        parseAlias(line);
        break;
      case "AM":
        parseMorphAlias(line);
        break;
      case "PFX":
        parseAffix(
            prefixes, prefixContFlags, line, reader, PREFIX, seenPatterns, seenStrips, flags);
        break;
      case "SFX":
        parseAffix(
            suffixes, suffixContFlags, line, reader, SUFFIX, seenPatterns, seenStrips, flags);
        break;
      case "COMPLEXPREFIXES":
        // Only a line consisting of exactly this keyword enables the option; a line with
        // anything after the keyword is ignored.
        if (line.equals("COMPLEXPREFIXES")) {
          // 2-stage prefix+1-stage suffix instead of 2-stage suffix+1-stage prefix
          complexPrefixes = true;
        }
        break;
      case "CIRCUMFIX":
        circumfix = flagParsingStrategy.parseFlag(singleArgument(reader, line));
        break;
      case "KEEPCASE":
        keepcase = flagParsingStrategy.parseFlag(singleArgument(reader, line));
        break;
      case "FORCEUCASE":
        forceUCase = flagParsingStrategy.parseFlag(singleArgument(reader, line));
        break;
      case "NEEDAFFIX":
      case "PSEUDOROOT":
        needaffix = flagParsingStrategy.parseFlag(singleArgument(reader, line));
        break;
      case "ONLYINCOMPOUND":
        onlyincompound = flagParsingStrategy.parseFlag(singleArgument(reader, line));
        break;
      case "CHECKSHARPS":
        checkSharpS = true;
        break;
      case "IGNORE":
        ignore = singleArgument(reader, line).toCharArray();
        // kept sorted; presumably so lookups can use binary search -- confirm against usages
        Arrays.sort(ignore);
        break;
      case "ICONV":
      case "OCONV":
        {
          int num = parseNum(reader, line);
          ConvTable res = parseConversions(reader, num);
          if ("ICONV".equals(firstWord)) {
            iconv = res;
          } else {
            oconv = res;
          }
          break;
        }
      case "FULLSTRIP":
        fullStrip = true;
        break;
      case "LANG":
        language = singleArgument(reader, line);
        this.alternateCasing = hasLanguage("tr", "az");
        break;
      case "BREAK":
        breaks = parseBreaks(reader, line);
        break;
      case "WORDCHARS":
        wordChars = firstArgument(reader, line);
        break;
      case "TRY":
        tryChars = firstArgument(reader, line);
        break;
      case "REP":
        if (tolerateRepRuleCountMismatches()) {
          String[] parts = splitBySpace(reader, line, 2, Integer.MAX_VALUE);
          // ignore REP N, as actual N may be incorrect
          if (parts.length >= 3) {
            repTable.add(new RepEntry(parts[1], parts[2]));
          }
        } else {
          int count = parseNum(reader, line);
          for (int i = 0; i < count; i++) {
            String[] parts =
                splitBySpace(reader, requireEntryLine(reader, "REP"), 3, Integer.MAX_VALUE);
            repTable.add(new RepEntry(parts[1], parts[2]));
          }
        }
        break;
      case "MAP":
        {
          int count = parseNum(reader, line);
          for (int i = 0; i < count; i++) {
            mapTable.add(parseMapEntry(reader, requireEntryLine(reader, "MAP")));
          }
          break;
        }
      case "KEY":
        neighborKeyGroups = singleArgument(reader, line).split("\\|");
        break;
      case "NOSPLITSUGS":
        enableSplitSuggestions = false;
        break;
      case "MAXNGRAMSUGS":
        maxNGramSuggestions = Integer.parseInt(singleArgument(reader, line));
        break;
      case "MAXDIFF":
        {
          int value = Integer.parseInt(singleArgument(reader, line));
          if (value < 0 || value > 10) {
            throw new ParseException(
                "MAXDIFF should be between 0 and 10", reader.getLineNumber());
          }
          maxDiff = value;
          break;
        }
      case "ONLYMAXDIFF":
        onlyMaxDiff = true;
        break;
      case "FORBIDDENWORD":
        forbiddenword = flagParsingStrategy.parseFlag(singleArgument(reader, line));
        break;
      case "NOSUGGEST":
        noSuggest = flagParsingStrategy.parseFlag(singleArgument(reader, line));
        break;
      case "SUBSTANDARD":
        subStandard = flagParsingStrategy.parseFlag(singleArgument(reader, line));
        break;
      case "COMPOUNDMIN":
        compoundMin = Math.max(1, parseNum(reader, line));
        break;
      case "COMPOUNDWORDMAX":
        compoundMax = Math.max(1, parseNum(reader, line));
        break;
      case "COMPOUNDRULE":
        compoundRules = parseCompoundRules(reader, parseNum(reader, line));
        break;
      case "COMPOUNDFLAG":
        compoundFlag = flagParsingStrategy.parseFlag(singleArgument(reader, line));
        break;
      case "COMPOUNDBEGIN":
        compoundBegin = flagParsingStrategy.parseFlag(singleArgument(reader, line));
        break;
      case "COMPOUNDMIDDLE":
        compoundMiddle = flagParsingStrategy.parseFlag(singleArgument(reader, line));
        break;
      case "COMPOUNDEND":
        compoundEnd = flagParsingStrategy.parseFlag(singleArgument(reader, line));
        break;
      case "COMPOUNDPERMITFLAG":
        compoundPermit = flagParsingStrategy.parseFlag(singleArgument(reader, line));
        break;
      case "COMPOUNDFORBIDFLAG":
        compoundForbid = flagParsingStrategy.parseFlag(singleArgument(reader, line));
        break;
      case "CHECKCOMPOUNDCASE":
        checkCompoundCase = true;
        break;
      case "CHECKCOMPOUNDDUP":
        checkCompoundDup = true;
        break;
      case "CHECKCOMPOUNDREP":
        checkCompoundRep = true;
        break;
      case "CHECKCOMPOUNDTRIPLE":
        checkCompoundTriple = true;
        break;
      case "SIMPLIFIEDTRIPLE":
        simplifiedTriple = true;
        break;
      case "CHECKCOMPOUNDPATTERN":
        {
          int count = parseNum(reader, line);
          for (int i = 0; i < count; i++) {
            checkCompoundPatterns.add(
                new CheckCompoundPattern(
                    requireEntryLine(reader, "CHECKCOMPOUNDPATTERN"), flagParsingStrategy, this));
          }
          break;
        }
      case "SET":
        // NOTE(review): presumably verifies the declared charset matches the one already in
        // use for decoding -- confirm in checkCriticalDirectiveSame
        checkCriticalDirectiveSame(
            "SET", reader, decoder.charset(), getDecoder(singleArgument(reader, line)).charset());
        break;
      case "FLAG":
        {
          FlagParsingStrategy strategy = getFlagParsingStrategy(line, decoder.charset());
          checkCriticalDirectiveSame(
              "FLAG", reader, flagParsingStrategy.getClass(), strategy.getClass());
          break;
        }
      default:
        // directive not handled by this reader: skip the line
        break;
    }
  }

  this.prefixes = affixFST(prefixes);
  this.suffixes = affixFST(suffixes);
  secondStagePrefixFlags = toSortedCharArray(prefixContFlags);
  secondStageSuffixFlags = toSortedCharArray(suffixContFlags);

  // Pack all distinct strips, in insertion order, into one char array. stripOffsets[i] is the
  // start of the i-th strip in stripData; one extra trailing slot holds the total length so
  // that the end of strip i is always stripOffsets[i + 1].
  int totalChars = 0;
  for (String strip : seenStrips.keySet()) {
    totalChars += strip.length();
  }
  stripData = new char[totalChars];
  stripOffsets = new int[seenStrips.size() + 1];
  int currentOffset = 0;
  int currentIndex = 0;
  for (String strip : seenStrips.keySet()) {
    stripOffsets[currentIndex++] = currentOffset;
    strip.getChars(0, strip.length(), stripData, currentOffset);
    currentOffset += strip.length();
  }
  assert currentIndex == seenStrips.size();
  stripOffsets[currentIndex] = currentOffset;
}

/**
 * Reads the next line of a counted multi-line directive, failing with a {@link ParseException}
 * (carrying the current line number) if the file ends first, rather than handing {@code null}
 * to the entry parsers.
 */
private static String requireEntryLine(LineNumberReader reader, String directive)
    throws IOException, ParseException {
  String entry = reader.readLine();
  if (entry == null) {
    throw new ParseException(
        "Unexpected end of file: fewer " + directive + " entries than declared",
        reader.getLineNumber());
  }
  return entry;
}