java/de/jflex/migration/unicodedatatest/testcaseless/CaselessTestGenerator.java (107 lines of code) (raw):
/*
* Copyright (C) 2021 Google, LLC.
* SPDX-License-Identifier: BSD-3-Clause
*/
package de.jflex.migration.unicodedatatest.testcaseless;
import com.google.common.collect.ImmutableList;
import de.jflex.migration.unicodedatatest.base.UnicodeVersion;
import de.jflex.testing.unicodedata.AbstractSimpleParser.PatternHandler;
import de.jflex.testing.unicodedata.SimpleCaselessParser;
import de.jflex.ucd.UcdFileType;
import de.jflex.ucd.UcdVersion;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.Arrays;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.apache.velocity.runtime.parser.ParseException;
public class CaselessTestGenerator {
private CaselessTestGenerator() {}
public static void main(String[] args) throws IOException, ParseException {
UnicodeVersion version = UnicodeVersion.create(args[0]);
Path outDir = Paths.get(args[1]);
List<String> files = Arrays.asList(Arrays.copyOfRange(args, 2, args.length));
UcdVersion ucd = UcdVersion.findUcdFiles(version.version(), files);
Path ucdUnicodeData = ucd.getFile(UcdFileType.UnicodeData).toPath();
Equivalences<Integer> equivalences = parseUnicodeData(ucdUnicodeData);
if (equivalences.getKeys().isEmpty()) {
throw new IllegalStateException("No equivalence found in " + ucdUnicodeData);
}
generate(version, outDir, equivalences);
}
static Equivalences<Integer> parseUnicodeData(Path ucdUnicodeData) throws IOException {
CaselessHandler handler = new CaselessHandler();
SimpleCaselessParser parser =
new SimpleCaselessParser(
Files.newBufferedReader(ucdUnicodeData, StandardCharsets.UTF_8), handler);
parser.parse();
return handler.equivalences;
}
private static void generate(
UnicodeVersion version, Path outDir, Equivalences<Integer> equivalences)
throws IOException, ParseException {
new UnicodeCaselessFlexGenerator(version, equivalences).generate(outDir);
new UnicodeCaselessTestGenerator(version).generate(outDir);
new UnicodeCaselessGoldenGenerator(version, equivalences).generate(outDir);
}
private static class CaselessHandler implements PatternHandler {
Equivalences<Integer> equivalences = new Equivalences<>();
@Override
public void onRegexMatch(List<String> regexpGroups) {
String strUpperCaseMapping = regexpGroups.get(1);
String strLowerCaseMapping = regexpGroups.get(2);
String strTitleCaseMapping = regexpGroups.get(3);
if (strUpperCaseMapping.isEmpty()
&& strLowerCaseMapping.isEmpty()
&& strTitleCaseMapping.isEmpty()) {
return;
}
int codePoint = Integer.parseInt(regexpGroups.get(0), 16);
maybeAddMapping(codePoint, strLowerCaseMapping);
maybeAddMapping(codePoint, strUpperCaseMapping);
maybeAddMapping(codePoint, strTitleCaseMapping);
// TODO(regisd)
}
private void maybeAddMapping(int codepoint, String strMapping) {
if (!strMapping.isEmpty()) {
int mapping = Integer.parseInt(strMapping, 16);
equivalences.add(codepoint, mapping);
}
}
}
static class Equivalences<T extends Comparable<T>> {
/** Mapping from value → equivalent values. */
protected Map<T, Set<T>> equivalences = new HashMap<>();
Set<T> get(T value) {
return equivalences.computeIfAbsent(
value,
v -> {
Set<T> set = new HashSet<>();
set.add(value);
return set;
});
}
void add(T codepoint, T mapping) {
if (equivalences.containsKey(mapping)) {
Set<T> equiv = equivalences.get(mapping);
equiv.add(codepoint);
equivalences.put(codepoint, equiv);
} else {
Set<T> equiv = get(codepoint);
equiv.add(mapping);
equivalences.put(mapping, equiv);
}
}
Set<T> getKeys() {
return equivalences.keySet();
}
ImmutableList<T> getSortedKeys(Comparator<T> comparator) {
return ImmutableList.sortedCopyOf(comparator, getKeys());
}
/**
* Returns the equivalent value of the given value, i.e. the minimum value in the equivalence
* set.
*/
public T getEquivalentValue(T value) {
return Collections.min(equivalences.get(value));
}
}
}