java/de/jflex/testing/unicodedata/SimpleDerivedCorePropertiesParser.java (47 lines of code) (raw):

/* * Copyright (C) 2021 Google, LLC. * SPDX-License-Identifier: BSD-3-Clause */ package de.jflex.testing.unicodedata; import com.google.common.collect.ImmutableList; import de.jflex.ucd.CodepointRange; import de.jflex.ucd.NamedCodepointRange; import java.io.IOException; import java.io.Reader; import java.nio.charset.StandardCharsets; import java.nio.file.Files; import java.nio.file.Path; import java.util.List; import java.util.regex.Pattern; /** * Parser of unicode {@code DerivedCoreProperties.txt}. * * <pre>{@code * # For documentation, see DerivedProperties.html * FFE2 ; Math # Sm FULLWIDTH NOT SIGN * FFE9..FFEC ; Math # Sm [4] HALFWIDTH LEFTWARDS ARROW..HALFWIDTH DOWNWARDS ARROW * }</pre> */ public class SimpleDerivedCorePropertiesParser extends AbstractSimpleParser { private static final Pattern PATTERN = Pattern.compile("^([0-9A-F]{4,6})(\\.\\.[0-9A-F]{4,6})?\\s*;\\s([^#]*).*$"); public SimpleDerivedCorePropertiesParser(Reader reader, PatternHandler handler) { super(PATTERN, reader, handler); } /** Parses the unicode {@code Blocks.txt} and returns the defined blocks. */ public static ImmutableList<NamedCodepointRange<String>> parseProperties(Path blocksTxt) throws IOException { return parseProperties(Files.newBufferedReader(blocksTxt, StandardCharsets.UTF_8)); } static ImmutableList<NamedCodepointRange<String>> parseProperties(Reader reader) throws IOException { ImmutableList.Builder<NamedCodepointRange<String>> list = ImmutableList.builder(); SimpleDerivedCorePropertiesParser parser = new SimpleDerivedCorePropertiesParser( reader, regexpGroups -> list.add(createBlock(regexpGroups))); parser.parse(); return list.build(); } private static NamedCodepointRange<String> createBlock(List<String> regexpGroups) { if (regexpGroups.size() == 3) { return NamedCodepointRange.create( regexpGroups.get(2).trim(), createRange(regexpGroups.get(0), regexpGroups.get(1))); } else { return NamedCodepointRange.create( regexpGroups.get(1).trim(), createRange(regexpGroups.get(0))); } } private static CodepointRange createRange(String start, String end) { return CodepointRange.create( Integer.parseInt(start, 16), Integer.parseInt(end.substring("..".length()), 16)); } private static CodepointRange createRange(String point) { return CodepointRange.createPoint(Integer.parseInt(point, 16)); } }