java/de/jflex/ucd_generator/ucd/PropertyValueIntervals.java (141 lines of code) (raw):

/* * Copyright (C) 2009-2013 Steve Rowe <sarowe@gmail.com> * Copyright (C) 2019-2020 Google, LLC. * * SPDX-License-Identifier: BSD-3-Clause */ package de.jflex.ucd_generator.ucd; import static de.jflex.ucd.SurrogateUtils.isSurrogateProperty; import static de.jflex.ucd.SurrogateUtils.removeSurrogates; import static de.jflex.ucd_generator.ucd.PropertyNames.NORMALIZED_GENERAL_CATEGORY; import com.google.common.base.Preconditions; import com.google.common.collect.HashMultimap; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMultimap; import com.google.common.collect.ImmutableSetMultimap; import com.google.common.collect.ImmutableSortedMap; import com.google.common.collect.Multimap; import com.google.common.collect.Ordering; import com.google.common.collect.SortedSetMultimap; import com.google.common.collect.TreeMultimap; import de.jflex.ucd.CodepointRange; import java.util.Collection; import java.util.HashSet; import java.util.List; import java.util.Set; import java.util.SortedSet; import java.util.stream.Collectors; public class PropertyValueIntervals { private static final boolean DEBUG = false; private final PropertyValues propertyValues; Set<String> usedBinaryProperties = new HashSet<>(); private final Multimap<String, String> usedEnumProperties = HashMultimap.create(); // We need to keep the order of the added CodepointRanges private final SortedSetMultimap<String, CodepointRange> propertyValueIntervals = TreeMultimap.create(Ordering.natural(), CodepointRange.COMPARATOR); public PropertyValueIntervals(PropertyValues propertyValues) { this.propertyValues = propertyValues; } /** * Given a binary property name, and starting and ending code points, adds the interval to the * {@link #propertyValueIntervals} map. * * @param propName The property name, e.g. "Assigned". * @param startCodePoint The first code point in the interval. * @param endCodePoint The last code point in the interval. */ boolean addBinaryPropertyInterval(String propName, int startCodePoint, int endCodePoint) { boolean added = addPropertyInterval(propName, startCodePoint, endCodePoint); if (added) { usedBinaryProperties.add(propName); } return added; } boolean addEnumPropertyInterval( String propName, String propValue, int startCodePoint, int endCodePoint) { propValue = propertyValues.getCanonicalValueName(propName, propValue); String canonicalValue = PropertyNames.canonicalValue(propName, propValue); boolean added = addPropertyInterval(canonicalValue, startCodePoint, endCodePoint); if (added) { usedEnumProperties.put(propName, propValue); } return added; } private boolean addPropertyInterval(String propName, int startCodePoint, int endCodePoint) { if (isSurrogateProperty(propName)) { // Skip surrogate properties [U+D800-U+DFFF]. // e.g. \p{Cs} - can't be represented in valid UTF-16 encoded strings. return false; } List<CodepointRange> ranges = removeSurrogates(startCodePoint, endCodePoint); if (ranges.isEmpty()) { return false; } boolean added = propertyValueIntervals.putAll(propName, ranges); checkPropertyIntervalsState(propName, ranges); return added; } /** Assert property intervals are sorted. */ private void checkPropertyIntervalsState( String propName, Collection<CodepointRange> addedRanges) { if (DEBUG) { try { Preconditions.checkState( Ordering.from(CodepointRange.COMPARATOR) .isOrdered(propertyValueIntervals.get(propName))); } catch (IllegalStateException e) { throw new IllegalStateException( String.format( "Property value intervals not order for %s after adding %s", propName, addedRanges.stream() .map(CodepointRange::toString) .collect(Collectors.joining(","))), e); } } } public void addAllRanges(String propertyName, Collection<CodepointRange> ranges) { propertyValueIntervals.putAll(propertyName, ranges); usedBinaryProperties.add(propertyName); } /** * @deprecated Hack for Unicode 2.0 */ @Deprecated public void removeEnumPropertyPoint(String propertyName, String propertyValue, int codepoint) { CodepointRange point = CodepointRange.createPoint(codepoint); String canonicalName = PropertyNames.canonicalValue(PropertyNames.normalize(propertyName), propertyValue); SortedSet<CodepointRange> ranges = propertyValueIntervals.get(canonicalName); CodepointRange range = ranges.headSet(point).last(); ranges.remove(range); // In practice there is only one \ufe70\ufefe ; but in theory the removed point could have been // in the middle. ImmutableList<CodepointRange> hackedRanges = CodepointRangeSet.builder().add(range).substract(point).build().ranges(); ranges.addAll(hackedRanges); } ImmutableList<CodepointRange> getRanges(String propName) { Collection<CodepointRange> ranges = propertyValueIntervals.get(propName); if (ranges.isEmpty()) { return ImmutableList.of(); } return ImmutableList.copyOf(ranges); } public ImmutableMultimap<String, String> usedEnumeratedProperties() { ImmutableSetMultimap.Builder<String, String> multimap = ImmutableSetMultimap.builder(); multimap.putAll(usedEnumProperties); // First letter is added for General_category such as // gc ; C ; Other # Cc | Cf | Cn | Co | Cs // gc ; Cc ; Control ; cntrl // gc ; Cf ; Format // etc. if (usedEnumProperties.containsKey(NORMALIZED_GENERAL_CATEGORY)) { for (String value : usedEnumProperties.get(NORMALIZED_GENERAL_CATEGORY)) { if (value.length() == 2) { multimap.put(NORMALIZED_GENERAL_CATEGORY, value.substring(0, 1)); } } } return multimap.build(); } public boolean hasUsedEnumeratedProperty(String category) { return usedEnumProperties.containsKey(category); } public Set<String> keySet() { return propertyValueIntervals.keySet(); } public ImmutableSortedMap<String, CodepointRangeSet> asSortedMap() { ImmutableSortedMap.Builder<String, CodepointRangeSet> map = ImmutableSortedMap.naturalOrder(); for (String property : propertyValueIntervals.keySet()) { map.put( property, CodepointRangeSet.builder() .addAllImmutable(propertyValueIntervals.get(property)) .build()); } return map.build(); } public boolean codePointInProperty(int codepoint, String propName) { // The codepoint could be in the last range stating before CodepointRange point = CodepointRange.createPoint(codepoint); SortedSet<CodepointRange> ranges = propertyValueIntervals.get(propName); SortedSet<CodepointRange> head = ranges.headSet(point); if (head.isEmpty()) { return false; } return head.last().contains(point); } }