javatests/de/jflex/ucd_generator/scanner/UcdScannerIntegrationTest.java (239 lines of code) (raw):

/* * Copyright (C) 2020 Google, LLC. * SPDX-License-Identifier: BSD-3-Clause */ package de.jflex.ucd_generator.scanner; import static com.google.common.truth.Truth.assertThat; import com.google.common.collect.ImmutableList; import de.jflex.ucd.CodepointRange; import de.jflex.ucd_generator.TestedVersions; import de.jflex.version.Version; import java.io.IOException; import org.junit.Before; import org.junit.Ignore; import org.junit.Test; /** Integration test for the {@link UcdScanner} on Unicode 6. */ public class UcdScannerIntegrationTest { /** * Unicode 6.0 property value: {me} * * <pre>{@code * "\u0488\u0489" + "\u20dd\u20e0" + "\u20e2\u20e4" + "\ua670\ua672" * }</pre> */ private static final ImmutableList<CodepointRange> INTERVALS_FOR_GENERALCATEGORY_ME = ImmutableList.of( CodepointRange.create('\u0488', '\u0489'), CodepointRange.create('\u20dd', '\u20e0'), CodepointRange.create('\u20e2', '\u20e4'), CodepointRange.create('\ua670', '\ua672')); private UcdScanner ucdScanner; @Before public void ucdScanner() { ucdScanner = new UcdScanner(TestedVersions.UCD_VERSION_6_3); assertThat(ucdScanner.ucdVersion().version()).isEqualTo(new Version(6, 3, 0)); } @Test public void scanPropertyAliases() throws Exception { ucdScanner.scanPropertyAliases(); assertThat(ucdScanner.unicodeData.getCanonicalPropertyName("ccc")) .isEqualTo("canonicalcombiningclass"); assertThat(ucdScanner.unicodeData.getPropertyAliases("script")).containsExactly("sc", "script"); assertThat(ucdScanner.unicodeData.getPropertyAliases("Bidi_Class")) .containsExactly("bc", "bidiclass"); } @Test public void scanPropertyValueAliases() throws Exception { ucdScanner.scanPropertyAliases(); assertThat(ucdScanner.unicodeData.getPropertyValueAliases("sentencebreak", "at")) .containsExactly("at"); assertThat(ucdScanner.unicodeData.getPropertyValueAliases("sentencebreak", "aterm")) .containsExactly("aterm"); ucdScanner.scanPropertyValueAliases(); assertThat(ucdScanner.unicodeData.getPropertyValueAliases("sentencebreak", "at")) .containsExactly("at", "aterm"); assertThat(ucdScanner.unicodeData.getPropertyValueAliases("sentencebreak", "aterm")) .containsExactly("at", "aterm"); } @Test public void scanUnicodeData() throws Exception { ucdScanner.scanPropertyAliases(); ucdScanner.scanPropertyValueAliases(); // generalcategory=me assertThat(ucdScanner.unicodeData.getPropertyValueIntervals("me")).isEmpty(); ucdScanner.scanUnicodeData(); assertThat(ucdScanner.unicodeData.getPropertyValueIntervals("me")) .containsExactlyElementsIn(INTERVALS_FOR_GENERALCATEGORY_ME) .inOrder(); assertThat(ucdScanner.unicodeData.maximumCodePoint()).isEqualTo(1114111); assertThat(ucdScanner.unicodeData.maxCaselessMatchPartitionSize()).isEqualTo(4); assertThat(ucdScanner.unicodeData.getPropertyValueIntervals("generalcategory")) .isEqualTo(ucdScanner.unicodeData.getPropertyValueIntervals("gc")); } @Test public void scanPropList() throws Exception { ucdScanner.scanPropertyAliases(); ucdScanner.scanPropertyValueAliases(); ucdScanner.scanUnicodeData(); assertThat(ucdScanner.unicodeData.getPropertyValueIntervals("otheruppercase")).isEmpty(); ucdScanner.scanPropList(); assertThat(ucdScanner.unicodeData.getPropertyValueIntervals("me")) .containsExactlyElementsIn(INTERVALS_FOR_GENERALCATEGORY_ME); assertThat(ucdScanner.unicodeData.getPropertyValueIntervals("otheruppercase")) .contains(CodepointRange.create(8544, 8559)); } @Test public void scanDerivedCoreProperties() throws Exception { ucdScanner.scanPropertyAliases(); ucdScanner.scanPropertyValueAliases(); ucdScanner.scanUnicodeData(); ucdScanner.scanPropList(); assertThat(ucdScanner.unicodeData.getPropertyValueIntervals("idcontinue")).isEmpty(); ucdScanner.scanDerivedCoreProperties(); assertThat(ucdScanner.unicodeData.getPropertyValueIntervals("me")) .containsExactlyElementsIn(INTERVALS_FOR_GENERALCATEGORY_ME); assertThat(ucdScanner.unicodeData.getPropertyValueIntervals("idcontinue")) .contains(CodepointRange.create(48, 57)); } @Test public void scanScripts() throws Exception { ucdScanner.scanPropertyAliases(); ucdScanner.scanPropertyValueAliases(); ucdScanner.scanUnicodeData(); ucdScanner.scanPropList(); ucdScanner.scanDerivedCoreProperties(); assertThat(ucdScanner.unicodeData.getPropertyValueIntervals("arabic")).isEmpty(); ucdScanner.scanScripts(); assertThat(ucdScanner.unicodeData.getPropertyValueIntervals("me")) .containsExactlyElementsIn(INTERVALS_FOR_GENERALCATEGORY_ME); assertThat(ucdScanner.unicodeData.getPropertyValueIntervals("arabic")).isNotEmpty(); } @Ignore // TODO @Test public void scanScripExtensions() throws Exception { ucdScanner.scanPropertyAliases(); ucdScanner.scanPropertyValueAliases(); ucdScanner.scanUnicodeData(); ucdScanner.scanPropList(); ucdScanner.scanDerivedCoreProperties(); ucdScanner.scanScripts(); assertThat(ucdScanner.unicodeData.getPropertyValueIntervals("hiragana")) .isNotEmpty(); // from scanScripts assertThat(ucdScanner.unicodeData.getPropertyValueIntervals("katakana")).isNotEmpty(); assertThat(ucdScanner.unicodeData.codePointInProperty(0x3034, "hiragana")).isFalse(); assertThat(ucdScanner.unicodeData.codePointInProperty(0x3034, "katakana")).isFalse(); ucdScanner.scanScriptExtensions(); assertThat(ucdScanner.unicodeData.getPropertyValueIntervals("me")) .containsExactlyElementsIn(INTERVALS_FOR_GENERALCATEGORY_ME); // 3031..3035 ; Hira Kana # Lm [5] VERTICAL KANA REPEAT MARK.. assertThat(ucdScanner.unicodeData.codePointInProperty(0x3034, "hiragana")).isTrue(); assertThat(ucdScanner.unicodeData.codePointInProperty(0x3034, "katakana")).isTrue(); } @Ignore // TODO @Test public void scanBlocks() throws Exception { ucdScanner.scanPropertyAliases(); ucdScanner.scanPropertyValueAliases(); ucdScanner.scanUnicodeData(); ucdScanner.scanPropList(); ucdScanner.scanDerivedCoreProperties(); ucdScanner.scanScripts(); ucdScanner.scanScriptExtensions(); assertThat(ucdScanner.unicodeData.getPropertyValueIntervals("block=supplementalpunctuation")) .isEmpty(); ucdScanner.scanBlocks(); assertThat(ucdScanner.unicodeData.getPropertyValueIntervals("me")) .containsExactlyElementsIn(INTERVALS_FOR_GENERALCATEGORY_ME); assertThat(ucdScanner.unicodeData.getPropertyValueIntervals("scriptextensions=hira")) .isNotEmpty(); assertThat(ucdScanner.unicodeData.getPropertyValueIntervals("block=supplementalpunctuation")) .isNotEmpty(); } @Ignore // TODO @Test public void scanLineBreak() throws Exception { ucdScanner.scanPropertyAliases(); ucdScanner.scanPropertyValueAliases(); ucdScanner.scanUnicodeData(); ucdScanner.scanPropList(); ucdScanner.scanDerivedCoreProperties(); ucdScanner.scanScripts(); ucdScanner.scanScriptExtensions(); ucdScanner.scanBlocks(); assertThat(ucdScanner.unicodeData.getPropertyValueIntervals("linebreak=bb")).isEmpty(); ucdScanner.scanLineBreak(); assertThat(ucdScanner.unicodeData.getPropertyValueIntervals("me")) .containsExactlyElementsIn(INTERVALS_FOR_GENERALCATEGORY_ME); assertThat(ucdScanner.unicodeData.getPropertyValueIntervals("linebreak=bb")).isNotEmpty(); // TODO(regisd) It seems I'm missing some property value aliases // assertThat(ucdScanner.unicodeData.getPropertyValueIntervals("linebreak=bb")) // .isEqualTo(ucdScanner.unicodeData.getPropertyValueIntervals("linebreak=breakbefore")); } @Ignore // TODO @Test public void scanGraphemeBreakProperty() throws Exception { ucdScanner.scanPropertyAliases(); ucdScanner.scanPropertyValueAliases(); ucdScanner.scanUnicodeData(); ucdScanner.scanPropList(); ucdScanner.scanDerivedCoreProperties(); ucdScanner.scanScripts(); ucdScanner.scanScriptExtensions(); ucdScanner.scanBlocks(); ucdScanner.scanLineBreak(); assertThat(ucdScanner.unicodeData.getPropertyValueIntervals("graphemeclusterbreak=ebasegaz")) .isEmpty(); ucdScanner.scanGraphemeBreakProperty(); assertThat(ucdScanner.unicodeData.getPropertyValueIntervals("me")) .containsExactlyElementsIn(INTERVALS_FOR_GENERALCATEGORY_ME); assertThat(ucdScanner.unicodeData.getPropertyValueIntervals("graphemeclusterbreak=ebasegaz")) .isNotEmpty(); // TODO(regisd) It seems I'm missing some property value aliases // assertThat(ucdScanner.unicodeData.getPropertyValueIntervals("graphemeclusterbreak=ebasegaz")) // .isEqualTo(ucdScanner.unicodeData.getPropertyValueIntervals("gcb=ebg")); } @Ignore // TODO @Test public void scanSentenceBreakProperty() throws Exception { ucdScanner.scanPropertyAliases(); ucdScanner.scanPropertyValueAliases(); ucdScanner.scanUnicodeData(); ucdScanner.scanPropList(); ucdScanner.scanDerivedCoreProperties(); ucdScanner.scanScripts(); ucdScanner.scanScriptExtensions(); ucdScanner.scanBlocks(); ucdScanner.scanLineBreak(); ucdScanner.scanGraphemeBreakProperty(); assertThat(ucdScanner.unicodeData.getPropertyValueIntervals("sentencebreak=close")).isEmpty(); ucdScanner.scanSentenceBreakProperty(); assertThat(ucdScanner.unicodeData.getPropertyValueIntervals("me")) .containsExactlyElementsIn(INTERVALS_FOR_GENERALCATEGORY_ME); assertThat(ucdScanner.unicodeData.getPropertyValueIntervals("sentencebreak=close")) .isNotEmpty(); // TODO(regisd) It seems I'm missing some property value aliases // assertThat(ucdScanner.unicodeData.getPropertyValueIntervals("sentencebreak=close")) // .isEqualTo(ucdScanner.unicodeData.getPropertyValueIntervals("sentencebreak=cl")); } @Ignore // TODO @Test public void scanWordBreakProperty() throws IOException { ucdScanner.scanPropertyAliases(); ucdScanner.scanPropertyValueAliases(); ucdScanner.scanUnicodeData(); ucdScanner.scanPropList(); ucdScanner.scanDerivedCoreProperties(); ucdScanner.scanScripts(); ucdScanner.scanScriptExtensions(); ucdScanner.scanBlocks(); ucdScanner.scanLineBreak(); ucdScanner.scanGraphemeBreakProperty(); ucdScanner.scanSentenceBreakProperty(); assertThat(ucdScanner.unicodeData.getPropertyValueIntervals("wordbreak=doublequote")).isEmpty(); ucdScanner.scanWordBreakProperty(); assertThat(ucdScanner.unicodeData.getPropertyValueIntervals("me")) .containsExactlyElementsIn(INTERVALS_FOR_GENERALCATEGORY_ME); assertThat(ucdScanner.unicodeData.getPropertyValueIntervals("wordbreak=doublequote")) .isNotEmpty(); // TODO(regisd) It seems I'm missing some property value aliases // assertThat(ucdScanner.unicodeData.getPropertyValueIntervals("wordbreak=doublequote")) // .isEqualTo(ucdScanner.unicodeData.getPropertyValueIntervals("wb=dq")); } @Ignore // TODO @Test public void scanDerivedAge() throws IOException { ucdScanner.scanPropertyAliases(); ucdScanner.scanPropertyValueAliases(); ucdScanner.scanUnicodeData(); ucdScanner.scanPropList(); ucdScanner.scanDerivedCoreProperties(); ucdScanner.scanScripts(); ucdScanner.scanScriptExtensions(); ucdScanner.scanBlocks(); ucdScanner.scanLineBreak(); ucdScanner.scanGraphemeBreakProperty(); ucdScanner.scanSentenceBreakProperty(); ucdScanner.scanWordBreakProperty(); assertThat(ucdScanner.unicodeData.getPropertyValueIntervals("age=9.0")).isEmpty(); ucdScanner.scanDerivedAge(); assertThat(ucdScanner.unicodeData.getPropertyValueIntervals("me")) .containsExactlyElementsIn(INTERVALS_FOR_GENERALCATEGORY_ME); assertThat(ucdScanner.unicodeData.getPropertyValueAliases("age", "4.0")) .containsExactly("4.0", "v40"); assertThat(ucdScanner.unicodeData.getPropertyValueIntervals("age=9.0")).isNotEmpty(); } }