javatests/de/jflex/ucd_generator/scanner/UcdScannerIntegrationTest.java (239 lines of code) (raw):
/*
* Copyright (C) 2020 Google, LLC.
* SPDX-License-Identifier: BSD-3-Clause
*/
package de.jflex.ucd_generator.scanner;
import static com.google.common.truth.Truth.assertThat;
import com.google.common.collect.ImmutableList;
import de.jflex.ucd.CodepointRange;
import de.jflex.ucd_generator.TestedVersions;
import de.jflex.version.Version;
import java.io.IOException;
import org.junit.Before;
import org.junit.Ignore;
import org.junit.Test;
/**
 * Integration test for the {@link UcdScanner}, run against the Unicode 6.3 data files.
 *
 * <p>Each test drives the scanner one step at a time: it first runs every scan step that precedes
 * the step under test (always in the same order, see the {@code scanThrough...} helpers), checks
 * what the data looks like before the step, runs the step, and finally checks the data again.
 * Most tests also re-check the general category {@code me} intervals after the step under test.
 */
public class UcdScannerIntegrationTest {

  /**
   * Expected intervals for the general category value {@code me}.
   *
   * <p>The list was written down for Unicode 6.0; the tests assert the same intervals against the
   * 6.3 scanner created in {@link #ucdScanner()}.
   *
   * <pre>{@code
   * "\u0488\u0489" + "\u20dd\u20e0" + "\u20e2\u20e4" + "\ua670\ua672"
   * }</pre>
   */
  private static final ImmutableList<CodepointRange> INTERVALS_FOR_GENERALCATEGORY_ME =
      ImmutableList.of(
          CodepointRange.create('\u0488', '\u0489'),
          CodepointRange.create('\u20dd', '\u20e0'),
          CodepointRange.create('\u20e2', '\u20e4'),
          CodepointRange.create('\ua670', '\ua672'));

  /** Scanner under test; re-created before every test method. */
  private UcdScanner ucdScanner;

  /** Creates a fresh scanner for UCD 6.3 and sanity-checks the version it reports. */
  @Before
  public void ucdScanner() {
    ucdScanner = new UcdScanner(TestedVersions.UCD_VERSION_6_3);
    assertThat(ucdScanner.ucdVersion().version()).isEqualTo(new Version(6, 3, 0));
  }

  /** Checks canonical names and aliases of a few properties after scanning property aliases. */
  @Test
  public void scanPropertyAliases() throws Exception {
    ucdScanner.scanPropertyAliases();

    assertThat(ucdScanner.unicodeData.getCanonicalPropertyName("ccc"))
        .isEqualTo("canonicalcombiningclass");
    assertThat(ucdScanner.unicodeData.getPropertyAliases("script")).containsExactly("sc", "script");
    assertThat(ucdScanner.unicodeData.getPropertyAliases("Bidi_Class"))
        .containsExactly("bc", "bidiclass");
  }

  /**
   * Checks that a property value only aliases itself until the property value aliases have been
   * scanned, and that both spellings are known afterwards.
   */
  @Test
  public void scanPropertyValueAliases() throws Exception {
    ucdScanner.scanPropertyAliases();
    assertThat(ucdScanner.unicodeData.getPropertyValueAliases("sentencebreak", "at"))
        .containsExactly("at");
    assertThat(ucdScanner.unicodeData.getPropertyValueAliases("sentencebreak", "aterm"))
        .containsExactly("aterm");

    ucdScanner.scanPropertyValueAliases();

    assertThat(ucdScanner.unicodeData.getPropertyValueAliases("sentencebreak", "at"))
        .containsExactly("at", "aterm");
    assertThat(ucdScanner.unicodeData.getPropertyValueAliases("sentencebreak", "aterm"))
        .containsExactly("at", "aterm");
  }

  /** Checks the data that becomes available once the Unicode data step has been scanned. */
  @Test
  public void scanUnicodeData() throws Exception {
    scanAliases();
    // generalcategory=me
    assertThat(ucdScanner.unicodeData.getPropertyValueIntervals("me")).isEmpty();

    ucdScanner.scanUnicodeData();

    assertThat(ucdScanner.unicodeData.getPropertyValueIntervals("me"))
        .containsExactlyElementsIn(INTERVALS_FOR_GENERALCATEGORY_ME)
        .inOrder();
    // Character.MAX_CODE_POINT is 0x10FFFF, i.e. 1114111.
    assertThat(ucdScanner.unicodeData.maximumCodePoint()).isEqualTo(Character.MAX_CODE_POINT);
    assertThat(ucdScanner.unicodeData.maxCaselessMatchPartitionSize()).isEqualTo(4);
    assertThat(ucdScanner.unicodeData.getPropertyValueIntervals("generalcategory"))
        .isEqualTo(ucdScanner.unicodeData.getPropertyValueIntervals("gc"));
  }

  /** Checks that {@code otheruppercase} is only populated by the property list step. */
  @Test
  public void scanPropList() throws Exception {
    scanThroughUnicodeData();
    assertThat(ucdScanner.unicodeData.getPropertyValueIntervals("otheruppercase")).isEmpty();

    ucdScanner.scanPropList();

    assertThat(ucdScanner.unicodeData.getPropertyValueIntervals("me"))
        .containsExactlyElementsIn(INTERVALS_FOR_GENERALCATEGORY_ME);
    // U+2160..U+216F (8544..8559)
    assertThat(ucdScanner.unicodeData.getPropertyValueIntervals("otheruppercase"))
        .contains(CodepointRange.create(0x2160, 0x216F));
  }

  /** Checks that {@code idcontinue} is only populated by the derived core properties step. */
  @Test
  public void scanDerivedCoreProperties() throws Exception {
    scanThroughPropList();
    assertThat(ucdScanner.unicodeData.getPropertyValueIntervals("idcontinue")).isEmpty();

    ucdScanner.scanDerivedCoreProperties();

    assertThat(ucdScanner.unicodeData.getPropertyValueIntervals("me"))
        .containsExactlyElementsIn(INTERVALS_FOR_GENERALCATEGORY_ME);
    // U+0030..U+0039 (48..57), the ASCII digits
    assertThat(ucdScanner.unicodeData.getPropertyValueIntervals("idcontinue"))
        .contains(CodepointRange.create(0x30, 0x39));
  }

  /** Checks that {@code arabic} is only populated by the scripts step. */
  @Test
  public void scanScripts() throws Exception {
    scanThroughDerivedCoreProperties();
    assertThat(ucdScanner.unicodeData.getPropertyValueIntervals("arabic")).isEmpty();

    ucdScanner.scanScripts();

    assertThat(ucdScanner.unicodeData.getPropertyValueIntervals("me"))
        .containsExactlyElementsIn(INTERVALS_FOR_GENERALCATEGORY_ME);
    assertThat(ucdScanner.unicodeData.getPropertyValueIntervals("arabic")).isNotEmpty();
  }

  /**
   * Checks that U+3034 joins both {@code hiragana} and {@code katakana} once the script
   * extensions have been scanned.
   */
  // NOTE(review): the method name is missing a 't' (scanScriptExtensions); it is kept as is so
  // that existing test filters keep matching.
  @Ignore // TODO
  @Test
  public void scanScripExtensions() throws Exception {
    scanThroughScripts();
    assertThat(ucdScanner.unicodeData.getPropertyValueIntervals("hiragana"))
        .isNotEmpty(); // from scanScripts
    assertThat(ucdScanner.unicodeData.getPropertyValueIntervals("katakana")).isNotEmpty();
    assertThat(ucdScanner.unicodeData.codePointInProperty(0x3034, "hiragana")).isFalse();
    assertThat(ucdScanner.unicodeData.codePointInProperty(0x3034, "katakana")).isFalse();

    ucdScanner.scanScriptExtensions();

    assertThat(ucdScanner.unicodeData.getPropertyValueIntervals("me"))
        .containsExactlyElementsIn(INTERVALS_FOR_GENERALCATEGORY_ME);
    // 3031..3035 ; Hira Kana # Lm [5] VERTICAL KANA REPEAT MARK..
    assertThat(ucdScanner.unicodeData.codePointInProperty(0x3034, "hiragana")).isTrue();
    assertThat(ucdScanner.unicodeData.codePointInProperty(0x3034, "katakana")).isTrue();
  }

  /** Checks that {@code block=supplementalpunctuation} is only populated by the blocks step. */
  @Ignore // TODO
  @Test
  public void scanBlocks() throws Exception {
    scanThroughScriptExtensions();
    assertThat(ucdScanner.unicodeData.getPropertyValueIntervals("block=supplementalpunctuation"))
        .isEmpty();

    ucdScanner.scanBlocks();

    assertThat(ucdScanner.unicodeData.getPropertyValueIntervals("me"))
        .containsExactlyElementsIn(INTERVALS_FOR_GENERALCATEGORY_ME);
    assertThat(ucdScanner.unicodeData.getPropertyValueIntervals("scriptextensions=hira"))
        .isNotEmpty();
    assertThat(ucdScanner.unicodeData.getPropertyValueIntervals("block=supplementalpunctuation"))
        .isNotEmpty();
  }

  /** Checks that {@code linebreak=bb} is only populated by the line break step. */
  @Ignore // TODO
  @Test
  public void scanLineBreak() throws Exception {
    scanThroughBlocks();
    assertThat(ucdScanner.unicodeData.getPropertyValueIntervals("linebreak=bb")).isEmpty();

    ucdScanner.scanLineBreak();

    assertThat(ucdScanner.unicodeData.getPropertyValueIntervals("me"))
        .containsExactlyElementsIn(INTERVALS_FOR_GENERALCATEGORY_ME);
    assertThat(ucdScanner.unicodeData.getPropertyValueIntervals("linebreak=bb")).isNotEmpty();
    // TODO(regisd) It seems I'm missing some property value aliases
    // assertThat(ucdScanner.unicodeData.getPropertyValueIntervals("linebreak=bb"))
    // .isEqualTo(ucdScanner.unicodeData.getPropertyValueIntervals("linebreak=breakbefore"));
  }

  /**
   * Checks that {@code graphemeclusterbreak=ebasegaz} is only populated by the grapheme break
   * property step.
   */
  // NOTE(review): E_Base_GAZ looks like a grapheme cluster break value that is newer than
  // Unicode 6.3 -- confirm the expected value against the 6.3 files before enabling this test.
  @Ignore // TODO
  @Test
  public void scanGraphemeBreakProperty() throws Exception {
    scanThroughLineBreak();
    assertThat(ucdScanner.unicodeData.getPropertyValueIntervals("graphemeclusterbreak=ebasegaz"))
        .isEmpty();

    ucdScanner.scanGraphemeBreakProperty();

    assertThat(ucdScanner.unicodeData.getPropertyValueIntervals("me"))
        .containsExactlyElementsIn(INTERVALS_FOR_GENERALCATEGORY_ME);
    assertThat(ucdScanner.unicodeData.getPropertyValueIntervals("graphemeclusterbreak=ebasegaz"))
        .isNotEmpty();
    // TODO(regisd) It seems I'm missing some property value aliases
    // assertThat(ucdScanner.unicodeData.getPropertyValueIntervals("graphemeclusterbreak=ebasegaz"))
    // .isEqualTo(ucdScanner.unicodeData.getPropertyValueIntervals("gcb=ebg"));
  }

  /** Checks that {@code sentencebreak=close} is only populated by the sentence break step. */
  @Ignore // TODO
  @Test
  public void scanSentenceBreakProperty() throws Exception {
    scanThroughGraphemeBreakProperty();
    assertThat(ucdScanner.unicodeData.getPropertyValueIntervals("sentencebreak=close")).isEmpty();

    ucdScanner.scanSentenceBreakProperty();

    assertThat(ucdScanner.unicodeData.getPropertyValueIntervals("me"))
        .containsExactlyElementsIn(INTERVALS_FOR_GENERALCATEGORY_ME);
    assertThat(ucdScanner.unicodeData.getPropertyValueIntervals("sentencebreak=close"))
        .isNotEmpty();
    // TODO(regisd) It seems I'm missing some property value aliases
    // assertThat(ucdScanner.unicodeData.getPropertyValueIntervals("sentencebreak=close"))
    // .isEqualTo(ucdScanner.unicodeData.getPropertyValueIntervals("sentencebreak=cl"));
  }

  /** Checks that {@code wordbreak=doublequote} is only populated by the word break step. */
  @Ignore // TODO
  @Test
  public void scanWordBreakProperty() throws IOException {
    scanThroughSentenceBreakProperty();
    assertThat(ucdScanner.unicodeData.getPropertyValueIntervals("wordbreak=doublequote")).isEmpty();

    ucdScanner.scanWordBreakProperty();

    assertThat(ucdScanner.unicodeData.getPropertyValueIntervals("me"))
        .containsExactlyElementsIn(INTERVALS_FOR_GENERALCATEGORY_ME);
    assertThat(ucdScanner.unicodeData.getPropertyValueIntervals("wordbreak=doublequote"))
        .isNotEmpty();
    // TODO(regisd) It seems I'm missing some property value aliases
    // assertThat(ucdScanner.unicodeData.getPropertyValueIntervals("wordbreak=doublequote"))
    // .isEqualTo(ucdScanner.unicodeData.getPropertyValueIntervals("wb=dq"));
  }

  /** Checks the age value aliases and that {@code age=9.0} is populated by the derived age step. */
  // NOTE(review): age 9.0 looks newer than the Unicode 6.3 files used here -- confirm which age
  // value should be probed before enabling this test.
  @Ignore // TODO
  @Test
  public void scanDerivedAge() throws IOException {
    scanThroughWordBreakProperty();
    assertThat(ucdScanner.unicodeData.getPropertyValueIntervals("age=9.0")).isEmpty();

    ucdScanner.scanDerivedAge();

    assertThat(ucdScanner.unicodeData.getPropertyValueIntervals("me"))
        .containsExactlyElementsIn(INTERVALS_FOR_GENERALCATEGORY_ME);
    assertThat(ucdScanner.unicodeData.getPropertyValueAliases("age", "4.0"))
        .containsExactly("4.0", "v40");
    assertThat(ucdScanner.unicodeData.getPropertyValueIntervals("age=9.0")).isNotEmpty();
  }

  // The helpers below replay the scan steps in the fixed order used by every test of this class.
  // Each helper runs the previous helper and then exactly one more step.

  /** Scans the property aliases, then the property value aliases. */
  private void scanAliases() throws IOException {
    ucdScanner.scanPropertyAliases();
    ucdScanner.scanPropertyValueAliases();
  }

  /** Runs every step up to and including {@code scanUnicodeData}. */
  private void scanThroughUnicodeData() throws IOException {
    scanAliases();
    ucdScanner.scanUnicodeData();
  }

  /** Runs every step up to and including {@code scanPropList}. */
  private void scanThroughPropList() throws IOException {
    scanThroughUnicodeData();
    ucdScanner.scanPropList();
  }

  /** Runs every step up to and including {@code scanDerivedCoreProperties}. */
  private void scanThroughDerivedCoreProperties() throws IOException {
    scanThroughPropList();
    ucdScanner.scanDerivedCoreProperties();
  }

  /** Runs every step up to and including {@code scanScripts}. */
  private void scanThroughScripts() throws IOException {
    scanThroughDerivedCoreProperties();
    ucdScanner.scanScripts();
  }

  /** Runs every step up to and including {@code scanScriptExtensions}. */
  private void scanThroughScriptExtensions() throws IOException {
    scanThroughScripts();
    ucdScanner.scanScriptExtensions();
  }

  /** Runs every step up to and including {@code scanBlocks}. */
  private void scanThroughBlocks() throws IOException {
    scanThroughScriptExtensions();
    ucdScanner.scanBlocks();
  }

  /** Runs every step up to and including {@code scanLineBreak}. */
  private void scanThroughLineBreak() throws IOException {
    scanThroughBlocks();
    ucdScanner.scanLineBreak();
  }

  /** Runs every step up to and including {@code scanGraphemeBreakProperty}. */
  private void scanThroughGraphemeBreakProperty() throws IOException {
    scanThroughLineBreak();
    ucdScanner.scanGraphemeBreakProperty();
  }

  /** Runs every step up to and including {@code scanSentenceBreakProperty}. */
  private void scanThroughSentenceBreakProperty() throws IOException {
    scanThroughGraphemeBreakProperty();
    ucdScanner.scanSentenceBreakProperty();
  }

  /** Runs every step up to and including {@code scanWordBreakProperty}. */
  private void scanThroughWordBreakProperty() throws IOException {
    scanThroughSentenceBreakProperty();
    ucdScanner.scanWordBreakProperty();
  }
}