in fop-core/src/main/codegen/unicode/java/org/apache/fop/hyphenation/UnicodeClasses.java [172:290]
/**
 * Generates the character classes XML file from Unicode Character Database files.
 *
 * <p>Reads {@code Blocks.txt} and {@code UnicodeData.txt} from the UNIDATA directory and
 * writes, inside a {@code <classes>} element, one line per selected character. A character
 * is selected when its general category is {@code Lo}, or when it is {@code Ll}, {@code Lu}
 * or {@code Lt} and its simple lowercase mapping is empty or the character itself.
 * Characters that fall in a fixed list of excluded blocks are skipped. Processing stops at
 * the first code point above {@link Character#MAX_VALUE}. Each output line holds the
 * character followed by its simple uppercase and titlecase mappings when they differ.
 *
 * <p>Both input files are opened, and {@code Blocks.txt} is fully read, before the output
 * file is touched, so a missing or unreadable input does not destroy an existing output
 * file. All streams are closed even when processing fails.
 *
 * @param hexcode whether to prefix each output line with {@code 0x<hex code> }
 * @param unidataPath URI (scheme {@code file} or {@code http}) of the directory that
 *        contains {@code Blocks.txt} and {@code UnicodeData.txt}; a trailing slash is
 *        added when missing
 * @param outfilePath path of the output file; an existing file is replaced
 * @throws IOException if an input cannot be read, the output cannot be written, the
 *         URI scheme is not {@code file} or {@code http}, or {@code Blocks.txt} contains
 *         a malformed line
 * @throws URISyntaxException if {@code unidataPath} is not a valid URI
 */
public static void fromUCD(boolean hexcode, String unidataPath, String outfilePath)
        throws IOException, URISyntaxException {
    URI unidata = new URI(unidataPath.endsWith("/") ? unidataPath : unidataPath + "/");
    String scheme = unidata.getScheme();
    if (scheme == null || !(scheme.equals("file") || scheme.equals("http"))) {
        throw new FileNotFoundException(
                "URI with file or http scheme required for UNIDATA input directory");
    }

    Map<String, int[]> blocks = readUcdBlockRanges(unidata);

    // Resolve the excluded block ranges once instead of once per input line.
    // A block that is absent from Blocks.txt yields a null entry, which simply
    // excludes nothing (the original code failed with a NullPointerException).
    String[] excludedBlockNames = {"Superscripts and Subscripts",
                                   "Letterlike Symbols",
                                   "Alphabetic Presentation Forms",
                                   "Halfwidth and Fullwidth Forms",
                                   "CJK Unified Ideographs",
                                   "CJK Unified Ideographs Extension A",
                                   "Hangul Syllables"};
    int[][] excludedRanges = new int[excludedBlockNames.length][];
    for (int i = 0; i < excludedBlockNames.length; ++i) {
        excludedRanges[i] = blocks.get(excludedBlockNames[i]);
    }

    final int maxChar = Character.MAX_VALUE;

    BufferedReader inbr = openUcdFile(unidata, "UnicodeData.txt");
    try {
        File f = new File(outfilePath);
        if (f.exists()) {
            // Best effort: if deletion fails, FileOutputStream below either truncates
            // the file or reports the problem.
            f.delete();
        }
        // FileOutputStream creates the file when it does not exist.
        OutputStreamWriter ow = new OutputStreamWriter(new FileOutputStream(f), "utf-8");
        try {
            ow.write("<?xml version=\"1.0\" encoding=\"utf-8\"?>\n");
            License.writeXMLLicenseId(ow);
            ow.write("\n");
            writeGenerated(ow);
            ow.write("\n");
            ow.write("<classes>\n");
            for (String line = inbr.readLine(); line != null; line = inbr.readLine()) {
                if (line.length() == 0) {
                    // tolerate stray empty lines, e.g. at the end of the file
                    continue;
                }
                String[] fields = line.split(";", NUM_FIELDS);
                int code = Integer.parseInt(fields[UNICODE], 16);
                if (code > maxChar) {
                    // stop at the first code point that does not fit in a char
                    break;
                }
                String category = fields[GENERAL_CATEGORY];
                boolean casedLetter = category.equals("Ll") || category.equals("Lu")
                        || category.equals("Lt");
                boolean selected = category.equals("Lo")
                        || (casedLetter
                            && ("".equals(fields[SIMPLE_LOWERCASE_MAPPING])
                                || fields[UNICODE].equals(fields[SIMPLE_LOWERCASE_MAPPING])));
                if (!selected || isInAnyUcdRange(code, excludedRanges)) {
                    continue;
                }

                int uppercode = -1;
                int titlecode = -1;
                if (!"".equals(fields[SIMPLE_UPPERCASE_MAPPING])) {
                    uppercode = Integer.parseInt(fields[SIMPLE_UPPERCASE_MAPPING], 16);
                }
                if (!"".equals(fields[SIMPLE_TITLECASE_MAPPING])) {
                    titlecode = Integer.parseInt(fields[SIMPLE_TITLECASE_MAPPING], 16);
                }

                StringBuilder s = new StringBuilder();
                if (hexcode) {
                    s.append("0x")
                     .append(fields[UNICODE].replaceFirst("^0+", "").toLowerCase())
                     .append(' ');
                }
                s.append(Character.toChars(code));
                if (uppercode != -1 && uppercode != code) {
                    s.append(Character.toChars(uppercode));
                }
                if (titlecode != -1 && titlecode != code && titlecode != uppercode) {
                    s.append(Character.toChars(titlecode));
                }
                s.append('\n');
                ow.write(s.toString());
            }
            ow.write("</classes>\n");
            ow.flush();
        } finally {
            ow.close();
        }
    } finally {
        inbr.close();
    }
}

/**
 * Opens a file of the UNIDATA directory as a UTF-8 reader.
 *
 * @param unidata URI of the UNIDATA directory, with scheme {@code file} or {@code http}
 * @param fileName name of the file, resolved against {@code unidata}
 * @return a buffered reader on the file; the caller must close it
 * @throws IOException if the file cannot be opened
 */
private static BufferedReader openUcdFile(URI unidata, String fileName) throws IOException {
    URI fileUri = unidata.resolve(fileName);
    InputStream in;
    if ("file".equals(unidata.getScheme())) {
        in = new FileInputStream(new File(fileUri));
    } else {
        in = fileUri.toURL().openStream();
    }
    return new BufferedReader(new InputStreamReader(in, "utf-8"));
}

/**
 * Reads {@code Blocks.txt} into a map from block name to its inclusive code point range.
 *
 * @param unidata URI of the UNIDATA directory
 * @return map from block name to a two-element array {first code point, last code point}
 * @throws IOException if the file cannot be read or contains a malformed line
 */
private static Map<String, int[]> readUcdBlockRanges(URI unidata) throws IOException {
    Map<String, int[]> blocks = new HashMap<String, int[]>();
    BufferedReader in = openUcdFile(unidata, "Blocks.txt");
    try {
        for (String line = in.readLine(); line != null; line = in.readLine()) {
            if (line.startsWith("#") || line.trim().length() == 0) {
                continue;
            }
            // expected form: "<first>..<last>; <block name>"
            String[] parts = line.split(";");
            String[] indices = parts.length < 2
                    ? new String[0] : parts[0].trim().split("\\.\\.");
            if (indices.length < 2) {
                throw new IOException("Malformed line in Blocks.txt: " + line);
            }
            int[] range = {Integer.parseInt(indices[0].trim(), 16),
                           Integer.parseInt(indices[1].trim(), 16)};
            blocks.put(parts[1].trim(), range);
        }
    } finally {
        in.close();
    }
    return blocks;
}

/**
 * Tells whether a code point lies within any of the given inclusive ranges.
 *
 * @param code the code point to test
 * @param ranges two-element {first, last} arrays; null entries are ignored
 * @return true if {@code code} lies within at least one non-null range
 */
private static boolean isInAnyUcdRange(int code, int[][] ranges) {
    for (int i = 0; i < ranges.length; ++i) {
        int[] range = ranges[i];
        if (range != null && code >= range[0] && code <= range[1]) {
            return true;
        }
    }
    return false;
}