public static void fromUCD()

in fop-core/src/main/codegen/unicode/java/org/apache/fop/hyphenation/UnicodeClasses.java [172:290]


    /**
     * Writes an XML file of character classes derived from the Unicode Character
     * Database files <code>Blocks.txt</code> and <code>UnicodeData.txt</code>.
     * <p>
     * A character is written when its general category is Ll, Lu or Lt and its simple
     * lowercase mapping is empty or maps to itself, or when its general category is Lo.
     * Characters lying in a fixed set of blocks (see <code>blockNames</code> in the body)
     * are skipped. Each output line holds the character followed by its simple uppercase
     * and titlecase mappings where those differ from what has already been written.
     * Processing stops at the first code point above {@link Character#MAX_VALUE}.
     * <p>
     * <code>Blocks.txt</code> is read and checked before the output file is touched, so
     * a missing or incomplete block list does not destroy an existing output file.
     *
     * @param hexcode whether each output line is prefixed with <code>0x</code>, the
     *        lower-case hexadecimal code point without leading zeros, and a space
     * @param unidataPath URI (scheme <code>file</code> or <code>http</code>) of the
     *        directory containing the UCD files; a trailing slash is added when absent
     * @param outfilePath path of the output file; an existing file is replaced
     * @throws IOException if the scheme is not supported (as a
     *         {@link FileNotFoundException}), if reading or writing fails, or if
     *         <code>Blocks.txt</code> lacks one of the required blocks
     * @throws URISyntaxException if <code>unidataPath</code> is not a valid URI
     */
    public static void fromUCD(boolean hexcode, String unidataPath, String outfilePath)
    throws IOException, URISyntaxException {
        URI unidata;
        if (unidataPath.endsWith("/")) {
            unidata = new URI(unidataPath);
        } else {
            // URI.resolve() replaces the last path segment unless the base ends in a slash
            unidata = new URI(unidataPath + "/");
        }
        String scheme = unidata.getScheme();
        if (scheme == null || !(scheme.equals("file") || scheme.equals("http"))) {
            throw new FileNotFoundException(
            "URI with file or http scheme required for UNIDATA input directory");
        }

        // Blocks whose characters are never written to the output.
        String[] blockNames = {"Superscripts and Subscripts",
                               "Letterlike Symbols",
                               "Alphabetic Presentation Forms",
                               "Halfwidth and Fullwidth Forms",
                               "CJK Unified Ideographs",
                               "CJK Unified Ideographs Extension A",
                               "Hangul Syllables"};
        // Resolve the excluded ranges once, and fail with a clear message instead of a
        // NullPointerException when Blocks.txt does not define one of them.
        Map<String, int[]> blocks = readBlockRanges(unidata);
        int[][] excludedRanges = new int[blockNames.length][];
        for (int i = 0; i < blockNames.length; ++i) {
            excludedRanges[i] = blocks.get(blockNames[i]);
            if (excludedRanges[i] == null) {
                throw new IOException("Block \"" + blockNames[i] + "\" not found in "
                                      + unidata.resolve("Blocks.txt"));
            }
        }

        File f = new File(outfilePath);
        if (f.exists()) {
            f.delete();
        }
        f.createNewFile();
        OutputStreamWriter ow = new OutputStreamWriter(new FileOutputStream(f), "utf-8");
        try {
            BufferedReader inbr = new BufferedReader(
                    new InputStreamReader(openUnidataFile(unidata, "UnicodeData.txt"), "utf-8"));
            try {
                int maxChar = Character.MAX_VALUE;

                ow.write("<?xml version=\"1.0\" encoding=\"utf-8\"?>\n");
                License.writeXMLLicenseId(ow);
                ow.write("\n");
                writeGenerated(ow);
                ow.write("\n");
                ow.write("<classes>\n");
                for (String line = inbr.readLine(); line != null; line = inbr.readLine()) {
                    String[] fields = line.split(";", NUM_FIELDS);
                    int code = Integer.parseInt(fields[UNICODE], 16);
                    if (code > maxChar) {
                        // NOTE(review): stopping here assumes UnicodeData.txt is sorted
                        // by ascending code point - confirm against the UCD format.
                        break;
                    }
                    String category = fields[GENERAL_CATEGORY];
                    boolean cased = category.equals("Ll") || category.equals("Lu")
                            || category.equals("Lt");
                    // The lowercase mapping is only inspected for cased letters.
                    boolean wanted = (cased
                            && ("".equals(fields[SIMPLE_LOWERCASE_MAPPING])
                                || fields[UNICODE].equals(fields[SIMPLE_LOWERCASE_MAPPING])))
                            || category.equals("Lo");
                    if (!wanted) {
                        continue;
                    }

                    boolean excluded = false;
                    for (int j = 0; j < excludedRanges.length && !excluded; ++j) {
                        excluded = code >= excludedRanges[j][0] && code <= excludedRanges[j][1];
                    }
                    if (excluded) {
                        continue;
                    }

                    int uppercode = -1;
                    int titlecode = -1;
                    if (!"".equals(fields[SIMPLE_UPPERCASE_MAPPING])) {
                        uppercode = Integer.parseInt(fields[SIMPLE_UPPERCASE_MAPPING], 16);
                    }
                    if (!"".equals(fields[SIMPLE_TITLECASE_MAPPING])) {
                        titlecode = Integer.parseInt(fields[SIMPLE_TITLECASE_MAPPING], 16);
                    }
                    StringBuilder s = new StringBuilder();
                    if (hexcode) {
                        s.append("0x" + fields[UNICODE].replaceFirst("^0+", "").toLowerCase() + " ");
                    }
                    s.append(Character.toChars(code));
                    if (uppercode != -1 && uppercode != code) {
                        s.append(Character.toChars(uppercode));
                    }
                    if (titlecode != -1 && titlecode != code && titlecode != uppercode) {
                        s.append(Character.toChars(titlecode));
                    }
                    ow.write(s.toString() + "\n");
                }
                ow.write("</classes>\n");
                ow.flush();
            } finally {
                inbr.close();
            }
        } finally {
            // Closed on every path so a failure does not leak the output file handle.
            ow.close();
        }
    }

    /**
     * Opens one file of the UCD input directory for reading.
     *
     * @param unidata URI of the input directory, ending in a slash; its scheme must
     *        already have been checked to be <code>file</code> or <code>http</code>
     * @param fileName name of the file, resolved against <code>unidata</code>
     * @return an open stream which the caller must close
     * @throws IOException if the file cannot be opened
     */
    private static InputStream openUnidataFile(URI unidata, String fileName)
    throws IOException {
        URI inuri = unidata.resolve(fileName);
        if ("file".equals(unidata.getScheme())) {
            return new FileInputStream(new File(inuri));
        }
        return inuri.toURL().openStream();
    }

    /**
     * Reads <code>Blocks.txt</code> into a map from block name to a two-element array
     * holding the first and last code point of the block.
     *
     * @param unidata URI of the input directory, ending in a slash
     * @return the block ranges keyed by trimmed block name
     * @throws IOException if the file cannot be opened or read
     */
    private static Map<String, int[]> readBlockRanges(URI unidata) throws IOException {
        BufferedReader inbr = new BufferedReader(
                new InputStreamReader(openUnidataFile(unidata, "Blocks.txt"), "utf-8"));
        try {
            Map<String, int[]> blocks = new HashMap<String, int[]>();
            for (String line = inbr.readLine(); line != null; line = inbr.readLine()) {
                // skip comment lines and lines containing only whitespace
                if (line.startsWith("#") || line.matches("^\\s*$")) {
                    continue;
                }
                // data lines are split into "<first>..<last>" and the block name
                String[] parts = line.split(";");
                String block = parts[1].trim();
                String[] indices = parts[0].split("\\.\\.");
                int[] ind = {Integer.parseInt(indices[0], 16), Integer.parseInt(indices[1], 16)};
                blocks.put(block, ind);
            }
            return blocks;
        } finally {
            inbr.close();
        }
    }