private static Charset getCharset()

in johnzon-core/src/main/java/org/apache/johnzon/core/RFC4627AwareInputStreamReader.java [86:155]


    /**
     * Guesses the charset of a JSON stream from its leading bytes and pushes those bytes
     * back so that decoding can start at the beginning of the payload.
     *
     * <p>Detection order: the pattern of 0x00 bytes at the start (a JSON text starts with
     * ASCII characters, so a leading or following 0x00 reveals UTF-16/UTF-32), then a byte
     * order mark, and finally UTF-8 as the default.</p>
     *
     * <p>A UTF-8 or UTF-16 BOM is consumed, i.e. not pushed back; every other inspected
     * byte is pushed back onto {@code inputStream}.</p>
     *
     * @param inputStream stream to inspect; the bytes returned by {@code readAllBytes} are
     *                    unread on it, so its pushback buffer must be able to hold them
     * @return the detected charset, UTF-8 when nothing more specific is recognised
     * @throws JsonException if the stream consists of a single 0x00 byte, or if reading from
     *                       or unreading to the stream fails
     */
    private static Charset getCharset(final PushbackInputStream inputStream) {
        try {
            // NOTE(review): readAllBytes is defined elsewhere in this file; it presumably
            // returns only the first few (at most four) bytes of the stream - confirm.
            final byte[] utfBytes = readAllBytes(inputStream);
            if (utfBytes.length == 0) {
                return StandardCharsets.UTF_8; // empty input -> the charset does not matter
            }
            if (utfBytes.length == 1) {
                if (utfBytes[0] == 0) { // TCK shortcut since the expected behavior is dubious
                    throw new JsonException("Unknown encoding");
                }
                inputStream.unread(utfBytes);
                return StandardCharsets.UTF_8; // single byte -> the charset does not matter either
            }

            final int first = utfBytes[0] & 0xFF;
            final int second = utfBytes[1] & 0xFF;

            Charset charset = StandardCharsets.UTF_8; // default when nothing else matches
            int bomBytesToSkip = 0;

            if (first == 0x00) {
                // 00 00 .. -> UTF-32BE, 00 xx .. -> UTF-16BE.
                // This also covers the UTF-32BE BOM (00 00 FE FF), which is pushed back untouched.
                charset = (second == 0x00) ? Charset.forName("UTF-32BE") : StandardCharsets.UTF_16BE;
            } else if (second == 0x00) {
                // xx 00 00 .. -> UTF-32LE, otherwise xx 00 .. -> UTF-16LE.
                // A two-byte input "xx 00" (one UTF-16LE character) is handled here as well;
                // it cannot be valid UTF-8 JSON because it contains a 0x00 byte.
                final boolean thirdIsNull = utfBytes.length > 2 && (utfBytes[2] & 0xFF) == 0x00;
                charset = thirdIsNull ? Charset.forName("UTF-32LE") : StandardCharsets.UTF_16LE;
            } else if (first == 0xFE && second == 0xFF) {
                // BOM FE FF -> UTF-16BE
                charset = StandardCharsets.UTF_16BE;
                bomBytesToSkip = 2;
            } else if (first == 0xFF && second == 0xFE) {
                final boolean utf32LeBom = utfBytes.length > 3
                        && (utfBytes[2] & 0xFF) == 0x00
                        && (utfBytes[3] & 0xFF) == 0x00;
                if (utf32LeBom) {
                    // BOM FF FE 00 00 -> UTF-32LE.
                    // NOTE(review): unlike the shorter BOMs, this one is pushed back together
                    // with the other bytes (unchanged behavior); presumably the UTF-32LE decoder
                    // consumes it itself - confirm.
                    charset = Charset.forName("UTF-32LE");
                } else {
                    // BOM FF FE -> UTF-16LE
                    charset = StandardCharsets.UTF_16LE;
                    bomBytesToSkip = 2;
                }
            } else if (utfBytes.length > 2 && first == 0xEF && second == 0xBB && (utfBytes[2] & 0xFF) == 0xBF) {
                // BOM EF BB BF -> UTF-8 with BOM; charset stays UTF-8
                bomBytesToSkip = 3;
            }

            // Push back everything that was read except a consumed BOM, so the caller
            // starts decoding at the first payload byte.
            inputStream.unread(utfBytes, bomBytesToSkip, utfBytes.length - bomBytesToSkip);
            return charset;
        } catch (final IOException e) {
            throw new JsonException("Unable to detect charset due to "+e.getMessage(), e);
        }
    }