protected void detectBomCharset()

in odps-console-dship/src/main/java/com/aliyun/odps/ship/upload/RecordReader.java [47:79]


  /**
   * Detects a Unicode byte-order mark (BOM) at the beginning of the stream
   * returned by {@code blockInfo.getFileInputStream()}.
   *
   * <p>Up to the first four bytes are read and compared against the UTF-32BE,
   * UTF-32LE, UTF-8, UTF-16BE and UTF-16LE marks. On a match,
   * {@code detectedCharset} is set to the charset name and {@code bomBytes} to
   * the length of the mark in bytes. When no mark is recognised,
   * {@code detectedCharset} is set to {@code null} and {@code bomBytes} to
   * {@code 0}. The stream obtained here is always closed before returning.
   *
   * @throws IOException if reading the leading bytes fails
   */
  protected void detectBomCharset() throws IOException {
    InputStream internalIs = blockInfo.getFileInputStream();
    try {
      byte[] bom = new byte[4];

      // A single read() may return fewer bytes than requested even when more
      // data follows, so keep reading until the buffer is full or EOF is hit.
      // Otherwise a short read would make a present BOM go undetected.
      int n = 0;
      while (n < bom.length) {
        int count = internalIs.read(bom, n, bom.length - n);
        if (count < 0) {
          break;
        }
        n += count;
      }

      // The 4-byte UTF-32 marks are tested before the 2-byte UTF-16 marks
      // because the UTF-32LE mark (FF FE 00 00) begins with the UTF-16LE
      // mark (FF FE).
      if ((n >= 4) && (bom[0] == (byte) 0x00) && (bom[1] == (byte) 0x00) &&
          (bom[2] == (byte) 0xFE) && (bom[3] == (byte) 0xFF)) {
        detectedCharset = "UTF-32BE";
        bomBytes = 4;
      } else if ((n >= 4) && (bom[0] == (byte) 0xFF) && (bom[1] == (byte) 0xFE) &&
                 (bom[2] == (byte) 0x00) && (bom[3] == (byte) 0x00)) {
        detectedCharset = "UTF-32LE";
        bomBytes = 4;
      } else if ((n >= 3) && (bom[0] == (byte) 0xEF) && (bom[1] == (byte) 0xBB) &&
                 (bom[2] == (byte) 0xBF)) {
        detectedCharset = "UTF-8";
        bomBytes = 3;
      } else if ((n >= 2) && (bom[0] == (byte) 0xFE) && (bom[1] == (byte) 0xFF)) {
        detectedCharset = "UTF-16BE";
        bomBytes = 2;
      } else if ((n >= 2) && (bom[0] == (byte) 0xFF) && (bom[1] == (byte) 0xFE)) {
        detectedCharset = "UTF-16LE";
        bomBytes = 2;
      } else {
        // No Unicode BOM found: report no detected charset and no bytes to skip.
        detectedCharset = null;
        bomBytes = 0;
      }
    } finally {
      // Close without propagating close-time failures.
      IOUtils.closeQuietly(internalIs);
    }
  }