in odps-console-dship/src/main/java/com/aliyun/odps/ship/upload/RecordReader.java [47:79]
/**
 * Inspects the first bytes of the block's file input stream for a Unicode byte-order mark (BOM)
 * and records the result in {@code detectedCharset} and {@code bomBytes}.
 *
 * <p>Recognized marks are UTF-32BE, UTF-32LE, UTF-8, UTF-16BE and UTF-16LE. When none matches,
 * {@code detectedCharset} is set to {@code null} and {@code bomBytes} to {@code 0}. The stream
 * obtained from {@code blockInfo} is always closed before this method returns.
 *
 * @throws IOException if reading from the stream fails
 */
protected void detectBomCharset() throws IOException {
  InputStream internalIs = blockInfo.getFileInputStream();
  try {
    byte[] bom = new byte[4];

    // A single read() is allowed to return fewer bytes than requested even when more data is
    // available, so keep reading until the buffer is full or the stream ends. Otherwise a short
    // read could hide a BOM, or make a UTF-32LE mark look like a UTF-16LE one.
    int n = 0;
    while (n < bom.length) {
      int count = internalIs.read(bom, n, bom.length - n);
      if (count < 0) {
        break; // end of stream: the input is shorter than the buffer
      }
      n += count;
    }

    // The 4-byte UTF-32 marks are tested before the 2-byte UTF-16 marks because the UTF-32LE
    // mark (FF FE 00 00) starts with the UTF-16LE mark (FF FE).
    if ((n >= 4) && (bom[0] == (byte) 0x00) && (bom[1] == (byte) 0x00) &&
        (bom[2] == (byte) 0xFE) && (bom[3] == (byte) 0xFF)) {
      detectedCharset = "UTF-32BE";
      bomBytes = 4;
    } else if ((n >= 4) && (bom[0] == (byte) 0xFF) && (bom[1] == (byte) 0xFE) &&
               (bom[2] == (byte) 0x00) && (bom[3] == (byte) 0x00)) {
      detectedCharset = "UTF-32LE";
      bomBytes = 4;
    } else if ((n >= 3) && (bom[0] == (byte) 0xEF) && (bom[1] == (byte) 0xBB) &&
               (bom[2] == (byte) 0xBF)) {
      detectedCharset = "UTF-8";
      bomBytes = 3;
    } else if ((n >= 2) && (bom[0] == (byte) 0xFE) && (bom[1] == (byte) 0xFF)) {
      detectedCharset = "UTF-16BE";
      bomBytes = 2;
    } else if ((n >= 2) && (bom[0] == (byte) 0xFF) && (bom[1] == (byte) 0xFE)) {
      detectedCharset = "UTF-16LE";
      bomBytes = 2;
    } else {
      // No Unicode BOM found. Nothing has to be pushed back: this stream is only used for
      // detection and is closed below.
      detectedCharset = null;
      bomBytes = 0;
    }
  } finally {
    IOUtils.closeQuietly(internalIs);
  }
}