in tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-code-module/src/main/java/org/apache/tika/parser/executable/ExecutableParser.java [259:430]
/**
 * Reads the ELF identification bytes and the first header fields that follow the
 * magic number, recording what it recognises in {@code metadata}.
 * <p>
 * The stream is consumed strictly in order: class byte (32/64-bit), data-encoding
 * byte (endianness), ELF version byte, OS ABI byte, ABI version byte, seven padding
 * bytes, the 16-bit object type and the 16-bit machine id. The 32-bit version
 * field that follows is not read.
 * <p>
 * {@code Metadata.CONTENT_TYPE} is always set (falling back to {@code ELF_GENERAL});
 * the other properties are only set when the value read is one of the known codes.
 *
 * @param xhtml    content handler; not used by this method
 * @param metadata receives the architecture bits, endianness, platform, content type
 *                 and machine type
 * @param stream   ELF data, expected to be positioned directly after the four magic
 *                 bytes (the first byte read here is treated as file offset 4)
 * @param first4   the magic bytes already consumed by the caller; not used here
 * @throws TikaException declared by the method; presumably raised by the
 *                       {@code EndianUtils} readers on a truncated header - confirm
 *                       against that class
 * @throws IOException   if reading from {@code stream} fails
 */
public void parseELF(XHTMLContentHandler xhtml, Metadata metadata, InputStream stream,
                     byte[] first4) throws TikaException, IOException {
    // Offset 4 (EI_CLASS): 1 = 32 bit objects, 2 = 64 bit objects
    int architecture = stream.read();
    if (architecture == 1) {
        metadata.set(ARCHITECTURE_BITS, "32");
    } else if (architecture == 2) {
        metadata.set(ARCHITECTURE_BITS, "64");
    }

    // Offset 5 (EI_DATA): 1 = little endian, 2 = big endian
    int endian = stream.read();
    if (endian == 1) {
        metadata.set(ENDIAN, Endian.LITTLE.getName());
    } else if (endian == 2) {
        metadata.set(ENDIAN, Endian.BIG.getName());
    }
    // Anything other than 1 (including an invalid value) is decoded as big endian below
    boolean littleEndian = endian == 1;

    // Offset 6 (EI_VERSION): ELF version, consumed but not reported
    stream.read();

    // Offset 7 (EI_OSABI): the OS ABI, if set (lots of compilers don't)
    // Offset 8 (EI_ABIVERSION): the OS specific ABI version
    int os = stream.read();
    int osVer = stream.read();
    // Both zero means "not specified", so only report System V when an ABI version is present
    if (os > 0 || osVer > 0) {
        switch (os) {
            case 0:
                metadata.set(PLATFORM, PLATFORM_SYSV);
                break;
            case 1:
                metadata.set(PLATFORM, PLATFORM_HPUX);
                break;
            case 2:
                metadata.set(PLATFORM, PLATFORM_NETBSD);
                break;
            case 3:
                metadata.set(PLATFORM, PLATFORM_LINUX);
                break;
            case 6:
                metadata.set(PLATFORM, PLATFORM_SOLARIS);
                break;
            case 7:
                metadata.set(PLATFORM, PLATFORM_AIX);
                break;
            case 8:
                metadata.set(PLATFORM, PLATFORM_IRIX);
                break;
            case 9:
                metadata.set(PLATFORM, PLATFORM_FREEBSD);
                break;
            case 10:
                metadata.set(PLATFORM, PLATFORM_TRU64);
                break;
            case 12:
                // NOTE(review): 12 is ELFOSABI_OPENBSD in the ELF specification. It is
                // reported as FreeBSD here; kept unchanged because no OpenBSD platform
                // constant is visible from this method - confirm whether one should be added.
                metadata.set(PLATFORM, PLATFORM_FREEBSD);
                break;
            case 64:
            case 97:
                metadata.set(PLATFORM, PLATFORM_ARM);
                break;
            case 255:
                metadata.set(PLATFORM, PLATFORM_EMBEDDED);
                break;
            default:
                // Unknown OS ABI: leave the platform unset
                break;
        }
    }

    // Offsets 9-15 are padding, skip them
    byte[] padLength = new byte[7];
    IOUtils.readFully(stream, padLength);

    // Offsets 16-17 (e_type): the object type, in the file's byte order
    int type;
    if (littleEndian) {
        type = EndianUtils.readUShortLE(stream);
    } else {
        type = EndianUtils.readUShortBE(stream);
    }
    switch (type) {
        case 1:
            metadata.set(Metadata.CONTENT_TYPE, ELF_OBJECT.toString());
            break;
        case 2:
            metadata.set(Metadata.CONTENT_TYPE, ELF_EXECUTABLE.toString());
            break;
        case 3:
            metadata.set(Metadata.CONTENT_TYPE, ELF_SHAREDLIB.toString());
            break;
        case 4:
            metadata.set(Metadata.CONTENT_TYPE, ELF_COREDUMP.toString());
            break;
        default:
            metadata.set(Metadata.CONTENT_TYPE, ELF_GENERAL.toString());
            break;
    }

    // Offsets 18-19 (e_machine): the target machine (EM_*), in the file's byte order
    int machine;
    if (littleEndian) {
        machine = EndianUtils.readUShortLE(stream);
    } else {
        machine = EndianUtils.readUShortBE(stream);
    }
    switch (machine) {
        case 2:
        case 18:
        case 43:
            metadata.set(MACHINE_TYPE, MACHINE_SPARC);
            break;
        case 3:
            metadata.set(MACHINE_TYPE, MACHINE_x86_32);
            break;
        case 4:
            metadata.set(MACHINE_TYPE, MACHINE_M68K);
            break;
        case 5:
            metadata.set(MACHINE_TYPE, MACHINE_M88K);
            break;
        case 8:
        case 10:
            metadata.set(MACHINE_TYPE, MACHINE_MIPS);
            break;
        case 9:
            // EM_S370 is 9 in the ELF specification (7 is EM_860, the Intel 80860)
            metadata.set(MACHINE_TYPE, MACHINE_S370);
            break;
        case 20:
        case 21:
            metadata.set(MACHINE_TYPE, MACHINE_PPC);
            break;
        case 22:
            metadata.set(MACHINE_TYPE, MACHINE_S390);
            break;
        case 40:
            metadata.set(MACHINE_TYPE, MACHINE_ARM);
            break;
        case 41:
        case 0x9026:
            metadata.set(MACHINE_TYPE, MACHINE_ALPHA);
            break;
        case 50:
            metadata.set(MACHINE_TYPE, MACHINE_IA_64);
            break;
        case 62:
            metadata.set(MACHINE_TYPE, MACHINE_x86_64);
            break;
        case 75:
            metadata.set(MACHINE_TYPE, MACHINE_VAX);
            break;
        case 88:
            metadata.set(MACHINE_TYPE, MACHINE_M32R);
            break;
        default:
            // Unknown machine id: leave the machine type unset
            break;
    }

    // Offsets 20-23 (e_version) are the version
    // TODO
}