in tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-code-module/src/main/java/org/apache/tika/parser/executable/ExecutableParser.java [435:529]
/**
 * Parses the fixed-size fields of a Mach-O header that follow the magic number and
 * records the byte order, CPU architecture and Mach-O file type in {@code metadata}.
 * <p>
 * Twelve bytes (CPU type, CPU subtype, file type) are consumed from {@code stream}.
 *
 * @param xhtml    content handler for the document; not used by this method
 * @param metadata receives {@code ENDIAN}, {@code ARCHITECTURE_BITS} (only when the 64-bit
 *                 ABI flag is set), {@code MACHINE_TYPE} (only for recognised CPU types)
 *                 and {@code Metadata.CONTENT_TYPE}
 * @param stream   input positioned directly after the four magic bytes
 * @param first4   the four magic bytes; a last byte of {@code 0xFE} selects little endian
 * @throws TikaException if an integer field cannot be read from the stream
 * @throws IOException   if reading from the stream fails
 */
public void parseMachO(XHTMLContentHandler xhtml, Metadata metadata, InputStream stream,
                       byte[] first4) throws TikaException, IOException {
    // The magic is stored in the file's own byte order, so a trailing 0xFE
    // means the remaining header fields are little endian.
    final boolean isLE = first4[3] == (byte) 0xFE;
    metadata.set(ENDIAN, (isLE ? Endian.LITTLE : Endian.BIG).getName());

    // Bytes 5-8 are the CPU type and architecture bits
    final int cpuType = readMachOInt(stream, isLE);
    if ((cpuType >> 24) == 1) {
        // 0x01000000 in the top byte marks the 64-bit ABI variant of a CPU type
        metadata.set(ARCHITECTURE_BITS, "64");
    }
    switch (cpuType) {
        case 1:
            metadata.set(MACHINE_TYPE, MACHINE_VAX);
            break;
        case 6:
            metadata.set(MACHINE_TYPE, MACHINE_M68K);
            break;
        case 7:
            metadata.set(MACHINE_TYPE, MACHINE_x86_32);
            break;
        case (7 | 0x01000000):
            metadata.set(MACHINE_TYPE, MACHINE_x86_64);
            break;
        case 8:
            metadata.set(MACHINE_TYPE, MACHINE_MIPS);
            break;
        case 12:
        case (12 | 0x01000000):
            metadata.set(MACHINE_TYPE, MACHINE_ARM);
            break;
        case 13:
            metadata.set(MACHINE_TYPE, MACHINE_M88K);
            break;
        case 14:
            metadata.set(MACHINE_TYPE, MACHINE_SPARC);
            break;
        case 18:
        case (18 | 0x01000000):
            // 64-bit PowerPC shares the PPC machine type, as 64-bit ARM does with ARM
            metadata.set(MACHINE_TYPE, MACHINE_PPC);
            break;
        default:
            // Unrecognised CPU type: leave MACHINE_TYPE unset
            break;
    }

    // Bytes 9-12 are the CPU subtype. The value is not reported, but it must
    // still be consumed so that the next read lands on the file type.
    readMachOInt(stream, isLE);

    // Bytes 13-16 are the file type
    final int fileType = readMachOInt(stream, isLE);
    switch (fileType) {
        case 0x1:
            metadata.set(Metadata.CONTENT_TYPE, MACH_O_OBJECT.toString());
            break;
        case 0x2:
            metadata.set(Metadata.CONTENT_TYPE, MACH_O_EXECUTABLE.toString());
            break;
        case 0x3:
            metadata.set(Metadata.CONTENT_TYPE, MACH_O_FVMLIB.toString());
            break;
        case 0x4:
            metadata.set(Metadata.CONTENT_TYPE, MACH_O_CORE.toString());
            break;
        case 0x5:
            metadata.set(Metadata.CONTENT_TYPE, MACH_O_PRELOAD.toString());
            break;
        case 0x6:
            metadata.set(Metadata.CONTENT_TYPE, MACH_O_DYLIB.toString());
            break;
        case 0x7:
            metadata.set(Metadata.CONTENT_TYPE, MACH_O_DYLINKER.toString());
            break;
        case 0x8:
            metadata.set(Metadata.CONTENT_TYPE, MACH_O_BUNDLE.toString());
            break;
        case 0x9:
            metadata.set(Metadata.CONTENT_TYPE, MACH_O_DYLIB_STUB.toString());
            break;
        case 0xa:
            metadata.set(Metadata.CONTENT_TYPE, MACH_O_DSYM.toString());
            break;
        case 0xb:
            metadata.set(Metadata.CONTENT_TYPE, MACH_O_KEXT_BUNDLE.toString());
            break;
        default:
            // Unknown file type: fall back to the generic Mach-O media type
            metadata.set(Metadata.CONTENT_TYPE, MACH_O.toString());
            break;
    }
}

/**
 * Reads one 32-bit Mach-O header field from the stream in the requested byte order.
 *
 * @param stream       input to read four bytes from
 * @param littleEndian {@code true} to decode as little endian, {@code false} for big endian
 * @return the decoded field value
 * @throws TikaException if the value cannot be read from the stream
 * @throws IOException   if reading from the stream fails
 */
private static int readMachOInt(InputStream stream, boolean littleEndian)
        throws TikaException, IOException {
    return littleEndian
            ? EndianUtils.readIntLE(stream)
            : EndianUtils.readIntBE(stream);
}