in Frameworks/CoreFoundation/PlugIn.subproj/CFBundle_Grok.c [701:941]
// Sniffs the leading bytes of a file (or of an in-memory blob) and guesses a
// filename extension describing its contents.
//
// Input selection:
//   - If url is non-NULL, resolves to a regular file, and that file can be
//     opened and read, up to MAGIC_BYTES_TO_READ bytes are read from it.
//   - Otherwise, if data is non-NULL, the bytes of data are examined instead.
//
// Out-parameters (each may be NULL):
//   extension     - receives a CFString wrapping the guessed extension (made
//                   with CFStringCreateWithCStringNoCopy), or NULL if nothing
//                   matched.  When extension is NULL only the magic-number
//                   table and its follow-up checks run; the heuristic passes
//                   (short magics, text sniffing, PICT, DMG) are skipped.
//   machtype      - receives the value returned by _CFBundleGrokMachType, or
//                   UNKNOWN_FILETYPE if that helper was not consulted.
//   architectures, infodict, hasObjc, objcVersion, objcFlags
//                 - reset to NULL / false / 0 on entry and then handed to
//                   _CFBundleGrokMachType (BINARY_SUPPORT_DYLD builds only).
//
// Returns true when an extension was determined, false otherwise.
static Boolean _CFBundleGrokFileType(CFURLRef url, CFDataRef data, CFStringRef *extension, UInt32 *machtype, CFArrayRef *architectures, CFDictionaryRef *infodict, Boolean *hasObjc, uint32_t *objcVersion, uint32_t *objcFlags) {
    int fd = -1;
    const unsigned char *bytes = NULL;
    unsigned char buffer[MAGIC_BYTES_TO_READ];
    CFIndex i, length = 0;
    off_t fileLength = 0;
    const char *ext = NULL;
    UInt32 mt = UNKNOWN_FILETYPE;
#if defined(BINARY_SUPPORT_DYLD)
    Boolean isX11 = false;
#endif /* BINARY_SUPPORT_DYLD */
    Boolean isFile = false, isPlain = true, isZero = true, isSpace = true, hasBOM = false;
    // extensions returned: o, tool, x11app, pef, core, dylib, bundle, elf, jpeg, jp2, tiff, gif, png, pict, icns, ico, rtf, rtfd, pdf, ra, rm, au, aiff, aifc, caf, wav, avi, wmv, ogg, flac, psd, mpeg, mid, zip, jar, sit, cpio, html, ps, mov, qtif, ttf, otf, sfont, bmp, hqx, bin, class, tar, txt, gz, Z, uu, ync, bz, bz2, sh, pl, py, rb, dvi, sgi, tga, mp3, xml, plist, xls, doc, ppt, mp4, m4a, m4b, m4p, m4v, 3gp, 3g2, dmg, cwk, webarchive, dwg, dgn, pfa, pfb, afm, tfm, xcf, cpx, dwf, swf, swc, abw, bom, lit, svg, rdf, x3d, oeb, dtb, docx, xlsx, pptx, sxc, sxd, sxg, sxi, sxm, sxw, odc, odf, odg, oth, odi, odm, odp, ods, cin, exr
    // ??? we do not distinguish between different wm types, returning wmv for any of wmv, wma, or asf
    // ??? we do not distinguish between ordinary documents and template versions (often there is no difference in file contents)
    // ??? the distinctions between docx, xlsx, and pptx may not be entirely reliable
    if (architectures) *architectures = NULL;
    if (infodict) *infodict = NULL;
    if (hasObjc) *hasObjc = false;
    if (objcVersion) *objcVersion = 0;
    if (objcFlags) *objcFlags = 0;
    if (url) {
        Boolean gotPath = FALSE;
        char path[CFMaxPathSize];
        gotPath = CFURLGetFileSystemRepresentation(url, true, (uint8_t *)path, CFMaxPathSize);
        struct statinfo statBuf;
        if (gotPath && stat(path, &statBuf) == 0 && (statBuf.st_mode & S_IFMT) == S_IFREG && (fd = open(path, O_RDONLY | CF_OPENFLGS, 0777)) >= 0) {
            length = read(fd, buffer, MAGIC_BYTES_TO_READ);
            if (length < 0) {
                // read() failed, so buffer holds nothing usable.  Drop the descriptor and
                // leave isFile false so that data (if supplied) is examined instead.
                length = 0;
                close(fd);
                fd = -1;
            } else {
                fileLength = statBuf.st_size;
                bytes = buffer;
                isFile = true;
            }
        }
    }
    if (!isFile && data) {
        length = CFDataGetLength(data);
        fileLength = (off_t)length;
        bytes = CFDataGetBytePtr(data);
        if (length == 0) ext = "txt";
    }
    if (bytes) {
        if (length >= 4) {
            // Phase 1: look the first four bytes (as a big-endian word) up in the magic-number table.
            UInt32 magic = CFSwapInt32HostToBig(*((UInt32 *)bytes));
            for (i = 0; !ext && i < NUM_EXTENSIONS; i++) {
                if (__CFBundleMagicNumbersArray[i] == magic) ext = __CFBundleExtensionsArray + i * EXTENSION_LENGTH;
            }
            if (ext) {
                // Phase 2: the table hit is only provisional; confirm or refine it using the bytes that follow.
                // NOTE: with BINARY_SUPPORT_DYLD the preprocessor splices a second if/else chain in here,
                // so the statement order below must not be rearranged.
                if (0xcafebabe == magic && 8 <= length && 0 != *((UInt16 *)(bytes + 4))) ext = "class";
#if defined(BINARY_SUPPORT_DYLD)
                else if ((int)sizeof(struct mach_header_64) <= length) mt = _CFBundleGrokMachType(fd, bytes, length, extension ? &isX11 : NULL, architectures, infodict, hasObjc, objcVersion, objcFlags);
                if (MH_OBJECT == mt) ext = "o";
                else if (MH_EXECUTE == mt) ext = isX11 ? "x11app" : "tool";
                else if (PEF_FILETYPE == mt) ext = "pef";
                else if (MH_CORE == mt) ext = "core";
                else if (MH_DYLIB == mt) ext = "dylib";
                else if (MH_BUNDLE == mt) ext = "bundle";
#endif /* BINARY_SUPPORT_DYLD */
                // Each of the following rejects the table hit (ext = NULL) when the continuation bytes do not match.
                else if (0x7b5c7274 == magic && (6 > length || 'f' != bytes[4])) ext = NULL;
                else if (0x25504446 == magic && (6 > length || '-' != bytes[4])) ext = NULL;
                else if (0x00010000 == magic && (6 > length || 0 != bytes[4])) ext = NULL;
                else if (0x47494638 == magic && (6 > length || (0x3761 != CFSwapInt16HostToBig(*((UInt16 *)(bytes + 4))) && 0x3961 != CFSwapInt16HostToBig(*((UInt16 *)(bytes + 4)))))) ext = NULL;
                else if (0x0000000c == magic && (6 > length || 0x6a50 != CFSwapInt16HostToBig(*((UInt16 *)(bytes + 4))))) ext = NULL;
                else if (0x2356524d == magic && (6 > length || 0x4c20 != CFSwapInt16HostToBig(*((UInt16 *)(bytes + 4))))) ext = NULL;
                else if (0x28445746 == magic && (6 > length || 0x2056 != CFSwapInt16HostToBig(*((UInt16 *)(bytes + 4))))) ext = NULL;
                else if (0x30373037 == magic && (6 > length || 0x30 != bytes[4] || !isdigit(bytes[5]))) ext = NULL;
                else if (0x41433130 == magic && (6 > length || 0x31 != bytes[4] || !isdigit(bytes[5]))) ext = NULL;
                else if (0x89504e47 == magic && (8 > length || 0x0d0a1a0a != CFSwapInt32HostToBig(*((UInt32 *)(bytes + 4))))) ext = NULL;
                else if (0x53747566 == magic && (8 > length || 0x66497420 != CFSwapInt32HostToBig(*((UInt32 *)(bytes + 4))))) ext = NULL;
                else if (0x3026b275 == magic && (8 > length || 0x8e66cf11 != CFSwapInt32HostToBig(*((UInt32 *)(bytes + 4))))) ext = NULL;
                else if (0x67696d70 == magic && (8 > length || 0x20786366 != CFSwapInt32HostToBig(*((UInt32 *)(bytes + 4))))) ext = NULL;
                else if (0x424f4d53 == magic && (8 > length || 0x746f7265 != CFSwapInt32HostToBig(*((UInt32 *)(bytes + 4))))) ext = NULL;
                else if (0x49544f4c == magic && (8 > length || 0x49544c53 != CFSwapInt32HostToBig(*((UInt32 *)(bytes + 4))))) ext = NULL;
                else if (0x72746664 == magic && (8 > length || 0x00000000 != CFSwapInt32HostToBig(*((UInt32 *)(bytes + 4))))) ext = NULL;
                else if (0x3d796265 == magic && (12 > length || 0x67696e20 != CFSwapInt32HostToBig(*((UInt32 *)(bytes + 4))) || (0x6c696e65 != CFSwapInt32HostToBig(*((UInt32 *)(bytes + 8))) && 0x70617274 != CFSwapInt32HostToBig(*((UInt32 *)(bytes + 8)))))) ext = NULL;
                else if (0x63616666 == magic && (12 > length || 0 != bytes[4] || 0x64657363 != CFSwapInt32HostToBig(*((UInt32 *)(bytes + 8))))) ext = NULL;
                else if (0x504b0304 == magic) ext = _CFBundleGrokFileTypeForZipFile(fd, bytes, length, fileLength);
                else if (0x25215053 == magic) {
                    // "%!PS"
                    if (11 <= length && 0 == ustrncmp(bytes + 4, "-Adobe-", 7)) ext = "ps";
                    else if (14 <= length && 0 == ustrncmp(bytes + 4, "-AdobeFont", 10)) ext = "pfa";
                    else ext = NULL;
                } else if (0x464f524d == magic) {
                    // IFF ("FORM"): the form type lives at offset 8
                    ext = NULL;
                    if (12 <= length) {
                        UInt32 iffMagic = CFSwapInt32HostToBig(*((UInt32 *)(bytes + 8)));
                        if (0x41494646 == iffMagic) ext = "aiff";           // 'AIFF'
                        else if (0x41494643 == iffMagic) ext = "aifc";      // 'AIFC' (was 0x414946, which could never match)
                    }
                } else if (0x52494646 == magic) {
                    // RIFF: the form type lives at offset 8
                    ext = NULL;
                    if (12 <= length) {
                        UInt32 riffMagic = CFSwapInt32HostToBig(*((UInt32 *)(bytes + 8)));
                        if (0x57415645 == riffMagic) ext = "wav";           // 'WAVE'
                        else if (0x41564920 == riffMagic) ext = "avi";      // 'AVI '
                    }
                } else if (0xd0cf11e0 == magic) {
                    // OLE: the little-endian word at offset 48 is turned into a byte offset (512-byte units, plus one) for the helper
                    if (52 <= length) ext = _CFBundleGrokFileTypeForOLEFile(fd, bytes, length, 512 * (1 + CFSwapInt32HostToLittle(*((UInt32 *)(bytes + 48)))));
                } else if (0x62656769 == magic) {
                    // uu: expect "begin NNN " followed, on the next line, by a 61-character printable line starting with 'M'
                    ext = NULL;
                    if (76 <= length && 'n' == bytes[4] && ' ' == bytes[5] && isdigit(bytes[6]) && isdigit(bytes[7]) && isdigit(bytes[8]) && ' ' == bytes[9]) {
                        CFIndex endOfLine = 0;
                        for (i = 10; 0 == endOfLine && i < length; i++) if ('\n' == bytes[i]) endOfLine = i;
                        if (10 <= endOfLine && endOfLine + 62 < length && 'M' == bytes[endOfLine + 1] && '\n' == bytes[endOfLine + 62]) {
                            ext = "uu";
                            for (i = endOfLine + 1; ext && i < endOfLine + 62; i++) if (!isprint(bytes[i])) ext = NULL;
                        }
                    }
                }
            }
            if (extension && !ext) {
                // Phase 3: formats whose signature is not a four-byte magic at offset 0.
                // Every access below is covered by the enclosing length >= 4 or by an explicit length test.
                UInt16 shortMagic = CFSwapInt16HostToBig(*((UInt16 *)bytes));
                if (5 <= length && 0 == bytes[3] && 0 == bytes[4] && ((1 == bytes[1] && 1 == (0xf7 & bytes[2])) || (0 == bytes[1] && (2 == (0xf7 & bytes[2]) || (3 == (0xf7 & bytes[2])))))) ext = "tga";
                else if (8 <= length && (0x6d6f6f76 == CFSwapInt32HostToBig(*((UInt32 *)(bytes + 4))) || 0x6d646174 == CFSwapInt32HostToBig(*((UInt32 *)(bytes + 4))) || 0x77696465 == CFSwapInt32HostToBig(*((UInt32 *)(bytes + 4))))) ext = "mov";
                else if (8 <= length && (0x69647363 == CFSwapInt32HostToBig(*((UInt32 *)(bytes + 4))) || 0x69646174 == CFSwapInt32HostToBig(*((UInt32 *)(bytes + 4))))) ext = "qtif";
                else if (8 <= length && 0x424f424f == CFSwapInt32HostToBig(*((UInt32 *)(bytes + 4)))) ext = "cwk";
                else if (8 <= length && 0x62706c69 == magic && 0x7374 == CFSwapInt16HostToBig(*((UInt16 *)(bytes + 4))) && isdigit(bytes[6]) && isdigit(bytes[7])) {
                    // "bplist" + two digits: a webarchive if "WebMainResource" appears early on, otherwise a plist
                    for (i = 8; !ext && i < 128 && i + 16 <= length; i++) {
                        if (0 == ustrncmp(bytes + i, "WebMainResource", 15)) ext = "webarchive";
                    }
                    if (!ext) ext = "plist";
                } else if (0 == shortMagic && 12 <= length && 0x66747970 == CFSwapInt32HostToBig(*((UInt32 *)(bytes + 4)))) {
                    // ??? may want more ftyp values
                    UInt32 ftyp = CFSwapInt32HostToBig(*((UInt32 *)(bytes + 8)));
                    if (0x6d703431 == ftyp || 0x6d703432 == ftyp || 0x69736f6d == ftyp || 0x69736f32 == ftyp) ext = "mp4";
                    else if (0x4d344120 == ftyp) ext = "m4a";
                    else if (0x4d344220 == ftyp) ext = "m4b";
                    else if (0x4d345020 == ftyp) ext = "m4p";
                    else if (0x4d345620 == ftyp || 0x4d345648 == ftyp || 0x4d345650 == ftyp) ext = "m4v";
                    else if (0x3367 == (ftyp >> 16)) {
                        UInt16 remainder = (ftyp & 0xffff);
                        if (0x6536 == remainder || 0x6537 == remainder || 0x6736 == remainder || 0x7034 == remainder || 0x7035 == remainder || 0x7036 == remainder || 0x7236 == remainder || 0x7336 == remainder || 0x7337 == remainder) ext = "3gp";
                        else if (0x3261 == remainder) ext = "3g2";
                    }
                } else if (0x424d == shortMagic && 18 <= length) {
                    // "BM": accept only when the little-endian word at offset 14 is one of the listed sizes
                    UInt32 btyp = CFSwapInt32HostToLittle(*((UInt32 *)(bytes + 14)));
                    if (40 == btyp || btyp == 12 || btyp == 64 || btyp == 108 || btyp == 124) ext = "bmp";
                } else if (20 <= length && 0 == ustrncmp(bytes + 6, "%!PS-AdobeFont", 14)) ext = "pfb";
                else if (40 <= length && 0x42696e48 == CFSwapInt32HostToBig(*((UInt32 *)(bytes + 34))) && 0x6578 == CFSwapInt16HostToBig(*((UInt16 *)(bytes + 38)))) ext = "hqx";
                else if (128 <= length && 0x6d42494e == CFSwapInt32HostToBig(*((UInt32 *)(bytes + 102)))) ext = "bin";
                else if (128 <= length && 0 == bytes[0] && 0 < bytes[1] && bytes[1] < 64 && 0 == bytes[74] && 0 == bytes[82] && 0 == (fileLength % 128)) {
                    // No signature here: accept as "bin" only if the two lengths at offsets 83 and 87, each
                    // rounded up to 128-byte blocks plus one header block, account for the whole file.
                    UInt32 df = CFSwapInt32HostToBig(*((UInt32 *)(bytes + 83))), rf = CFSwapInt32HostToBig(*((UInt32 *)(bytes + 87))), blocks = 1 + (df + 127) / 128 + (rf + 127) / 128;
                    if (df < 0x00800000 && rf < 0x00800000 && 1 < blocks && (off_t)(128 * blocks) == fileLength) ext = "bin";
                } else if (265 <= length && 0x75737461 == CFSwapInt32HostToBig(*((UInt32 *)(bytes + 257))) && (0x72202000 == CFSwapInt32HostToBig(*((UInt32 *)(bytes + 261))) || 0x7200 == CFSwapInt16HostToBig(*((UInt16 *)(bytes + 261))))) ext = "tar";
                else if (0xfeff == shortMagic || 0xfffe == shortMagic) {
                    ext = "txt";
                    // These words are compared in host byte order without swapping.  Both alternatives spell the
                    // same byte sequence (FF FE '<' 00 'h' 00 't' 00 'm' 00 'l' 00), as read on a little-endian
                    // and on a big-endian host respectively.
                    if (12 <= length && ((0x3cfeff == *((UInt32 *)bytes) && 0x740068 == *((UInt32 *)(bytes + 4)) && 0x6c006d == *((UInt32 *)(bytes + 8))) || (0xfffe3c00 == *((UInt32 *)bytes) && 0x68007400 == *((UInt32 *)(bytes + 4)) && 0x6d006c00 == *((UInt32 *)(bytes + 8))))) ext = "html";
                } else if (0x1f9d == shortMagic) ext = "Z";
                else if (0x1f8b == shortMagic) ext = "gz";
                else if (0x71c7 == shortMagic || 0xc771 == shortMagic) ext = "cpio";
                else if (0xf702 == shortMagic) ext = "dvi";
                else if (0x01da == shortMagic && (0 == bytes[2] || 1 == bytes[2]) && (0 < bytes[3] && 16 > bytes[3])) ext = "sgi";
                else if (0x2321 == shortMagic) {
                    // "#!": classify by the word that follows the last '/' on the first line
                    CFIndex endOfLine = 0, lastSlash = 0;
                    for (i = 2; 0 == endOfLine && i < length; i++) if ('\n' == bytes[i]) endOfLine = i;
                    if (endOfLine > 3) {
                        for (i = endOfLine - 1; 0 == lastSlash && i > 1; i--) if ('/' == bytes[i]) lastSlash = i;
                        if (lastSlash > 0) {
                            if (0 == ustrncmp(bytes + lastSlash + 1, "perl", 4)) ext = "pl";
                            else if (0 == ustrncmp(bytes + lastSlash + 1, "python", 6)) ext = "py";
                            else if (0 == ustrncmp(bytes + lastSlash + 1, "ruby", 4)) ext = "rb";
                            else ext = "sh";
                        }
                    }
                } else if (0xffd8 == shortMagic && 0xff == bytes[2]) ext = "jpeg";
                else if (0x4657 == shortMagic && 0x53 == bytes[2]) ext = "swf";
                else if (0x4357 == shortMagic && 0x53 == bytes[2]) ext = "swc";
                else if (0x4944 == shortMagic && '3' == bytes[2] && 0x20 > bytes[3]) ext = "mp3";
                else if (0x425a == shortMagic && isdigit(bytes[2]) && isdigit(bytes[3])) ext = "bz";
                else if (0x425a == shortMagic && 'h' == bytes[2] && isdigit(bytes[3]) && 8 <= length && (0x31415926 == CFSwapInt32HostToBig(*((UInt32 *)(bytes + 4))) || 0x17724538 == CFSwapInt32HostToBig(*((UInt32 *)(bytes + 4))))) ext = "bz2";
                else if (0x0011 == CFSwapInt16HostToBig(*((UInt16 *)(bytes + 2))) || 0x0012 == CFSwapInt16HostToBig(*((UInt16 *)(bytes + 2)))) ext = "tfm";
            }
        }
        if (extension && !ext) {
            // Phase 4: text sniffing over (at most) the first 512 bytes.
            //??? what about MacOSRoman?
            // Only probe for the BOM when all three bytes exist; shorter inputs would otherwise be read out of bounds.
            if (3 <= length && 0xef == bytes[0] && 0xbb == bytes[1] && 0xbf == bytes[2]) {   // UTF-8 BOM
                hasBOM = true;
                isZero = false;
            }
            for (i = (hasBOM ? 3 : 0); (isPlain || isZero) && !ext && i < length && i < 512; i++) {
                // Kept unsigned so that bytes >= 0x80 compare as such and are valid arguments to isspace().
                unsigned char c = bytes[i];
                if (isPlain && '<' == c && i + 14 <= length && 0 == ustrncasecmp(bytes + i + 1, "!doctype html", 13)) ext = "html";
                if (isSpace && '<' == c && i + 14 <= length) {
                    // First non-whitespace character is '<': look at the tag that follows.
                    if (0 == ustrncasecmp(bytes + i + 1, "!doctype html", 13) || 0 == ustrncasecmp(bytes + i + 1, "head", 4) || 0 == ustrncasecmp(bytes + i + 1, "title", 5) || 0 == ustrncasecmp(bytes + i + 1, "script", 6) || 0 == ustrncasecmp(bytes + i + 1, "html", 4)) {
                        ext = "html";
                    } else if (0 == ustrncasecmp(bytes + i + 1, "?xml", 4)) {
                        // This inner scan advances the outer loop's i; ext is always set on exit, which ends the outer loop.
                        for (i += 4; !ext && i < 128 && i + 20 <= length; i++) {
                            if ('<' == bytes[i]) {
                                if (0 == ustrncasecmp(bytes + i + 1, "abiword", 7)) ext = "abw";
                                else if (0 == ustrncasecmp(bytes + i + 1, "!doctype svg", 12)) ext = "svg";
                                else if (0 == ustrncasecmp(bytes + i + 1, "!doctype rdf", 12)) ext = "rdf";
                                else if (0 == ustrncasecmp(bytes + i + 1, "!doctype x3d", 12)) ext = "x3d";
                                else if (0 == ustrncasecmp(bytes + i + 1, "!doctype html", 13)) ext = "html";
                                else if (0 == ustrncasecmp(bytes + i + 1, "!doctype posingfont", 19)) ext = "sfont";
                                else if (0 == ustrncasecmp(bytes + i + 1, "!doctype plist", 14)) {
                                    for (i += 14; !ext && i < 256 && i + 16 <= length; i++) {
                                        if (0 == ustrncmp(bytes + i, "WebMainResource", 15)) ext = "webarchive";
                                    }
                                    if (!ext) ext = "plist";
                                }
                            }
                        }
                        if (!ext) ext = "xml";
                    }
                }
                // Running classification: all-zero so far / printable-or-whitespace so far / whitespace-only so far.
                if (0 != c) isZero = false;
                if (isZero || 0x7f <= c || (0x20 > c && !isspace(c))) isPlain = false;
                if (isZero || !isspace(c)) isSpace = false;
            }
            if (!ext) {
                if (isPlain) {
                    if (16 <= length && 0 == ustrncmp(bytes, "StartFontMetrics", 16)) ext = "afm";
                    else ext = "txt";
                } else if (isZero && length >= MAGIC_BYTES_TO_READ && fileLength >= 526) {
                    // The scanned prefix was all zero bytes: check for the PICT marker at absolute offset 522.
                    if (isFile) {
                        if (lseek(fd, 512, SEEK_SET) == 512 && read(fd, buffer, MAGIC_BYTES_TO_READ) >= 14) {
                            if (0x001102ff == CFSwapInt32HostToBig(*((UInt32 *)(buffer + 10)))) ext = "pict";
                        }
                    } else {
                        if (526 <= length && 0x001102ff == CFSwapInt32HostToBig(*((UInt32 *)(bytes + 522)))) ext = "pict";
                    }
                }
            }
        }
        if (extension && (!ext || 0 == strcmp(ext, "bz2")) && length >= MAGIC_BYTES_TO_READ && fileLength >= DMG_BYTES_TO_READ) {
            // Phase 5: disk images are recognised by a trailer in the last DMG_BYTES_TO_READ bytes
            // ("koly" at its start, or "cdsaencr" at its very end).
            if (isFile) {
                if (lseek(fd, fileLength - DMG_BYTES_TO_READ, SEEK_SET) == fileLength - DMG_BYTES_TO_READ && read(fd, buffer, DMG_BYTES_TO_READ) >= DMG_BYTES_TO_READ) {
                    if (0x6b6f6c79 == CFSwapInt32HostToBig(*((UInt32 *)buffer)) || (0x63647361 == CFSwapInt32HostToBig(*((UInt32 *)(buffer + DMG_BYTES_TO_READ - 8))) && 0x656e6372 == CFSwapInt32HostToBig(*((UInt32 *)(buffer + DMG_BYTES_TO_READ - 4))))) ext = "dmg";
                }
            } else {
                if (DMG_BYTES_TO_READ <= length && (0x6b6f6c79 == CFSwapInt32HostToBig(*((UInt32 *)(bytes + length - DMG_BYTES_TO_READ))) || (0x63647361 == CFSwapInt32HostToBig(*((UInt32 *)(bytes + length - 8))) && 0x656e6372 == CFSwapInt32HostToBig(*((UInt32 *)(bytes + length - 4)))))) ext = "dmg";
            }
        }
    }
    // ext always points at storage that is not owned by this call (a literal, the extensions table, or a
    // helper's result), hence the no-copy string with kCFAllocatorNull as its contents deallocator.
    if (extension) *extension = ext ? CFStringCreateWithCStringNoCopy(kCFAllocatorSystemDefault, ext, kCFStringEncodingUTF8, kCFAllocatorNull) : NULL;
    if (machtype) *machtype = mt;
    if (fd >= 0) close(fd);
    return (ext ? true : false);
}