FileType identifyFile()

in tools/licenses/lib/filesystem.dart [65:299]


/// Classifies the file called [name] as one of the [FileType] values.
///
/// The checks run in this order and the first one that matches wins:
///
///  1. Hard-coded trailing path segments for specific files that are known
///     to be Latin-1 or that must not be scanned for copyrights.
///  2. Content sniffing for `._*` files and for anything that has a `cairo`
///     path segment.
///  3. The exact base name of the file.
///  4. The file extension.
///  5. Magic bytes in the file contents.
///
/// Anything that is still unmatched is reported as [FileType.text].
///
/// [reader] is only invoked when a check actually needs the file contents;
/// the bytes it returns are kept in a local so that later checks reuse them
/// instead of reading again. For `NOTICE.txt`, [reader] itself is handed to
/// `isMultiLicenseNotice`.
FileType identifyFile(String name, Reader reader) {
  List<int> bytes;
  final List<String> segments = path.split(name);

  // Joins the last [count] path segments of [name] with '/', so that the
  // special cases below can be compared regardless of where the checkout lives.
  String trailingSegments(int count) {
    return segments.reversed.take(count).toList().reversed.join('/');
  }

  if ((trailingSegments(6) == 'third_party/icu/source/extra/uconv/README') || // This specific ICU README isn't in UTF-8.
      (trailingSegments(6) == 'third_party/icu/source/samples/uresb/sr.txt') || // This specific sample contains non-UTF-8 data (unlike other sr.txt files).
      (trailingSegments(2) == 'builds/detect.mk') || // This specific freetype sample contains non-UTF-8 data (unlike other .mk files).
      (trailingSegments(3) == 'third_party/cares/cares.rc')) // This file has a copyright symbol in Latin1 in it
    return FileType.latin1Text;
  if (trailingSegments(6) == 'dart/runtime/tests/vm/dart/bad_snapshot' || // Not any particular format
      trailingSegments(8) == 'third_party/android_tools/ndk/sources/cxx-stl/stlport/src/stlport.rc') // uses the word "copyright" but doesn't have a copyright header
    return FileType.binary;
  final String base = path.basename(name);
  if (base.startsWith('._')) {
    bytes ??= reader();
    if (matchesSignature(bytes, <int>[0x00, 0x05, 0x16, 0x07, 0x00, 0x02, 0x00, 0x00, 0x4d, 0x61, 0x63, 0x20, 0x4f, 0x53, 0x20, 0x58]))
      return FileType.metadata; // The ._* files in Mac OS X archives that gives icons and stuff
  }
  if (segments.contains('cairo')) {
    bytes ??= reader();
    // "Copyright <latin1 copyright symbol> "
    if (hasSubsequence(bytes, <int>[0x43, 0x6f, 0x70, 0x79, 0x72, 0x69, 0x67, 0x68, 0x74, 0x20, 0xA9, 0x20], kMaxSize))
      return FileType.latin1Text;
  }
  // Exact base-name matches take precedence over the extension table below.
  switch (base) {
    // Build files
    case 'DEPS': return FileType.text;
    case 'MANIFEST': return FileType.text;
    // Licenses
    case 'COPYING': return FileType.text;
    case 'LICENSE': return FileType.text;
    case 'NOTICE.txt': return isMultiLicenseNotice(reader) ? FileType.binary : FileType.text;
    case 'NOTICE': return FileType.text;
    // Documentation
    case 'Changes': return FileType.text;
    case 'change.log': return FileType.text;
    case 'ChangeLog': return FileType.text;
    case 'CHANGES.0': return FileType.latin1Text;
    case 'README': return FileType.text;
    case 'TODO': return FileType.text;
    case 'NEWS': return FileType.text;
    case 'README.chromium': return FileType.text;
    case 'README.flutter': return FileType.text;
    case 'README.tests': return FileType.text;
    case 'OWNERS': return FileType.text;
    case 'AUTHORS': return FileType.text;
    // Signatures (found in .jar files typically)
    case 'CERT.RSA': return FileType.binary;
    case 'ECLIPSE_.RSA': return FileType.binary;
    // Binary data files
    case 'tzdata': return FileType.binary;
    case 'compressed_atrace_data.txt': return FileType.binary;
    // Source files that don't use UTF-8
    case 'Messages_de_DE.properties': // has a few non-ASCII characters they forgot to escape (from gnu-libstdc++)
    case 'mmx_blendtmp.h': // author name in comment contains latin1 (mesa)
    case 'calling_convention.txt': // contains a soft hyphen instead of a real hyphen for some reason (mesa)
    // Character encoding data files
    case 'danish-ISO-8859-1.txt':
    case 'eucJP.txt':
    case 'hangul-eucKR.txt':
    case 'hania-eucKR.txt':
    case 'ibm-37-test.txt':
    case 'iso8859-1.txt':
    case 'ISO-8859-2.txt':
    case 'ISO-8859-3.txt':
    case 'koi8r.txt':
      return FileType.latin1Text;
    // Giant data files
    case 'icudtl_dat.S':
    case 'icudtl.dat':
      return FileType.binary;
  }
  switch (path.extension(name)) {
    // C/C++ code
    case '.h': return FileType.text;
    case '.c': return FileType.text;
    case '.cc': return FileType.text;
    case '.cpp': return FileType.text;
    case '.inc': return FileType.text;
    // ObjectiveC code
    case '.m': return FileType.text;
    // Assembler
    case '.asm': return FileType.text;
    // Shell
    case '.sh': return FileType.text;
    case '.bat': return FileType.text;
    // Build files
    case '.in': return FileType.text;
    case '.ac': return FileType.text;
    case '.am': return FileType.text;
    case '.gn': return FileType.text;
    case '.gni': return FileType.text;
    case '.gyp': return FileType.text;
    case '.gypi': return FileType.text;
    // Java code
    case '.java': return FileType.text;
    case '.jar': return FileType.zip; // Java package
    case '.class': return FileType.binary; // compiled Java bytecode (usually found inside .jar archives)
    case '.dex': return FileType.binary; // Dalvik Executable (usually found inside .jar archives)
    // Dart code
    case '.dart': return FileType.text;
    case '.dill': return FileType.binary; // Compiled Dart code
    // LLVM bitcode
    case '.bc': return FileType.binary;
    // Python code
    case '.py':
      bytes ??= reader();
      // # -*- coding: Latin-1 -*-
      if (matchesSignature(bytes, <int>[0x23, 0x20, 0x2d, 0x2a, 0x2d, 0x20, 0x63, 0x6f, 0x64,
                                        0x69, 0x6e, 0x67, 0x3a, 0x20, 0x4c, 0x61, 0x74, 0x69,
                                        0x6e, 0x2d, 0x31, 0x20, 0x2d, 0x2a, 0x2d]))
        return FileType.latin1Text;
      return FileType.text;
    case '.pyc': return FileType.binary; // compiled Python bytecode
    // Machine code
    case '.so': return FileType.binary; // ELF shared object
    case '.xpt': return FileType.binary; // XPCOM Type Library
    // Graphics code
    case '.glsl': return FileType.text;
    case '.spvasm': return FileType.text;
    // Documentation
    case '.md': return FileType.text;
    case '.txt': return FileType.text;
    case '.html': return FileType.text;
    // Fonts
    case '.ttf': return FileType.binary; // TrueType Font
    case '.ttcf': // (mac)
    case '.ttc': return FileType.binary; // TrueType Collection (windows)
    case '.woff': return FileType.binary; // Web Open Font Format
    case '.otf': return FileType.binary; // OpenType Font
    // Graphics formats
    case '.gif': return FileType.binary; // GIF
    case '.png': return FileType.binary; // PNG
    case '.tga': return FileType.binary; // Truevision TGA (TARGA)
    case '.dng': return FileType.binary; // Digital Negative (Adobe RAW format)
    case '.jpg':
    case '.jpeg': return FileType.binary; // JPEG
    case '.ico': return FileType.binary; // Windows icon format
    case '.icns': return FileType.binary; // macOS icon format
    case '.bmp': return FileType.binary; // Windows bitmap format
    case '.wbmp': return FileType.binary; // Wireless bitmap format
    case '.webp': return FileType.binary; // WEBP
    case '.pdf': return FileType.binary; // PDF
    case '.emf': return FileType.binary; // Windows enhanced metafile format
    case '.skp': return FileType.binary; // Skia picture format
    case '.mskp': return FileType.binary; // Skia picture format
    // Videos
    case '.ogg': return FileType.binary; // Ogg media
    case '.mp4': return FileType.binary; // MPEG media
    case '.ts': return FileType.binary; // MPEG2 transport stream
    // Other binary files
    case '.raw': return FileType.binary; // raw audio or graphical data
    case '.bin': return FileType.binary; // some sort of binary data
    case '.rsc': return FileType.binary; // some sort of resource data
    case '.arsc': return FileType.binary; // Android compiled resources
    case '.apk': return FileType.zip; // Android Package
    case '.crx': return FileType.binary; // Chrome extension
    case '.keystore': return FileType.binary;
    case '.icc': return FileType.binary; // Color profile
    case '.swp': return FileType.binary; // Vim swap file
    // Archives
    case '.zip': return FileType.zip; // ZIP
    case '.tar': return FileType.tar; // Tar
    case '.gz': return FileType.gz; // GZip
    case '.bzip2': return FileType.bzip2; // BZip2
    // Image file types from the Fuchsia SDK.
    case '.blk':
    case '.vboot':
    case '.snapshot':
    case '.zbi':
      return FileType.binary;
    // Special cases
    case '.patch':
    case '.diff':
      // Don't try to read the copyright out of patch files, since there'll be fragments.
      return FileType.binary;
    case '.plist':
      // These commonly include the word "copyright" but in a way that isn't necessarily a copyright statement that applies to the file.
      // Since there's so few of them, and none have their own copyright statement, we just treat them as binary files.
      return FileType.binary;
  }
  // Neither the name nor the extension was conclusive; fall back to sniffing
  // the contents for well-known magic numbers.
  bytes ??= reader();
  assert(bytes.isNotEmpty);
  if (matchesSignature(bytes, <int>[0x1F, 0x8B]))
    return FileType.gz; // GZip archive
  if (matchesSignature(bytes, <int>[0x42, 0x5A]))
    return FileType.bzip2; // BZip2 archive
  if (matchesSignature(bytes, <int>[0x42, 0x43]))
    return FileType.binary; // LLVM Bitcode
  if (matchesSignature(bytes, <int>[0xAC, 0xED]))
    return FileType.binary; // Java serialized object
  if (matchesSignature(bytes, <int>[0x4D, 0x5A]))
    return FileType.binary; // MZ executable (DOS, Windows PEs, etc)
  if (matchesSignature(bytes, <int>[0xFF, 0xD8, 0xFF]))
    return FileType.binary; // JPEG
  if (matchesSignature(bytes, <int>[-1, -1, 0xda, 0x27])) // -1 is a wildcard
    return FileType.binary; // ICU data files (.brk, .dict, etc)
  if (matchesSignature(bytes, <int>[0x03, 0x00, 0x08, 0x00]))
    return FileType.binary; // Android Binary XML
  if (matchesSignature(bytes, <int>[0x25, 0x50, 0x44, 0x46]))
    return FileType.binary; // PDF
  if (matchesSignature(bytes, <int>[0x43, 0x72, 0x32, 0x34]))
    return FileType.binary; // Chrome extension
  if (matchesSignature(bytes, <int>[0x4F, 0x67, 0x67, 0x53]))
    return FileType.binary; // Ogg media
  if (matchesSignature(bytes, <int>[0x50, 0x4B, 0x03, 0x04]))
    return FileType.zip; // ZIP archive
  if (matchesSignature(bytes, <int>[0x7F, 0x45, 0x4C, 0x46]))
    return FileType.binary; // ELF
  if (matchesSignature(bytes, <int>[0xCA, 0xFE, 0xBA, 0xBE]))
    return FileType.binary; // compiled Java bytecode (usually found inside .jar archives)
  if (matchesSignature(bytes, <int>[0xCE, 0xFA, 0xED, 0xFE]))
    return FileType.binary; // Mach-O binary, 32 bit, reverse byte ordering scheme
  if (matchesSignature(bytes, <int>[0xCF, 0xFA, 0xED, 0xFE]))
    return FileType.binary; // Mach-O binary, 64 bit, reverse byte ordering scheme
  if (matchesSignature(bytes, <int>[0xFE, 0xED, 0xFA, 0xCE]))
    return FileType.binary; // Mach-O binary, 32 bit
  if (matchesSignature(bytes, <int>[0xFE, 0xED, 0xFA, 0xCF]))
    return FileType.binary; // Mach-O binary, 64 bit
  // "ustar" identifies tar data, so it must map to FileType.tar (the same
  // value the '.tar' extension yields), not to the bzip2 decoder.
  // NOTE(review): in a POSIX tar archive the "ustar" magic sits at byte
  // offset 257 rather than at the start of the file; if matchesSignature only
  // compares from offset 0 this check will miss ordinary tar archives - confirm.
  if (matchesSignature(bytes, <int>[0x75, 0x73, 0x74, 0x61, 0x72]))
    return FileType.tar; // Tar
  if (matchesSignature(bytes, <int>[0x47, 0x49, 0x46, 0x38, 0x37, 0x61]))
    return FileType.binary; // GIF87a
  if (matchesSignature(bytes, <int>[0x47, 0x49, 0x46, 0x38, 0x39, 0x61]))
    return FileType.binary; // GIF89a
  if (matchesSignature(bytes, <int>[0x64, 0x65, 0x78, 0x0A, 0x30, 0x33, 0x35, 0x00]))
    return FileType.binary; // Dalvik Executable
  if (matchesSignature(bytes, <int>[0x21, 0x3C, 0x61, 0x72, 0x63, 0x68, 0x3E, 0x0A])) {
    // TODO(ianh): implement .ar parser, https://github.com/flutter/flutter/issues/25633
    return FileType.binary; // Unix archiver (ar)
  }
  if (matchesSignature(bytes, <int>[0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0a]))
    return FileType.binary; // PNG
  if (matchesSignature(bytes, <int>[0x58, 0x50, 0x43, 0x4f, 0x4d, 0x0a, 0x54, 0x79, 0x70, 0x65, 0x4c, 0x69, 0x62, 0x0d, 0x0a, 0x1a]))
    return FileType.binary; // XPCOM Type Library
  return FileType.text;
}