ILRepack/ResReader.cs

using System; using System.Collections; using System.Collections.Generic; using System.IO; using System.Resources; using System.Runtime.Serialization; using System.Runtime.Serialization.Formatters.Binary; using System.Text; namespace ILRepacking { /* * Home-made Resource reader, that allows binary iteration over resources (without deserializing them). * Largely 'inspired' from MS ResourceReader */ // The Default Resource File Format (from MS) // // The fundamental problems addressed by the resource file format are: // // * Versioning - A ResourceReader could in theory support many different // file format revisions. // * Storing intrinsic datatypes (ie, ints, Strings, DateTimes, etc) in a compact // format // * Support for user-defined classes - Accomplished using Serialization // * Resource lookups should not require loading an entire resource file - If you // look up a resource, we only load the value for that resource, minimizing working set. // // // There are four sections to the default file format. The first // is the Resource Manager header, which consists of a magic number // that identifies this as a Resource file, and a ResourceSet class name. // The class name is written here to allow users to provide their own // implementation of a ResourceSet (and a matching ResourceReader) to // control policy. If objects greater than a certain size or matching a // certain naming scheme shouldn't be stored in memory, users can tweak that // with their own subclass of ResourceSet. // // The second section in the system default file format is the // RuntimeResourceSet specific header. This contains a version number for // the .resources file, the number of resources in this file, the number of // different types contained in the file, followed by a list of fully // qualified type names. After this, we include an array of hash values for // each resource name, then an array of virtual offsets into the name section // of the file. The hashes allow us to do a binary search on an array of // integers to find a resource name very quickly without doing many string // compares (except for once we find the real type, of course). If a hash // matches, the index into the array of hash values is used as the index // into the name position array to find the name of the resource. The type // table allows us to read multiple different classes from the same file, // including user-defined types, in a more efficient way than using // Serialization, at least when your .resources file contains a reasonable // proportion of base data types such as Strings or ints. We use // Serialization for all the non-instrinsic types. // // The third section of the file is the name section. It contains a // series of resource names, written out as byte-length prefixed little // endian Unicode strings (UTF-16). After each name is a four byte virtual // offset into the data section of the file, pointing to the relevant // string or serialized blob for this resource name. // // The fourth section in the file is the data section, which consists // of a type and a blob of bytes for each item in the file. The type is // an integer index into the type table. The data is specific to that type, // but may be a number written in binary format, a String, or a serialized // Object. // // The system default file format (V1) is as follows: // // What Type of Data // =================================================== =========== // // Resource Manager header // Magic Number (0xBEEFCACE) Int32 // Resource Manager header version Int32 // Num bytes to skip from here to get past this header Int32 // Class name of IResourceReader to parse this file String // Class name of ResourceSet to parse this file String // // RuntimeResourceReader header // ResourceReader version number Int32 // [Only in debug V2 builds - "***DEBUG***"] String // Number of resources in the file Int32 // Number of types in the type table Int32 // Name of each type Set of Strings // Padding bytes for 8-byte alignment (use PAD) Bytes (0-7) // Hash values for each resource name Int32 array, sorted // Virtual offset of each resource name Int32 array, coupled with hash values // Absolute location of Data section Int32 // // RuntimeResourceReader Name Section // Name & virtual offset of each resource Set of (UTF-16 String, Int32) pairs // // RuntimeResourceReader Data Section // Type and Value of each resource Set of (Int32, blob of bytes) pairs // // This implementation, when used with the default ResourceReader class, // loads only the strings that you look up for. It can do string comparisons // without having to create a new String instance due to some memory mapped // file optimizations in the ResourceReader and FastResourceComparer // classes. This keeps the memory we touch to a minimum when loading // resources. // // If you use a different IResourceReader class to read a file, or if you // do case-insensitive lookups (and the case-sensitive lookup fails) then // we will load all the names of each resource and each resource value. // This could probably use some optimization. // // In addition, this supports object serialization in a similar fashion. // We build an array of class types contained in this file, and write it // to RuntimeResourceReader header section of the file. Every resource // will contain its type (as an index into the array of classes) with the data // for that resource. We will use the Runtime's serialization support for this. // // All strings in the file format are written with BinaryReader and // BinaryWriter, which writes out the length of the String in bytes as an // Int32 then the contents as Unicode chars encoded in UTF-8. In the name // table though, each resource name is written in UTF-16 so we can do a // string compare byte by byte against the contents of the file, without // allocating objects. Ideally we'd have a way of comparing UTF-8 bytes // directly against a String object, but that may be a lot of work. // // The offsets of each resource string are relative to the beginning // of the Data section of the file. This way, if a tool decided to add // one resource to a file, it would only need to increment the number of // resources, add the hash & location of last byte in the name section // to the array of resource hashes and resource name positions (carefully // keeping these arrays sorted), add the name to the end of the name & // offset list, possibly add the type list of types types (and increase // the number of items in the type table), and add the resource value at // the end of the file. The other offsets wouldn't need to be updated to // reflect the longer header section. [Serializable] internal enum ResourceTypeCode { Null = 0, String = 1, Boolean = 2, Char = 3, Byte = 4, SByte = 5, Int16 = 6, UInt16 = 7, Int32 = 8, UInt32 = 9, Int64 = 10, UInt64 = 11, Single = 12, Double = 13, Decimal = 14, DateTime = 15, LastPrimitive = 16, TimeSpan = 16, ByteArray = 32, Stream = 33, StartOfUserTypes = 64, } internal class Res { public readonly String name; public readonly String type; public byte[] data; internal readonly int typeCode; internal readonly int dataPos; public Res(string name, string type, byte[] data, int typeCode, int dataPos) { this.name = name; this.type = type; this.data = data; this.typeCode = typeCode; this.dataPos = dataPos; } public bool IsBamlStream { get { return type == "ResourceTypeCode.Stream" && name != null && name.EndsWith(".baml"); } } public bool IsString { get { return type == "ResourceTypeCode.String" || type != null && type.StartsWith("System.String"); } } } internal sealed class ResReader : IEnumerable<Res>, IDisposable { private BinaryReader _store; // backing store we're reading from. private readonly long _nameSectionOffset; // Offset to name section of file. private readonly long _dataSectionOffset; // Offset to Data section of file. private readonly int _numResources; // Num of resources files, in case arrays aren't allocated. private readonly BinaryFormatter _bf; // Version number of .resources file, for compatibility private readonly int _version; private int[] _nameHashes; // hash values for all names. private int[] _namePositions; // relative locations of names private int[] _typeNamePositions; // To delay initialize type table public ResReader(Stream stream) { _store = new BinaryReader(stream, Encoding.UTF8); _bf = new BinaryFormatter(null, new StreamingContext(StreamingContextStates.File | StreamingContextStates.Persistence)); try { // Read ResourceManager header // Check for magic number int magicNum = _store.ReadInt32(); if (magicNum != ResourceManager.MagicNumber) throw new ArgumentException("Resources_StreamNotValid"); // Assuming this is ResourceManager header V1 or greater, hopefully // after the version number there is a number of bytes to skip // to bypass the rest of the ResMgr header. int resMgrHeaderVersion = _store.ReadInt32(); if (resMgrHeaderVersion > 1) { int numBytesToSkip = _store.ReadInt32(); _store.BaseStream.Seek(numBytesToSkip, SeekOrigin.Current); } else { SkipInt32(); // We don't care about numBytesToSkip. // Read in type name for a suitable ResourceReader // Note ResourceWriter & InternalResGen use different Strings. String readerType = _store.ReadString(); // Skip over type name for a suitable ResourceSet SkipString(); } // Read RuntimeResourceSet header // Do file version check int version = _store.ReadInt32(); if (version != 2 && version != 1) throw new ArgumentException("Arg_ResourceFileUnsupportedVersion"); _version = version; _numResources = _store.ReadInt32(); // Read type positions into type positions array. // But delay initialize the type table. int numTypes = _store.ReadInt32(); _typeNamePositions = new int[numTypes]; for (int i = 0; i < numTypes; i++) { _typeNamePositions[i] = (int)_store.BaseStream.Position; // Skip over the Strings in the file. Don't create types. SkipString(); } // Prepare to read in the array of name hashes // Note that the name hashes array is aligned to 8 bytes so // we can use pointers into it on 64 bit machines. (4 bytes // may be sufficient, but let's plan for the future) // Skip over alignment stuff. All public .resources files // should be aligned No need to verify the byte values. long pos = _store.BaseStream.Position; int alignBytes = ((int)pos) & 7; if (alignBytes != 0) { for (int i = 0; i < 8 - alignBytes; i++) { _store.ReadByte(); } } // Read in the array of name hashes _nameHashes = new int[_numResources]; for (int i = 0; i < _numResources; i++) _nameHashes[i] = _store.ReadInt32(); // Read in the array of relative positions for all the names. _namePositions = new int[_numResources]; for (int i = 0; i < _numResources; i++) _namePositions[i] = _store.ReadInt32(); // Read location of data section. _dataSectionOffset = _store.ReadInt32(); // Store current location as start of name section _nameSectionOffset = _store.BaseStream.Position; } catch (EndOfStreamException) { throw new BadImageFormatException("BadImageFormat_ResourcesHeaderCorrupted"); } catch (IndexOutOfRangeException) { throw new BadImageFormatException("BadImageFormat_ResourcesHeaderCorrupted"); } } public void Close() { Dispose(true); } void IDisposable.Dispose() { Dispose(true); } private void Dispose(bool disposing) { if (_store != null) { if (disposing) _store.Close(); _store = null; _namePositions = null; _typeNamePositions = null; _nameHashes = null; } } private void SkipInt32() { _store.BaseStream.Seek(4, SeekOrigin.Current); } private void SkipString() { int stringLength = Read7BitEncodedInt(); _store.BaseStream.Seek(stringLength, SeekOrigin.Current); } private int GetNamePosition(int index) { int r = _namePositions[index]; if (r < 0 || r > _dataSectionOffset - _nameSectionOffset) { throw new FormatException("BadImageFormat_ResourcesNameOutOfSection"); } return r; } IEnumerator IEnumerable.GetEnumerator() { return GetEnumerator(); } public IEnumerator<Res> GetEnumerator() { return GetResources(); } public Object GetObject(Res res) { if (_version == 1) return GetObject_V1(res); return GetObject_V2(res); } private Object GetObject_V2(Res res) { lock (this) { _store.BaseStream.Seek(_dataSectionOffset + res.dataPos, SeekOrigin.Begin); ResourceTypeCode typeCode = (ResourceTypeCode)Read7BitEncodedInt(); switch (typeCode) { case ResourceTypeCode.Null: return null; case ResourceTypeCode.String: return _store.ReadString(); case ResourceTypeCode.Boolean: return _store.ReadBoolean(); case ResourceTypeCode.Char: return (char)_store.ReadUInt16(); case ResourceTypeCode.Byte: return _store.ReadByte(); case ResourceTypeCode.SByte: return _store.ReadSByte(); case ResourceTypeCode.Int16: return _store.ReadInt16(); case ResourceTypeCode.UInt16: return _store.ReadUInt16(); case ResourceTypeCode.Int32: return _store.ReadInt32(); case ResourceTypeCode.UInt32: return _store.ReadUInt32(); case ResourceTypeCode.Int64: return _store.ReadInt64(); case ResourceTypeCode.UInt64: return _store.ReadUInt64(); case ResourceTypeCode.Single: return _store.ReadSingle(); case ResourceTypeCode.Double: return _store.ReadDouble(); case ResourceTypeCode.Decimal: return _store.ReadDecimal(); case ResourceTypeCode.DateTime: // Use DateTime's ToBinary & FromBinary. Int64 data = _store.ReadInt64(); return DateTime.FromBinary(data); case ResourceTypeCode.TimeSpan: Int64 ticks = _store.ReadInt64(); return new TimeSpan(ticks); // Special types case ResourceTypeCode.ByteArray: { int len = _store.ReadInt32(); return _store.ReadBytes(len); } case ResourceTypeCode.Stream: { int len = _store.ReadInt32(); byte[] bytes = _store.ReadBytes(len); // Lifetime of memory == lifetime of this stream. return new MemoryStream(bytes); } } // Normal serialized objects return _bf.Deserialize(_store.BaseStream); } } private Object GetObject_V1(Res res) { lock (this) { _store.BaseStream.Seek(_dataSectionOffset + res.dataPos, SeekOrigin.Begin); int typeIndex = Read7BitEncodedInt(); if (typeIndex == -1) return null; var typeName = TypeNameFromTypeIndex(typeIndex); var type = Type.GetType(typeName, true); if (type == typeof(string)) return this._store.ReadString(); if (type == typeof(int)) return this._store.ReadInt32(); if (type == typeof(byte)) return this._store.ReadByte(); if (type == typeof(sbyte)) return this._store.ReadSByte(); if (type == typeof(short)) return this._store.ReadInt16(); if (type == typeof(long)) return this._store.ReadInt64(); if (type == typeof(ushort)) return this._store.ReadUInt16(); if (type == typeof(uint)) return this._store.ReadUInt32(); if (type == typeof(ulong)) return this._store.ReadUInt64(); if (type == typeof(float)) return this._store.ReadSingle(); if (type == typeof(double)) return this._store.ReadDouble(); if (type == typeof(DateTime)) return new DateTime(this._store.ReadInt64()); if (type == typeof(TimeSpan)) return new TimeSpan(this._store.ReadInt64()); if (type == typeof(decimal)) { int[] array = new int[4]; for (int i = 0; i < array.Length; i++) { array[i] = this._store.ReadInt32(); } return new decimal(array); } // Normal serialized objects return _bf.Deserialize(_store.BaseStream); } } internal IEnumerator<Res> GetResources() { // Get the type information from the data section. Also, // sort all of the data section's indexes to compute length of // the serialized data for this type (making sure to subtract // off the length of the type code). KeyValuePair<int, string>[] dataPositionsAndNames = new KeyValuePair<int, string>[_numResources]; lock (this) { // Read all the positions of data within the data section. for (int i = 0; i < _numResources; i++) { _store.BaseStream.Position = _nameSectionOffset + GetNamePosition(i); // Skip over name of resource int byteLen = Read7BitEncodedInt(); var bytes = _store.ReadBytes(byteLen); if (bytes.Length != byteLen) throw new FormatException("BadImageFormat_ResourceNameCorrupted_NameIndex"); dataPositionsAndNames[i] = new KeyValuePair<int, string>(_store.ReadInt32(), Encoding.Unicode.GetString(bytes, 0, byteLen)); } Array.Sort(dataPositionsAndNames, (a,b) => a.Key-b.Key); for (int i = 0; i < _numResources; i++) { int dataPos = dataPositionsAndNames[i].Key; long nextData = (i < _numResources - 1) ? dataPositionsAndNames[i + 1].Key + _dataSectionOffset : _store.BaseStream.Length; // Read type code then byte[] _store.BaseStream.Position = _dataSectionOffset + dataPos; int typeCode = Read7BitEncodedInt(); string resourceType = TypeNameFromTypeCode(typeCode); int len = (int)(nextData - _store.BaseStream.Position); byte[] bytes = _store.ReadBytes(len); if (bytes.Length != len) throw new FormatException("BadImageFormat_ResourceNameCorrupted"); yield return new Res(dataPositionsAndNames[i].Value, resourceType, bytes, typeCode, dataPos); } } yield break; } internal int Read7BitEncodedInt() { // Read out an Int32 7 bits at a time. The high bit // of the byte when on means to continue reading more bytes. int count = 0; int shift = 0; byte b; do { // Check for a corrupted stream. Read a max of 5 bytes. // In a future version, add a DataFormatException. if (shift == 5 * 7) // 5 bytes max per Int32, shift += 7 throw new FormatException("Format_Bad7BitInt32"); // ReadByte handles end of stream cases for us. b = _store.ReadByte(); count |= (b & 0x7F) << shift; shift += 7; } while ((b & 0x80) != 0); return count; } private String TypeNameFromTypeIndex(int typeIndex) { long oldPos = _store.BaseStream.Position; try { _store.BaseStream.Position = _typeNamePositions[typeIndex]; return _store.ReadString(); } finally { _store.BaseStream.Position = oldPos; } } private String TypeNameFromTypeCode(int typeCode) { if (_version == 1) { return TypeNameFromTypeIndex(typeCode); } // _version == 2 var tc = (ResourceTypeCode) typeCode; if (tc < ResourceTypeCode.StartOfUserTypes) { return "ResourceTypeCode." + tc; } else { return TypeNameFromTypeIndex(tc - ResourceTypeCode.StartOfUserTypes); } } } }

ILRepack/ResReader.cs (366 lines of code) (raw):