inline property_info parser::find_property()

in krabs/krabs/parser.hpp [122:197]


    // Looks up the property called `name` in the event's schema and returns
    // the property_info describing where it lives in the record's buffer.
    //
    // Every property visited while scanning is cached, and the scan position
    // (lastPropertyIndex_ / pBufferIndex_) is kept between calls, so a later
    // call resumes where the previous one stopped instead of starting over.
    //
    // Returns a default-constructed property_info when no property with that
    // name is found. Throws std::out_of_range when a property's computed
    // length does not fit in what is left of the buffer.
    inline property_info parser::find_property(const std::wstring &name)
    {
        // A schema contains a collection of properties that are keyed by name.
        // These properties are stored in a blob of bytes that needs to be
        // interpreted according to information that is packaged up in the
        // schema and that can be retrieved using the Tdh* APIs. This format
        // requires a linear traversal over the blob, incrementing according to
        // the contents within it. This is janky, so our strategy is to
        // minimize this as much as possible via caching.

        // The first step is to use our cache for the property to see if we've
        // discovered it already.
        for (const auto &item : propertyCache_) {
            if (name == item.first) {
                return item.second;
            }
        }

        const ULONG totalPropCount = schema_.pSchema_->PropertyCount;

        assert((pBufferIndex_ <= pEndBuffer_ && pBufferIndex_ >= schema_.record_.UserData) &&
               "invariant: we should've already thrown for falling off the edge");

        // accept that last property can be omitted from buffer. this happens if last property
        // is string but empty and the provider strips the null terminator
        assert((pBufferIndex_ == pEndBuffer_ ? ((totalPropCount - lastPropertyIndex_) <= 1)
                                             : true)
               && "invariant: if we've exhausted our buffer, then we must've "
                  "exhausted the properties as well");

        // We've not looked up this property before, so we have to do the work
        // to find it. While we're going through the blob to find it, we'll
        // remember what we've seen to save time later.
        //
        // Question: Why don't we just populate the cache before looking up any
        //           properties and simplify our code (less state, etc)?
        //
        // Answer:   Doing that introduces overhead in the case that only a
        //           subset of properties are needed. While this code is a bit
        //           more complicated, we introduce no additional performance
        //           overhead at runtime.
        //
        // Note that `i` is a reference to lastPropertyIndex_, so advancing it
        // here advances the parser's persistent scan position.
        for (auto &i = lastPropertyIndex_; i < totalPropCount; ++i) {

            auto &currentPropInfo = schema_.pSchema_->EventPropertyInfoArray[i];
            const wchar_t *pName = reinterpret_cast<const wchar_t*>(
                                        reinterpret_cast<BYTE*>(schema_.pSchema_) +
                                        currentPropInfo.NameOffset);

            ULONG propertyLength = size_provider::get_property_size(
                                        pBufferIndex_,
                                        pName,
                                        schema_.record_,
                                        currentPropInfo);

            // Verify that the property fits in what is left of the buffer.
            // Compare against the remaining byte count rather than computing
            // pBufferIndex_ + propertyLength: a bogus length could push that
            // pointer beyond the end of the buffer (undefined behaviour) or
            // wrap it around the address space, letting the check pass.
            const auto bytesRemaining = pEndBuffer_ - pBufferIndex_;
            if (bytesRemaining < 0 ||
                propertyLength > static_cast<unsigned long long>(bytesRemaining)) {
                throw std::out_of_range("Property length past end of property buffer");
            }

            property_info propInfo(pBufferIndex_, currentPropInfo, propertyLength);
            cache_property(pName, propInfo);

            // advance the buffer index since we've already processed this property
            pBufferIndex_ += propertyLength;

            // The property was found, return it
            if (name == pName) {
                // Returning skips the loop's own ++i, so advance the index by
                // hand; otherwise the next call would re-process this property.
                ++i;
                return propInfo;
            }
        }

        // property wasn't found, return an empty propInfo
        return property_info();
    }