extensions/windows-event-log/wel/JSONUtils.cpp (176 lines of code) (raw):

/** * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #include "JSONUtils.h" #include <algorithm> #include <string> #include <pugixml.hpp> #include "rapidjson/document.h" #include "rapidjson/writer.h" #include "rapidjson/stringbuffer.h" #include "utils/gsl.h" #include "utils/StringUtils.h" namespace org::apache::nifi::minifi::wel { namespace { rapidjson::Value xmlElementToJSON(const pugi::xml_node& node, rapidjson::Document& doc) { gsl_Expects(node.type() == pugi::xml_node_type::node_element); rapidjson::Value object(rapidjson::kObjectType); object.AddMember("name", rapidjson::StringRef(node.name()), doc.GetAllocator()); auto& attributes = object.AddMember("attributes", rapidjson::kObjectType, doc.GetAllocator())["attributes"]; for (const auto& attr : node.attributes()) { attributes.AddMember(rapidjson::StringRef(attr.name()), rapidjson::StringRef(attr.value()), doc.GetAllocator()); } auto& children = object.AddMember("children", rapidjson::kArrayType, doc.GetAllocator())["children"]; for (const auto& child : node.children()) { if (child.type() == pugi::xml_node_type::node_element) { children.PushBack(xmlElementToJSON(child, doc), doc.GetAllocator()); } } object.AddMember("text", rapidjson::StringRef(node.text().get()), doc.GetAllocator()); return object; } rapidjson::Value xmlDocumentToJSON(const pugi::xml_node& node, rapidjson::Document& doc) { gsl_Expects(node.type() == pugi::xml_node_type::node_document); rapidjson::Value children(rapidjson::kArrayType); for (const auto& child : node.children()) { if (child.type() == pugi::xml_node_type::node_element) { children.PushBack(xmlElementToJSON(child, doc), doc.GetAllocator()); } } return children; } void simplifiedGenericXmlToJson(const pugi::xml_node& source_node, rapidjson::Value& output_value, rapidjson::Document::AllocatorType& allocator, std::optional<std::string> prefix_for_flat_structure) { gsl_Expects(source_node.type() == pugi::xml_node_type::node_element); const bool is_flattened = prefix_for_flat_structure.has_value(); for (const auto& attr : source_node.attributes()) { if (attr.name() == std::string_view{"xmlns"}) { continue; // skip xmlns attribute, because it's metadata } if (!is_flattened) { output_value.AddMember(rapidjson::StringRef(attr.name()), rapidjson::StringRef(attr.value()), allocator); } else { output_value.AddMember(rapidjson::Value(*prefix_for_flat_structure + attr.name(), allocator).Move(), rapidjson::StringRef(attr.value()), allocator); } } for (const auto& child: source_node.children()) { if (child.type() == pugi::xml_node_type::node_element) { const auto is_pcdata = [](const pugi::xml_node& node) { return node.type() == pugi::xml_node_type::node_pcdata; }; if (std::all_of(child.children().begin(), child.children().end(), is_pcdata)) { // all children are pcdata (text): leaf node if (!is_flattened) { output_value.AddMember(rapidjson::StringRef(child.name()), rapidjson::StringRef(child.text().get()), allocator); } else { output_value.AddMember(rapidjson::Value(*prefix_for_flat_structure + child.name(), allocator).Move(), rapidjson::StringRef(child.text().get()), allocator); } } else { // there are non-text children: recurse further auto& child_val = is_flattened ? output_value : output_value.AddMember(rapidjson::StringRef(child.name()), rapidjson::kObjectType, allocator)[child.name()]; auto new_prefix = is_flattened ? std::optional(*prefix_for_flat_structure + child.name() + ".") : std::nullopt; simplifiedGenericXmlToJson(child, child_val, allocator, new_prefix); } } } } std::string createUniqueKey(const std::string& key, const rapidjson::Value& parent) { auto proposed_key = key; size_t postfix = 1; while (parent.HasMember(proposed_key)) { proposed_key = key + std::to_string(postfix++); } return proposed_key; } rapidjson::Document toJSONImpl(const pugi::xml_node& root, bool flatten) { rapidjson::Document doc{rapidjson::kObjectType}; auto event_xml = root.child("Event"); { auto system_xml = event_xml.child("System"); auto& system = flatten ? doc : doc.AddMember("System", rapidjson::kObjectType, doc.GetAllocator())["System"]; { auto provider_xml = system_xml.child("Provider"); auto& provider = flatten ? doc : system.AddMember("Provider", rapidjson::kObjectType, doc.GetAllocator())["Provider"]; provider.AddMember("Name", rapidjson::StringRef(provider_xml.attribute("Name").value()), doc.GetAllocator()); provider.AddMember("Guid", rapidjson::StringRef(provider_xml.attribute("Guid").value()), doc.GetAllocator()); } system.AddMember("EventID", rapidjson::StringRef(system_xml.child("EventID").text().get()), doc.GetAllocator()); system.AddMember("Version", rapidjson::StringRef(system_xml.child("Version").text().get()), doc.GetAllocator()); system.AddMember("Level", rapidjson::StringRef(system_xml.child("Level").text().get()), doc.GetAllocator()); system.AddMember("Task", rapidjson::StringRef(system_xml.child("Task").text().get()), doc.GetAllocator()); system.AddMember("Opcode", rapidjson::StringRef(system_xml.child("Opcode").text().get()), doc.GetAllocator()); system.AddMember("Keywords", rapidjson::StringRef(system_xml.child("Keywords").text().get()), doc.GetAllocator()); { auto timeCreated_xml = system_xml.child("TimeCreated"); auto& timeCreated = flatten ? doc : system.AddMember("TimeCreated", rapidjson::kObjectType, doc.GetAllocator())["TimeCreated"]; timeCreated.AddMember("SystemTime", rapidjson::StringRef(timeCreated_xml.attribute("SystemTime").value()), doc.GetAllocator()); } system.AddMember("EventRecordID", rapidjson::StringRef(system_xml.child("EventRecordID").text().get()), doc.GetAllocator()); { auto correlation_xml = system_xml.child("Correlation"); auto& correlation = flatten ? doc : system.AddMember("Correlation", rapidjson::kObjectType, doc.GetAllocator())["Correlation"]; const auto activity_id = correlation_xml.attribute("ActivityID"); if (!activity_id.empty()) { correlation.AddMember("ActivityID", rapidjson::StringRef(activity_id.value()), doc.GetAllocator()); } } { auto execution_xml = system_xml.child("Execution"); auto& execution = flatten ? doc : system.AddMember("Execution", rapidjson::kObjectType, doc.GetAllocator())["Execution"]; execution.AddMember("ProcessID", rapidjson::StringRef(execution_xml.attribute("ProcessID").value()), doc.GetAllocator()); execution.AddMember("ThreadID", rapidjson::StringRef(execution_xml.attribute("ThreadID").value()), doc.GetAllocator()); } system.AddMember("Channel", rapidjson::StringRef(system_xml.child("Channel").text().get()), doc.GetAllocator()); system.AddMember("Computer", rapidjson::StringRef(system_xml.child("Computer").text().get()), doc.GetAllocator()); { auto security_xml = system_xml.child("Security"); auto& security = flatten ? doc : system.AddMember("Security", rapidjson::kObjectType, doc.GetAllocator())["Security"]; security.AddMember("UserID", rapidjson::StringRef(security_xml.attribute("UserID").value()), doc.GetAllocator()); } } { auto eventData_xml = event_xml.child("EventData"); if (flatten) { for (const auto& event_data_child : eventData_xml.children()) { std::string key = "EventData"; if (auto name_attr = event_data_child.attribute("Name"); !name_attr.empty()) { key = utils::StringUtils::join_pack(key, ".", name_attr.value()); } doc.AddMember(rapidjson::Value(createUniqueKey(key, doc), doc.GetAllocator()).Move(), rapidjson::StringRef(event_data_child.text().get()), doc.GetAllocator()); } } else { doc.AddMember("EventData", rapidjson::kArrayType, doc.GetAllocator()); for (const auto& event_data_child : eventData_xml.children()) { auto name_attr = event_data_child.attribute("Name"); rapidjson::Value item(rapidjson::kObjectType); item.AddMember("Name", rapidjson::StringRef(name_attr.value()), doc.GetAllocator()); item.AddMember("Content", rapidjson::StringRef(event_data_child.text().get()), doc.GetAllocator()); item.AddMember("Type", rapidjson::StringRef(event_data_child.name()), doc.GetAllocator()); doc["EventData"].PushBack(item, doc.GetAllocator()); // we need to re-query EventData because a reference to it wouldn't be stable } } } const auto userdata_xml = event_xml.child("UserData"); if (!userdata_xml.empty()) { auto& userdata = flatten ? doc : doc.AddMember("UserData", rapidjson::kObjectType, doc.GetAllocator())["UserData"]; auto prefix = flatten ? std::optional("UserData.") : std::nullopt; simplifiedGenericXmlToJson(userdata_xml, userdata, doc.GetAllocator(), prefix); } return doc; } } // namespace rapidjson::Document toRawJSON(const pugi::xml_node& root) { rapidjson::Document doc; if (root.type() == pugi::xml_node_type::node_document) { static_cast<rapidjson::Value&>(doc) = xmlDocumentToJSON(root, doc); } return doc; } rapidjson::Document toSimpleJSON(const pugi::xml_node& root) { return toJSONImpl(root, false); } rapidjson::Document toFlattenedJSON(const pugi::xml_node& root) { return toJSONImpl(root, true); } std::string jsonToString(const rapidjson::Document& doc) { rapidjson::StringBuffer buffer; rapidjson::Writer<rapidjson::StringBuffer> writer(buffer); doc.Accept(writer); return buffer.GetString(); } } // namespace org::apache::nifi::minifi::wel