extensions/windows-event-log/wel/JSONUtils.cpp (176 lines of code) (raw):
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "JSONUtils.h"
#include <algorithm>
#include <string>
#include <pugixml.hpp>
#include "rapidjson/document.h"
#include "rapidjson/writer.h"
#include "rapidjson/stringbuffer.h"
#include "utils/gsl.h"
#include "utils/StringUtils.h"
namespace org::apache::nifi::minifi::wel {
namespace {
/**
 * Converts a single XML element into a JSON object with four members, in this order:
 *   "name"       - the element name
 *   "attributes" - an object mapping each attribute name to its value
 *   "children"   - an array with the recursively converted child elements (non-element children are skipped)
 *   "text"       - the text of the element as returned by node.text().get()
 *
 * Strings are added through rapidjson::StringRef, i.e. they point into the pugixml data instead of
 * being copied, so the XML tree has to stay alive for as long as the returned value is used.
 *
 * @param node the element to convert; must be of type node_element
 * @param doc  the document whose allocator is used for every allocation
 */
rapidjson::Value xmlElementToJSON(const pugi::xml_node& node, rapidjson::Document& doc) {
  gsl_Expects(node.type() == pugi::xml_node_type::node_element);
  auto& allocator = doc.GetAllocator();

  rapidjson::Value attribute_map(rapidjson::kObjectType);
  for (const auto& attribute : node.attributes()) {
    attribute_map.AddMember(rapidjson::StringRef(attribute.name()), rapidjson::StringRef(attribute.value()), allocator);
  }

  rapidjson::Value child_elements(rapidjson::kArrayType);
  for (const auto& candidate : node.children()) {
    if (candidate.type() != pugi::xml_node_type::node_element) {
      continue;  // only elements are represented in the "children" array
    }
    child_elements.PushBack(xmlElementToJSON(candidate, doc), allocator);
  }

  // assemble the result; the member order is part of the produced JSON
  rapidjson::Value result(rapidjson::kObjectType);
  result.AddMember("name", rapidjson::StringRef(node.name()), allocator);
  result.AddMember("attributes", attribute_map, allocator);
  result.AddMember("children", child_elements, allocator);
  result.AddMember("text", rapidjson::StringRef(node.text().get()), allocator);
  return result;
}
/**
 * Converts an XML document node into a JSON array that holds the converted form
 * (see xmlElementToJSON) of each element directly below the document node.
 * Children that are not elements are skipped.
 *
 * @param node the document node; must be of type node_document
 * @param doc  the document whose allocator is used for every allocation
 */
rapidjson::Value xmlDocumentToJSON(const pugi::xml_node& node, rapidjson::Document& doc) {
  gsl_Expects(node.type() == pugi::xml_node_type::node_document);
  auto& allocator = doc.GetAllocator();

  rapidjson::Value top_level_elements(rapidjson::kArrayType);
  for (const auto& candidate : node.children()) {
    if (candidate.type() != pugi::xml_node_type::node_element) {
      continue;
    }
    top_level_elements.PushBack(xmlElementToJSON(candidate, doc), allocator);
  }
  return top_level_elements;
}
/**
 * Writes the attributes and child elements of source_node into output_value as JSON members.
 *
 *  - Every attribute except "xmlns" becomes a string member.
 *  - A child element whose children are all text (pcdata) nodes - which includes a child without
 *    any children - becomes a string member holding child.text().get().
 *  - Any other child element is converted recursively.
 *
 * Two layouts are supported, selected by prefix_for_flat_structure:
 *  - std::nullopt: nested layout. Member names are the plain attribute/element names and a
 *    non-leaf child becomes a nested object named after the child.
 *  - a prefix: flat layout. Everything is added directly to output_value, member names are
 *    prefix + name, and the prefix passed down for a non-leaf child is prefix + child name + ".".
 *
 * Values (and, in the nested layout, names) are added through rapidjson::StringRef, i.e. they point
 * into the pugixml data instead of being copied, so the XML tree has to outlive the JSON value.
 *
 * @param source_node               the element to convert; must be of type node_element
 * @param output_value              the JSON object that receives the members
 * @param allocator                 allocator of the document that owns output_value
 * @param prefix_for_flat_structure name prefix for the flat layout, or std::nullopt for the nested layout
 */
void simplifiedGenericXmlToJson(const pugi::xml_node& source_node,
    rapidjson::Value& output_value,
    rapidjson::Document::AllocatorType& allocator,
    std::optional<std::string> prefix_for_flat_structure) {
  gsl_Expects(source_node.type() == pugi::xml_node_type::node_element);
  const bool is_flattened = prefix_for_flat_structure.has_value();

  for (const auto& attr : source_node.attributes()) {
    if (attr.name() == std::string_view{"xmlns"}) {
      continue;  // skip xmlns attribute, because it's metadata
    }
    if (!is_flattened) {
      output_value.AddMember(rapidjson::StringRef(attr.name()), rapidjson::StringRef(attr.value()), allocator);
    } else {
      // the prefixed name is a temporary string, so it has to be copied into the document
      output_value.AddMember(rapidjson::Value(*prefix_for_flat_structure + attr.name(), allocator).Move(), rapidjson::StringRef(attr.value()), allocator);
    }
  }

  const auto is_pcdata = [](const pugi::xml_node& node) { return node.type() == pugi::xml_node_type::node_pcdata; };

  for (const auto& child : source_node.children()) {
    if (child.type() != pugi::xml_node_type::node_element) {
      continue;
    }

    if (std::all_of(child.children().begin(), child.children().end(), is_pcdata)) {
      // all children are pcdata (text): leaf node
      if (!is_flattened) {
        output_value.AddMember(rapidjson::StringRef(child.name()), rapidjson::StringRef(child.text().get()), allocator);
      } else {
        output_value.AddMember(rapidjson::Value(*prefix_for_flat_structure + child.name(), allocator).Move(), rapidjson::StringRef(child.text().get()), allocator);
      }
      continue;
    }

    // there are non-text children: recurse further
    if (is_flattened) {
      simplifiedGenericXmlToJson(child, output_value, allocator, *prefix_for_flat_structure + child.name() + ".");
    } else {
      output_value.AddMember(rapidjson::StringRef(child.name()), rapidjson::kObjectType, allocator);
      // Use the member that was just appended instead of looking it up by name: operator[] returns
      // the first member with a matching name, so with repeated sibling element names the content
      // of a later sibling would end up in the object of the first one.
      auto& nested_object = (output_value.MemberEnd() - 1)->value;
      simplifiedGenericXmlToJson(child, nested_object, allocator, std::nullopt);
    }
  }
}
/**
 * Returns a member name based on key that is not yet present in parent.
 *
 * key itself is returned if parent has no such member; otherwise the first of
 * key + "1", key + "2", ... that parent does not contain.
 *
 * @param key    the preferred member name
 * @param parent the JSON object that is checked for existing members
 */
std::string createUniqueKey(const std::string& key, const rapidjson::Value& parent) {
  if (!parent.HasMember(key)) {
    return key;
  }
  for (size_t suffix = 1;; ++suffix) {
    auto candidate = key + std::to_string(suffix);
    if (!parent.HasMember(candidate)) {
      return candidate;
    }
  }
}
/**
 * Builds a JSON document from the "Event" child of root.
 *
 * Nested layout (flatten == false):
 *   "System":    object with Provider{Name, Guid}, EventID, Version, Level, Task, Opcode, Keywords,
 *                TimeCreated{SystemTime}, EventRecordID, Correlation{ActivityID, only if the attribute exists},
 *                Execution{ProcessID, ThreadID}, Channel, Computer, Security{UserID}
 *   "EventData": array with one {Name, Content, Type} object per child of the EventData element
 *   "UserData":  only if the event has a UserData element; filled by simplifiedGenericXmlToJson
 *
 * Flat layout (flatten == true): the same System fields are added directly to the document under
 * their own names (Name, Guid, EventID, ..., UserID) in the same order; EventData children become
 * "EventData.<Name attribute>" (or "EventData" when there is no Name attribute), made unique with
 * createUniqueKey; UserData content is added with the "UserData." prefix.
 *
 * Most strings are added through rapidjson::StringRef, i.e. they point into the pugixml data
 * instead of being copied, so the XML tree has to outlive the returned document.
 */
rapidjson::Document toJSONImpl(const pugi::xml_node& root, bool flatten) {
  rapidjson::Document doc{rapidjson::kObjectType};
  auto& alloc = doc.GetAllocator();
  const auto event_xml = root.child("Event");

  // Returns the value that the fields of section `name` are written to: the document itself in
  // the flat layout, otherwise a new empty object added to `parent` under `name`.
  // The returned reference must not be kept across further additions to `parent`.
  const auto open_section = [&](rapidjson::Value& parent, const char* name) -> rapidjson::Value& {
    if (flatten) {
      return doc;
    }
    return parent.AddMember(rapidjson::StringRef(name), rapidjson::kObjectType, alloc)[name];
  };

  // Adds the text of the child element called `name` as member `name`.
  const auto add_child_text = [&](rapidjson::Value& target, const pugi::xml_node& parent_xml, const char* name) {
    target.AddMember(rapidjson::StringRef(name), rapidjson::StringRef(parent_xml.child(name).text().get()), alloc);
  };

  // Adds the value of the attribute called `name` as member `name`.
  const auto add_attribute = [&](rapidjson::Value& target, const pugi::xml_node& element_xml, const char* name) {
    target.AddMember(rapidjson::StringRef(name), rapidjson::StringRef(element_xml.attribute(name).value()), alloc);
  };

  {
    const auto system_xml = event_xml.child("System");
    auto& system = open_section(doc, "System");

    {
      const auto provider_xml = system_xml.child("Provider");
      auto& provider = open_section(system, "Provider");
      add_attribute(provider, provider_xml, "Name");
      add_attribute(provider, provider_xml, "Guid");
    }

    for (const char* field : {"EventID", "Version", "Level", "Task", "Opcode", "Keywords"}) {
      add_child_text(system, system_xml, field);
    }

    {
      const auto time_created_xml = system_xml.child("TimeCreated");
      auto& time_created = open_section(system, "TimeCreated");
      add_attribute(time_created, time_created_xml, "SystemTime");
    }

    add_child_text(system, system_xml, "EventRecordID");

    {
      const auto correlation_xml = system_xml.child("Correlation");
      auto& correlation = open_section(system, "Correlation");
      // ActivityID is the only field that is left out when the attribute is missing
      if (const auto activity_id = correlation_xml.attribute("ActivityID"); !activity_id.empty()) {
        correlation.AddMember("ActivityID", rapidjson::StringRef(activity_id.value()), alloc);
      }
    }

    {
      const auto execution_xml = system_xml.child("Execution");
      auto& execution = open_section(system, "Execution");
      add_attribute(execution, execution_xml, "ProcessID");
      add_attribute(execution, execution_xml, "ThreadID");
    }

    add_child_text(system, system_xml, "Channel");
    add_child_text(system, system_xml, "Computer");

    {
      const auto security_xml = system_xml.child("Security");
      auto& security = open_section(system, "Security");
      add_attribute(security, security_xml, "UserID");
    }
  }

  const auto event_data_xml = event_xml.child("EventData");
  if (flatten) {
    for (const auto& entry : event_data_xml.children()) {
      std::string key = "EventData";
      if (auto name_attr = entry.attribute("Name"); !name_attr.empty()) {
        key = utils::StringUtils::join_pack(key, ".", name_attr.value());
      }
      // the key is a temporary string, so it is copied into the document
      rapidjson::Value unique_name(createUniqueKey(key, doc), alloc);
      doc.AddMember(unique_name, rapidjson::StringRef(entry.text().get()), alloc);
    }
  } else {
    doc.AddMember("EventData", rapidjson::kArrayType, alloc);
    for (const auto& entry : event_data_xml.children()) {
      rapidjson::Value item(rapidjson::kObjectType);
      item.AddMember("Name", rapidjson::StringRef(entry.attribute("Name").value()), alloc);
      item.AddMember("Content", rapidjson::StringRef(entry.text().get()), alloc);
      item.AddMember("Type", rapidjson::StringRef(entry.name()), alloc);
      // EventData is looked up again for every item rather than holding a reference across the loop
      doc["EventData"].PushBack(item, alloc);
    }
  }

  if (const auto userdata_xml = event_xml.child("UserData"); !userdata_xml.empty()) {
    auto& userdata = open_section(doc, "UserData");
    std::optional<std::string> prefix;
    if (flatten) {
      prefix = "UserData.";
    }
    simplifiedGenericXmlToJson(userdata_xml, userdata, alloc, prefix);
  }

  return doc;
}
} // namespace
/**
 * Converts a whole XML document into its generic JSON form: an array of the
 * name/attributes/children/text objects produced by xmlDocumentToJSON.
 *
 * If root is not a document node, the default-constructed document is returned unchanged.
 */
rapidjson::Document toRawJSON(const pugi::xml_node& root) {
  rapidjson::Document doc;
  if (root.type() != pugi::xml_node_type::node_document) {
    return doc;
  }
  // assign through the Value base so that only the content of the document is replaced
  rapidjson::Value& doc_content = doc;
  doc_content = xmlDocumentToJSON(root, doc);
  return doc;
}
/**
 * Converts the event below root into the nested JSON layout
 * (toJSONImpl with flattening turned off).
 */
rapidjson::Document toSimpleJSON(const pugi::xml_node& root) {
  constexpr bool flatten = false;
  return toJSONImpl(root, flatten);
}
/**
 * Converts the event below root into the flat JSON layout
 * (toJSONImpl with flattening turned on).
 */
rapidjson::Document toFlattenedJSON(const pugi::xml_node& root) {
  constexpr bool flatten = true;
  return toJSONImpl(root, flatten);
}
/**
 * Serializes doc with rapidjson::Writer and returns the resulting JSON text.
 */
std::string jsonToString(const rapidjson::Document& doc) {
  rapidjson::StringBuffer json_text;
  rapidjson::Writer<rapidjson::StringBuffer> json_writer(json_text);
  doc.Accept(json_writer);
  // the buffer knows its length, so the string is built without scanning for the terminator
  return std::string(json_text.GetString(), json_text.GetSize());
}
} // namespace org::apache::nifi::minifi::wel