sdk/tables/azure-data-tables/src/xml_wrapper.cpp (553 lines of code) (raw):

// Copyright (c) Microsoft Corporation. // Licensed under the MIT License. #include "private/xml_wrapper.hpp" #include <azure/core/platform.hpp> #include <cstring> #include <limits> #include <memory> #include <stdexcept> #if defined(AZ_PLATFORM_WINDOWS) #if !defined(WIN32_LEAN_AND_MEAN) #define WIN32_LEAN_AND_MEAN #endif #if !defined(NOMINMAX) #define NOMINMAX #endif #include <webservices.h> #else // libxml2 uses ICU. ICU 75.1 or later requires C++17 but we use // C++14. It causes an error. We can disable ICU C++ API to avoid it // because we don't need ICU C++ API. #define U_SHOW_CPLUSPLUS_API 0 #include <libxml/xmlreader.h> #include <libxml/xmlwriter.h> #endif namespace Azure { namespace Data { namespace Tables { namespace _detail { namespace Xml { #if defined(AZ_PLATFORM_WINDOWS) struct XmlReaderContext { XmlReaderContext() { HRESULT ret = WsCreateError(nullptr, 0, &error); if (ret != NO_ERROR) { throw std::runtime_error("Failed to initialize xml reader."); } ret = WsCreateReader(nullptr, 0, &reader, error); if (ret != NO_ERROR) { WsFreeError(error); throw std::runtime_error("Failed to initialize xml reader."); } } XmlReaderContext(const XmlReaderContext&) = delete; XmlReaderContext& operator=(const XmlReaderContext&) = delete; ~XmlReaderContext() { WsFreeReader(reader); WsFreeError(error); } WS_XML_READER* reader = nullptr; WS_ERROR* error = nullptr; bool readingAttributes = false; ULONG attributeIndex = 0; const WS_XML_ELEMENT_NODE* attributeElementNode = nullptr; }; XmlReader::XmlReader(const char* data, size_t length) { if (length > static_cast<size_t>((std::numeric_limits<ULONG>::max)())) { throw std::runtime_error("Xml data too big."); } auto context = std::make_unique<XmlReaderContext>(); WS_XML_READER_BUFFER_INPUT bufferInput; ZeroMemory(&bufferInput, sizeof(bufferInput)); bufferInput.input.inputType = WS_XML_READER_INPUT_TYPE_BUFFER; bufferInput.encodedData = const_cast<char*>(data); bufferInput.encodedDataSize = static_cast<ULONG>(length); WS_XML_READER_TEXT_ENCODING textEncoding; ZeroMemory(&textEncoding, sizeof(textEncoding)); textEncoding.encoding.encodingType = WS_XML_READER_ENCODING_TYPE_TEXT; textEncoding.charSet = WS_CHARSET_AUTO; HRESULT ret = WsSetInput( context->reader, &textEncoding.encoding, &bufferInput.input, nullptr, 0, context->error); if (ret != S_OK) { throw std::runtime_error("Failed to initialize xml reader."); } WS_CHARSET charSet; ret = WsGetReaderProperty( context->reader, WS_XML_READER_PROPERTY_CHARSET, &charSet, sizeof(charSet), context->error); if (ret != S_OK) { throw std::runtime_error("Failed to get xml encoding."); } if (charSet != WS_CHARSET_UTF8) { throw std::runtime_error("Unsupported xml encoding."); } m_context = context.release(); } XmlReader::~XmlReader() { if (m_context) { delete static_cast<XmlReaderContext*>(m_context); } } XmlNode XmlReader::Read() { auto context = static_cast<XmlReaderContext*>(m_context); auto moveToNext = [&]() { HRESULT ret = WsReadNode(context->reader, context->error); if (!SUCCEEDED(ret)) { throw std::runtime_error("Failed to parse xml."); } }; if (context->readingAttributes) { const WS_XML_ATTRIBUTE* attribute = context->attributeElementNode->attributes[context->attributeIndex]; std::string name( reinterpret_cast<const char*>(attribute->localName->bytes), attribute->localName->length); if (attribute->value->textType != WS_XML_TEXT_TYPE_UTF8) { throw std::runtime_error("Unsupported xml encoding."); } const WS_XML_UTF8_TEXT* utf8Text = reinterpret_cast<const WS_XML_UTF8_TEXT*>(attribute->value); std::string value( reinterpret_cast<const char*>(utf8Text->value.bytes), utf8Text->value.length); if (++context->attributeIndex == context->attributeElementNode->attributeCount) { moveToNext(); context->readingAttributes = false; context->attributeElementNode = nullptr; context->attributeIndex = 0; } return XmlNode{XmlNodeType::Attribute, std::move(name), std::move(value)}; } const WS_XML_NODE* node; HRESULT ret = WsGetReaderNode(context->reader, &node, context->error); if (!SUCCEEDED(ret)) { throw std::runtime_error("Failed to parse xml."); } switch (node->nodeType) { case WS_XML_NODE_TYPE_ELEMENT: { const WS_XML_ELEMENT_NODE* elementNode = reinterpret_cast<const WS_XML_ELEMENT_NODE*>(node); std::string name( reinterpret_cast<const char*>(elementNode->localName->bytes), elementNode->localName->length); if (elementNode->attributeCount != 0) { context->readingAttributes = true; context->attributeElementNode = elementNode; context->attributeIndex = 0; } else { moveToNext(); } return XmlNode{XmlNodeType::StartTag, std::move(name)}; } case WS_XML_NODE_TYPE_TEXT: { std::string value; while (true) { const WS_XML_TEXT_NODE* textNode = (const WS_XML_TEXT_NODE*)node; if (textNode->text->textType != WS_XML_TEXT_TYPE_UTF8) { throw std::runtime_error("Unsupported xml encoding."); } const WS_XML_UTF8_TEXT* utf8Text = reinterpret_cast<const WS_XML_UTF8_TEXT*>(textNode->text); value += std::string( reinterpret_cast<const char*>(utf8Text->value.bytes), utf8Text->value.length); moveToNext(); ret = WsGetReaderNode(context->reader, &node, context->error); if (!SUCCEEDED(ret)) { throw std::runtime_error("Failed to parse xml."); } if (node->nodeType != WS_XML_NODE_TYPE_TEXT) { break; } } return XmlNode{XmlNodeType::Text, std::string(), std::move(value)}; } case WS_XML_NODE_TYPE_END_ELEMENT: moveToNext(); return XmlNode{XmlNodeType::EndTag}; case WS_XML_NODE_TYPE_EOF: return XmlNode{XmlNodeType::End}; case WS_XML_NODE_TYPE_CDATA: case WS_XML_NODE_TYPE_END_CDATA: case WS_XML_NODE_TYPE_COMMENT: case WS_XML_NODE_TYPE_BOF: moveToNext(); return Read(); default: throw std::runtime_error( "Unknown type " + std::to_string(node->nodeType) + " while parsing xml."); } } struct XmlWriterContext { XmlWriterContext() { HRESULT ret = WsCreateError(nullptr, 0, &error); if (ret != NO_ERROR) { throw std::runtime_error("Failed to initialize xml writer."); } ret = WsCreateWriter(nullptr, 0, &writer, error); if (ret != NO_ERROR) { WsFreeError(error); throw std::runtime_error("Failed to initialize xml writer."); } ret = WsCreateHeap(1024 * 1024 * 1024, 512, nullptr, 0, &heap, error); if (ret != NO_ERROR) { WsFreeWriter(writer); WsFreeError(error); throw std::runtime_error("Failed to initialize xml writer."); } } XmlWriterContext(const XmlWriterContext&) = delete; XmlWriterContext& operator=(const XmlWriterContext&) = delete; ~XmlWriterContext() { WsFreeHeap(heap); WsFreeWriter(writer); WsFreeError(error); } WS_XML_WRITER* writer = nullptr; WS_ERROR* error = nullptr; WS_HEAP* heap = nullptr; WS_XML_BUFFER* buffer = nullptr; }; XmlWriter::XmlWriter() { auto context = std::make_unique<XmlWriterContext>(); HRESULT ret = WsCreateXmlBuffer(context->heap, nullptr, 0, &context->buffer, context->error); if (ret != NO_ERROR) { throw std::runtime_error("Failed to initialize xml writer."); } ret = WsSetOutputToBuffer(context->writer, context->buffer, nullptr, 0, context->error); if (ret != NO_ERROR) { throw std::runtime_error("Failed to initialize xml writer."); } m_context = context.release(); } XmlWriter::~XmlWriter() { if (m_context) { delete static_cast<XmlWriterContext*>(m_context); } } void XmlWriter::Write(XmlNode node) { auto context = static_cast<XmlWriterContext*>(m_context); if (node.Type == XmlNodeType::StartTag) { if (node.HasValue) { Write(XmlNode{XmlNodeType::StartTag, std::move(node.Name)}); Write(XmlNode{XmlNodeType::Text, std::string(), std::move(node.Value)}); Write(XmlNode{XmlNodeType::EndTag}); return; } WS_XML_STRING name; name.bytes = reinterpret_cast<BYTE*>(&node.Name[0]); name.length = static_cast<ULONG>(node.Name.length()); name.dictionary = nullptr; WS_XML_STRING ns = WS_XML_STRING_NULL; HRESULT ret = WsWriteStartElement(context->writer, nullptr, &name, &ns, context->error); if (!SUCCEEDED(ret)) { throw std::runtime_error("Failed to write xml."); } } else if (node.Type == XmlNodeType::EndTag) { HRESULT ret = WsWriteEndElement(context->writer, context->error); if (!SUCCEEDED(ret)) { throw std::runtime_error("Failed to write xml."); } } else if (node.Type == XmlNodeType::Text) { HRESULT ret = WsWriteCharsUtf8( context->writer, reinterpret_cast<const BYTE*>(node.Value.data()), static_cast<ULONG>(node.Value.size()), context->error); if (!SUCCEEDED(ret)) { throw std::runtime_error("Failed to write xml."); } } else if (node.Type == XmlNodeType::Attribute) { WS_XML_STRING name; name.bytes = reinterpret_cast<BYTE*>(&node.Name[0]); name.length = static_cast<ULONG>(node.Name.length()); name.dictionary = nullptr; WS_XML_STRING ns = WS_XML_STRING_NULL; HRESULT ret = WsWriteStartAttribute(context->writer, nullptr, &name, &ns, FALSE, context->error); if (!SUCCEEDED(ret)) { throw std::runtime_error("Failed to write xml."); } Write(XmlNode{XmlNodeType::Text, std::string(), std::move(node.Value)}); ret = WsWriteEndAttribute(context->writer, context->error); if (!SUCCEEDED(ret)) { throw std::runtime_error("Failed to write xml."); } } else if (node.Type == XmlNodeType::End) { } else { throw std::runtime_error( "Unsupported XmlNode type " + std::to_string(static_cast<std::underlying_type<XmlNodeType>::type>(node.Type)) + "."); } } std::string XmlWriter::GetDocument() { auto context = static_cast<XmlWriterContext*>(m_context); BOOL boolValueTrue = TRUE; WS_XML_WRITER_PROPERTY writerProperty[2]; writerProperty[0].id = WS_XML_WRITER_PROPERTY_WRITE_DECLARATION; writerProperty[0].value = &boolValueTrue; writerProperty[0].valueSize = sizeof(boolValueTrue); writerProperty[1].id = WS_XML_WRITER_PROPERTY_BUFFER_MAX_SIZE; ULONG maxBufferSize = 256 * 1024 * 1024UL; writerProperty[1].value = &maxBufferSize; writerProperty[1].valueSize = sizeof(maxBufferSize); void* xml = nullptr; ULONG xmlLength = 0; HRESULT ret = WsWriteXmlBufferToBytes( context->writer, context->buffer, nullptr, writerProperty, sizeof(writerProperty) / sizeof(writerProperty[0]), context->heap, &xml, &xmlLength, context->error); if (!SUCCEEDED(ret)) { throw std::runtime_error("Failed to write xml."); } return std::string(static_cast<const char*>(xml), xmlLength); } #else struct XmlGlobalInitializer final { XmlGlobalInitializer() { xmlInitParser(); } ~XmlGlobalInitializer() { xmlCleanupParser(); } }; static void XmlGlobalInitialize() { static XmlGlobalInitializer globalInitializer; } struct XmlReaderContext { xmlTextReaderPtr reader = nullptr; bool readingAttributes = false; bool readingEmptyTag = false; }; XmlReader::XmlReader(const char* data, size_t length) { XmlGlobalInitialize(); if (length > static_cast<size_t>((std::numeric_limits<int>::max)())) { throw std::runtime_error("Xml data too big."); } xmlTextReaderPtr reader = xmlReaderForMemory(data, static_cast<int>(length), nullptr, nullptr, 0); if (!reader) { throw std::runtime_error("Failed to parse xml."); } XmlReaderContext* context = new XmlReaderContext(); context->reader = reader; m_context = context; } XmlReader::~XmlReader() { if (m_context) { auto context = static_cast<XmlReaderContext*>(m_context); xmlFreeTextReader(static_cast<xmlTextReaderPtr>(context->reader)); delete context; } } XmlNode XmlReader::Read() { auto context = static_cast<XmlReaderContext*>(m_context); if (context->readingAttributes) { int ret = xmlTextReaderMoveToNextAttribute(context->reader); if (ret == 1) { const char* name = reinterpret_cast<const char*>(xmlTextReaderConstName(context->reader)); const char* value = reinterpret_cast<const char*>(xmlTextReaderConstValue(context->reader)); return XmlNode{XmlNodeType::Attribute, name, value}; } else if (ret == 0) { context->readingAttributes = false; } else { throw std::runtime_error("Failed to parse xml."); } } if (context->readingEmptyTag) { context->readingEmptyTag = false; return XmlNode{XmlNodeType::EndTag}; } int ret = xmlTextReaderRead(context->reader); if (ret == 0) { return XmlNode{XmlNodeType::End}; } if (ret != 1) { throw std::runtime_error("Failed to parse xml."); } int type = xmlTextReaderNodeType(context->reader); bool is_empty = xmlTextReaderIsEmptyElement(context->reader) == 1; bool has_value = xmlTextReaderHasValue(context->reader) == 1; bool has_attributes = xmlTextReaderHasAttributes(context->reader) == 1; const char* name = reinterpret_cast<const char*>(xmlTextReaderConstName(context->reader)); const char* value = reinterpret_cast<const char*>(xmlTextReaderConstValue(context->reader)); if (has_attributes) { context->readingAttributes = true; } if (type == XML_READER_TYPE_ELEMENT && is_empty) { context->readingEmptyTag = true; return XmlNode{XmlNodeType::StartTag, name}; } else if (type == XML_READER_TYPE_ELEMENT) { return XmlNode{XmlNodeType::StartTag, name}; } else if (type == XML_READER_TYPE_END_ELEMENT) { return XmlNode{XmlNodeType::EndTag}; } else if (type == XML_READER_TYPE_TEXT) { if (has_value) { return XmlNode{XmlNodeType::Text, std::string(), value}; } } else if (type == XML_READER_TYPE_SIGNIFICANT_WHITESPACE) { // silently ignore } else { throw std::runtime_error("Unknown type " + std::to_string(type) + " while parsing xml."); } return Read(); } struct XmlWriterContext { xmlBufferPtr buffer; xmlTextWriterPtr writer; }; XmlWriter::XmlWriter() { XmlGlobalInitialize(); auto buffer = xmlBufferCreate(); if (!buffer) { throw std::runtime_error("Failed to initialize xml writer."); } auto writer = xmlNewTextWriterMemory(static_cast<xmlBufferPtr>(buffer), 0); if (!writer) { xmlBufferFree(static_cast<xmlBufferPtr>(buffer)); throw std::runtime_error("Failed to initialize xml writer."); } xmlTextWriterStartDocument(static_cast<xmlTextWriterPtr>(writer), nullptr, nullptr, nullptr); auto context = new XmlWriterContext; context->buffer = buffer; context->writer = writer; m_context = context; } XmlWriter::~XmlWriter() { if (m_context) { auto context = static_cast<XmlWriterContext*>(m_context); xmlFreeTextWriter(static_cast<xmlTextWriterPtr>(context->writer)); xmlBufferFree(static_cast<xmlBufferPtr>(context->buffer)); delete context; } } namespace { inline xmlChar* BadCast(const char* x) { return const_cast<xmlChar*>(reinterpret_cast<const xmlChar*>(x)); } } // namespace void XmlWriter::Write(XmlNode node) { auto context = static_cast<XmlWriterContext*>(m_context); xmlTextWriterPtr writer = context->writer; if (node.Type == XmlNodeType::StartTag) { if (!node.HasValue) { xmlTextWriterStartElement(writer, BadCast(node.Name.data())); } else { xmlTextWriterWriteElement(writer, BadCast(node.Name.data()), BadCast(node.Value.data())); } } else if (node.Type == XmlNodeType::EndTag) { xmlTextWriterEndElement(writer); } else if (node.Type == XmlNodeType::Text) { xmlTextWriterWriteString(writer, BadCast(node.Value.data())); } else if (node.Type == XmlNodeType::Attribute) { xmlTextWriterWriteAttribute(writer, BadCast(node.Name.data()), BadCast(node.Value.data())); } else if (node.Type == XmlNodeType::End) { xmlTextWriterEndDocument(writer); } else { throw std::runtime_error( "Unsupported XmlNode type " + std::to_string(static_cast<std::underlying_type<XmlNodeType>::type>(node.Type)) + "."); } } std::string XmlWriter::GetDocument() { auto context = static_cast<XmlWriterContext*>(m_context); xmlBufferPtr buffer = context->buffer; return std::string(reinterpret_cast<const char*>(buffer->content), buffer->use); } #endif }}}}} // namespace Azure::Data::Tables::_detail::Xml