extensions/standard-processors/processors/HashContent.h (147 lines of code) (raw):

/** * @file HashContent.h * HashContent class declaration * * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #pragma once #include <openssl/evp.h> #include <openssl/sha.h> #include <openssl/md5.h> #include <array> #include <cstdint> #include <iomanip> #include <map> #include <memory> #include <string> #include <sstream> #include <utility> #include "core/Processor.h" #include "core/PropertyDefinition.h" #include "core/PropertyDefinitionBuilder.h" #include "core/RelationshipDefinition.h" #include "core/ProcessSession.h" #include "utils/StringUtils.h" #include "utils/Export.h" using HashReturnType = std::pair<std::string, int64_t>; // Without putting this into its own namespace, the code would export already defined symbols. 
namespace { // NOLINT #define HASH_BUFFER_SIZE 16384 HashReturnType MD5Hash(const std::shared_ptr<org::apache::nifi::minifi::io::InputStream>& stream) { HashReturnType ret_val; ret_val.second = 0; std::array<std::byte, HASH_BUFFER_SIZE> buffer{}; EVP_MD_CTX *context = EVP_MD_CTX_new(); EVP_MD *md5 = EVP_MD_fetch(nullptr, "MD5", "-fips"); const auto guard = gsl::finally([&context, &md5]() { EVP_MD_free(md5); EVP_MD_CTX_free(context); }); if (!md5) { return ret_val; } EVP_DigestInit_ex(context, md5, nullptr); size_t ret = 0; do { ret = stream->read(buffer); if (ret > 0) { EVP_DigestUpdate(context, buffer.data(), ret); ret_val.second += gsl::narrow<int64_t>(ret); } } while (ret > 0); if (ret_val.second > 0) { std::array<std::byte, MD5_DIGEST_LENGTH> digest{}; EVP_DigestFinal_ex(context, reinterpret_cast<unsigned char*>(digest.data()), nullptr); ret_val.first = org::apache::nifi::minifi::utils::string::to_hex(digest, true /*uppercase*/); } return ret_val; } HashReturnType SHA1Hash(const std::shared_ptr<org::apache::nifi::minifi::io::InputStream>& stream) { HashReturnType ret_val; ret_val.second = 0; std::array<std::byte, HASH_BUFFER_SIZE> buffer{}; EVP_MD_CTX *context = EVP_MD_CTX_new(); const auto guard = gsl::finally([&context]() { EVP_MD_CTX_free(context); }); EVP_DigestInit_ex(context, EVP_sha1(), nullptr); size_t ret = 0; do { ret = stream->read(buffer); if (ret > 0) { EVP_DigestUpdate(context, buffer.data(), ret); ret_val.second += gsl::narrow<int64_t>(ret); } } while (ret > 0); if (ret_val.second > 0) { std::array<std::byte, SHA_DIGEST_LENGTH> digest{}; EVP_DigestFinal_ex(context, reinterpret_cast<unsigned char*>(digest.data()), nullptr); ret_val.first = org::apache::nifi::minifi::utils::string::to_hex(digest, true /*uppercase*/); } return ret_val; } HashReturnType SHA256Hash(const std::shared_ptr<org::apache::nifi::minifi::io::InputStream>& stream) { HashReturnType ret_val; ret_val.second = 0; std::array<std::byte, HASH_BUFFER_SIZE> buffer{}; EVP_MD_CTX 
*context = EVP_MD_CTX_new(); const auto guard = gsl::finally([&context]() { EVP_MD_CTX_free(context); }); EVP_DigestInit_ex(context, EVP_sha256(), nullptr); size_t ret; do { ret = stream->read(buffer); if (ret > 0) { EVP_DigestUpdate(context, buffer.data(), ret); ret_val.second += gsl::narrow<int64_t>(ret); } } while (ret > 0); if (ret_val.second > 0) { std::array<std::byte, SHA256_DIGEST_LENGTH> digest{}; EVP_DigestFinal_ex(context, reinterpret_cast<unsigned char*>(digest.data()), nullptr); ret_val.first = org::apache::nifi::minifi::utils::string::to_hex(digest, true /*uppercase*/); } return ret_val; } } // namespace namespace org::apache::nifi::minifi::processors { static const std::map<std::string, const std::function<HashReturnType(const std::shared_ptr<io::InputStream>&)>> HashAlgos = { {"MD5", MD5Hash}, {"SHA1", SHA1Hash}, {"SHA256", SHA256Hash} }; class HashContent : public core::ProcessorImpl { public: explicit HashContent(const std::string_view name, const utils::Identifier& uuid = {}) : ProcessorImpl(name, uuid) { logger_ = core::logging::LoggerFactory<HashContent>::getLogger(uuid_); } EXTENSIONAPI static constexpr const char* Description = "HashContent calculates the checksum of the content of the flowfile and adds it as an attribute. 
" "Configuration options exist to select hashing algorithm and set the name of the attribute."; EXTENSIONAPI static constexpr auto HashAttribute = core::PropertyDefinitionBuilder<>::createProperty("Hash Attribute") .withDescription("Attribute to store checksum to") .withDefaultValue("Checksum") .build(); EXTENSIONAPI static constexpr auto HashAlgorithm = core::PropertyDefinitionBuilder<>::createProperty("Hash Algorithm") .withDescription("Name of the algorithm used to generate checksum") .withDefaultValue("SHA256") .isRequired(true) .build(); EXTENSIONAPI static constexpr auto FailOnEmpty = core::PropertyDefinitionBuilder<>::createProperty("Fail on empty") .withDescription("Route to failure relationship in case of empty content") .withDefaultValue("false") .withValidator(core::StandardPropertyValidators::BOOLEAN_VALIDATOR) .build(); EXTENSIONAPI static constexpr auto Properties = std::to_array<core::PropertyReference>({ HashAttribute, HashAlgorithm, FailOnEmpty }); EXTENSIONAPI static constexpr auto Success = core::RelationshipDefinition{"success", "success operational on the flow record"}; EXTENSIONAPI static constexpr auto Failure = core::RelationshipDefinition{"failure", "failure operational on the flow record"}; EXTENSIONAPI static constexpr auto Relationships = std::array{Success, Failure}; EXTENSIONAPI static constexpr bool SupportsDynamicProperties = false; EXTENSIONAPI static constexpr bool SupportsDynamicRelationships = false; EXTENSIONAPI static constexpr core::annotation::Input InputRequirement = core::annotation::Input::INPUT_REQUIRED; EXTENSIONAPI static constexpr bool IsSingleThreaded = false; ADD_COMMON_VIRTUAL_FUNCTIONS_FOR_PROCESSORS void onSchedule(core::ProcessContext& context, core::ProcessSessionFactory& session_factory) override; void onTrigger(core::ProcessContext& context, core::ProcessSession& session) override; void initialize() override; private: std::function<HashReturnType(const std::shared_ptr<io::InputStream>&)> algorithm_ = 
SHA256Hash; std::string attrKey_; bool failOnEmpty_{}; }; } // namespace org::apache::nifi::minifi::processors