be/src/exprs/json_functions.h (80 lines of code) (raw):

// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, // software distributed under the License is distributed on an // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY // KIND, either express or implied. See the License for the // specific language governing permissions and limitations // under the License. #pragma once #include <fmt/format.h> #include <rapidjson/document.h> #include <simdjson.h> // IWYU pragma: keep #include <sstream> #include <string> #include <utility> #include <vector> #include "common/status.h" namespace simdjson { namespace fallback { namespace ondemand { class object; class value; } // namespace ondemand } // namespace fallback } // namespace simdjson namespace doris { enum JsonFunctionType { JSON_FUN_INT = 0, JSON_FUN_DOUBLE, JSON_FUN_STRING, JSON_FUN_UNKNOWN //The last }; struct JsonPath { std::string key; // key of a json object int idx; // array index of a json array, -1 means not set, -2 means * bool is_valid; // true if the path is successfully parsed JsonPath(const std::string& key_, int idx_, bool is_valid_) : key(key_), idx(idx_), is_valid(is_valid_) {} JsonPath(std::string&& key_, int idx_, bool is_valid_) : key(std::move(key_)), idx(idx_), is_valid(is_valid_) {} std::string to_string() const { std::stringstream ss; if (!is_valid) { return "INVALID"; } if (!key.empty()) { ss << key; } if (idx == -2) { ss << "[*]"; } else if (idx > -1) { ss << "[" << idx << "]"; } return ss.str(); } std::string debug_string() const { return fmt::format("key:{}, idx:{}, valid:{}", key, idx, is_valid); } }; class JsonFunctions { public: /** * The `document` parameter must be has parsed. * return Value Is Array object * wrap_explicitly is set to true when the returned Array is wrapped actively. */ static rapidjson::Value* get_json_array_from_parsed_json( const std::vector<JsonPath>& parsed_paths, rapidjson::Value* document, rapidjson::Document::AllocatorType& mem_allocator, bool* wrap_explicitly); // this is only for test, it will parse the json path inside, // so that we can easily pass a json path as string. static rapidjson::Value* get_json_array_from_parsed_json( const std::string& jsonpath, rapidjson::Value* document, rapidjson::Document::AllocatorType& mem_allocator, bool* wrap_explicitly); static rapidjson::Value* get_json_object_from_parsed_json( const std::vector<JsonPath>& parsed_paths, rapidjson::Value* document, rapidjson::Document::AllocatorType& mem_allocator); static void parse_json_paths(const std::string& path_strings, std::vector<JsonPath>* parsed_paths); // extract_from_object extracts value from object according to the json path. // Now, we do not support complete functions of json path. // Eg. city[*].id is not supported in this function static Status extract_from_object(simdjson::ondemand::object& obj, const std::vector<JsonPath>& jsonpath, simdjson::ondemand::value* value) noexcept; // src: {"a" : "b" {"c" : 1}, "e" : 123} // dst: {"a" : "b" {"d" : 1}} // merged: {"a" : "b" : {"c" : 1, "d" : 1}, "e" : 123} static void merge_objects(rapidjson::Value& dst_object, rapidjson::Value& src_object, rapidjson::Document::AllocatorType& allocator); static std::string print_json_value(const rapidjson::Value& value); static bool is_root_path(const std::vector<JsonPath>& json_path); private: static rapidjson::Value* match_value(const std::vector<JsonPath>& parsed_paths, rapidjson::Value* document, rapidjson::Document::AllocatorType& mem_allocator, bool is_insert_null = false); static void get_parsed_paths(const std::vector<std::string>& path_exprs, std::vector<JsonPath>* parsed_paths); }; } // namespace doris