analytical_engine/core/context/selector.h (262 lines of code) (raw):

/** Copyright 2020 Alibaba Group Holding Limited. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef ANALYTICAL_ENGINE_CORE_CONTEXT_SELECTOR_H_ #define ANALYTICAL_ENGINE_CORE_CONTEXT_SELECTOR_H_ #include <glog/logging.h> #include <regex> #include <string> #include <utility> #include <vector> #include "boost/algorithm/string/case_conv.hpp" #include "boost/foreach.hpp" #include "boost/leaf/error.hpp" #include "boost/leaf/result.hpp" #include "boost/lexical_cast.hpp" #include "boost/property_tree/exceptions.hpp" #include "boost/property_tree/json_parser.hpp" #include "boost/property_tree/ptree.hpp" #include "vineyard/graph/fragment/property_graph_types.h" #include "core/error.h" namespace bl = boost::leaf; namespace gs { inline std::string generate_selectors( const std::vector<std::pair<std::string, std::string>>& selector_list) { boost::property_tree::ptree tree; for (auto& pair : selector_list) { tree.put(pair.first, pair.second); } std::stringstream ss; boost::property_tree::json_parser::write_json(ss, tree, false); return ss.str(); } enum class SelectorType { kVertexId, kVertexLabelId, kVertexData, kEdgeSrc, kEdgeDst, kEdgeData, kResult }; /** * @brief This is the model class of non-labeled selector. The selector is used * to select the data in the context/fragment. A selector can pick up the vertex * ids or the data attached to the vertex or the data in the context. */ class Selector { protected: explicit Selector(std::string property_name) : type_(SelectorType::kResult), property_name_(std::move(property_name)) {} explicit Selector(SelectorType type) : type_(type) {} public: virtual ~Selector() = default; SelectorType type() const { return type_; } std::string property_name() const { return property_name_; } virtual std::string str() const { switch (type_) { case SelectorType::kVertexId: return "v.id"; case SelectorType::kVertexLabelId: return "v.label_id"; case SelectorType::kVertexData: return "v.data"; case SelectorType::kEdgeSrc: return "e.src"; case SelectorType::kEdgeDst: return "e.dst"; case SelectorType::kEdgeData: return "e.data"; case SelectorType::kResult: { if (property_name_.empty()) return "r"; return "r." + property_name_; } } return ""; } /** * @brief parse a string selector to Selector object. * * @param selector, valid selector patterns: * v.id * v.data * r * r.prop_name * @return bl::result<Selector> */ static bl::result<Selector> parse(std::string selector) { boost::algorithm::to_lower(selector); std::smatch sm; std::regex r_vid("v\\.id"); std::regex r_vlabel_id("v\\.label_id"); std::regex r_vdata("v\\.data"); std::regex r_esrc("e\\.src"); std::regex r_edst("e\\.dst"); std::regex r_edata("e\\.data"); std::regex r_result("r"); std::regex r_result_prop("r\\.(.*?)"); if (std::regex_match(selector, sm, r_vid)) { return Selector(SelectorType::kVertexId); } else if (std::regex_match(selector, sm, r_vlabel_id)) { return Selector(SelectorType::kVertexLabelId); } else if (std::regex_match(selector, sm, r_vdata)) { return Selector(SelectorType::kVertexData); } else if (std::regex_match(selector, sm, r_esrc)) { return Selector(SelectorType::kEdgeSrc); } else if (std::regex_match(selector, sm, r_edst)) { return Selector(SelectorType::kEdgeDst); } else if (std::regex_match(selector, sm, r_edata)) { return Selector(SelectorType::kEdgeData); } else if (std::regex_match(selector, sm, r_result)) { return Selector(SelectorType::kResult); } else if (std::regex_match(selector, sm, r_result_prop)) { std::string prop_name = sm[1]; if (prop_name.empty()) { RETURN_GS_ERROR( vineyard::ErrorCode::kInvalidValueError, "Property name not found, the selector is: " + selector); } return Selector(prop_name); } RETURN_GS_ERROR(vineyard::ErrorCode::kInvalidValueError, "Invalid syntax, the selector is: " + selector); } /** * @brief parse selectors from a json string. * * @param s_selectors JSON {"col_name": "selector", ...} * @return bl::result<std::vector<std::pair<std::string, Selector>>> */ static bl::result<std::vector<std::pair<std::string, Selector>>> ParseSelectors(const std::string& s_selectors) { std::stringstream ss(s_selectors); boost::property_tree::ptree pt; std::vector<std::pair<std::string, Selector>> selectors; try { boost::property_tree::read_json(ss, pt); BOOST_FOREACH // NOLINT(whitespace/parens) (boost::property_tree::ptree::value_type & v, pt) { CHECK(v.second.empty()); std::string col_name = v.first; std::string s_selector = v.second.data(); BOOST_LEAF_AUTO(selector, Selector::parse(s_selector)); selectors.emplace_back(col_name, selector); } } catch (boost::property_tree::ptree_error& e) { RETURN_GS_ERROR(vineyard::ErrorCode::kInvalidValueError, "Failed to parse json: " + s_selectors); } return selectors; } private: SelectorType type_; std::string property_name_; }; /** * @brief LabeledSelector is used to select the data in the labeled * context/fragment. A selector can pick up the vertex ids or the data attached * to the vertex or the data in the context. */ class LabeledSelector : public Selector { using label_id_t = vineyard::property_graph_types::LABEL_ID_TYPE; using prop_id_t = vineyard::property_graph_types::PROP_ID_TYPE; LabeledSelector(SelectorType type, label_id_t label_id) : Selector(type), label_id_(label_id), property_id_(0) {} LabeledSelector(SelectorType type, label_id_t label_id, prop_id_t prop_id) : Selector(type), label_id_(label_id), property_id_(prop_id) {} LabeledSelector(label_id_t label_id, std::string prop_name) : Selector(std::move(prop_name)), label_id_(label_id), property_id_(0) {} public: label_id_t label_id() const { return label_id_; } prop_id_t property_id() const { return property_id_; } std::string str() const override { switch (type()) { case SelectorType::kVertexId: return "v:label" + std::to_string(label_id_) + ".id"; case SelectorType::kVertexData: return "v:label" + std::to_string(label_id_) + ".property" + std::to_string(property_id_); case SelectorType::kEdgeSrc: return "e:label" + std::to_string(label_id_) + ".src"; case SelectorType::kEdgeDst: return "e:label" + std::to_string(label_id_) + ".dst"; case SelectorType::kEdgeData: return "e:label" + std::to_string(label_id_) + ".property" + std::to_string(property_id_); case SelectorType::kResult: { std::string ret = "r:label" + std::to_string(label_id_); if (!property_name().empty()) { ret = ret + "." + property_name(); } return ret; } default: break; } return ""; } /** * * @param selector * v:label{x}.id * v:label{x}.property{y} * e:label{x}.src * e:label{x}.dst * e:label{x}.property{y} * r:label{x}[.prop_name] * * \a x and \a y represent the index of the label and property responsively. * @return */ static bl::result<LabeledSelector> parse(std::string selector) { boost::algorithm::to_lower(selector); std::smatch sm; std::regex r_vid("v:label(\\d+)\\.id"); std::regex r_vdata("v:label(\\d+)\\.property(\\d+)"); std::regex r_esrc_id("e:label(\\d+)\\.src"); std::regex r_edst_id("e:label(\\d+)\\.dst"); std::regex r_edata("e:label(\\d+)\\.property(\\d+)"); std::regex r_result("r:label(\\d+)"); std::regex r_result_prop("r:label(\\d+)\\.(.*?)"); if (std::regex_match(selector, sm, r_vid)) { auto label_id = boost::lexical_cast<label_id_t>(sm[1]); return LabeledSelector(SelectorType::kVertexId, label_id); } else if (std::regex_match(selector, sm, r_vdata)) { auto label_id = boost::lexical_cast<label_id_t>(sm[1]); auto prop_id = boost::lexical_cast<prop_id_t>(sm[2]); return LabeledSelector(SelectorType::kVertexData, label_id, prop_id); } else if (std::regex_match(selector, sm, r_esrc_id)) { auto label_id = boost::lexical_cast<label_id_t>(sm[1]); return LabeledSelector(SelectorType::kEdgeSrc, label_id); } else if (std::regex_match(selector, sm, r_edst_id)) { auto label_id = boost::lexical_cast<label_id_t>(sm[1]); return LabeledSelector(SelectorType::kEdgeDst, label_id); } else if (std::regex_match(selector, sm, r_edata)) { auto label_id = boost::lexical_cast<label_id_t>(sm[1]); auto prop_id = boost::lexical_cast<prop_id_t>(sm[2]); return LabeledSelector(SelectorType::kEdgeData, label_id, prop_id); } else if (std::regex_match(selector, sm, r_result)) { auto label_id = boost::lexical_cast<label_id_t>(sm[1]); return LabeledSelector(SelectorType::kResult, label_id); } else if (std::regex_match(selector, sm, r_result_prop)) { auto label_id = boost::lexical_cast<label_id_t>(sm[1]); std::string prop_name = sm[2]; if (prop_name.empty()) { RETURN_GS_ERROR( vineyard::ErrorCode::kInvalidValueError, "Property name not found, the selector is: " + selector); } return LabeledSelector(label_id, prop_name); } RETURN_GS_ERROR(vineyard::ErrorCode::kInvalidValueError, "Invalid syntax, the selector is: " + selector); } /** * * @param selectors selectors represented by JSON string e.g. {"col_name": * "selector", ...} * @return */ static bl::result<std::vector<std::pair<std::string, LabeledSelector>>> ParseSelectors(const std::string& s_selectors) { std::stringstream ss(s_selectors); boost::property_tree::ptree pt; std::vector<std::pair<std::string, LabeledSelector>> selectors; try { boost::property_tree::read_json(ss, pt); BOOST_FOREACH // NOLINT(whitespace/parens) (boost::property_tree::ptree::value_type & v, pt) { CHECK(v.second.empty()); std::string col_name = v.first; std::string s_selector = v.second.data(); BOOST_LEAF_AUTO(selector, LabeledSelector::parse(s_selector)); selectors.emplace_back(col_name, selector); } } catch (boost::property_tree::ptree_error& e) { RETURN_GS_ERROR(vineyard::ErrorCode::kInvalidValueError, "Failed to parse json: " + s_selectors); } return selectors; } static bl::result<label_id_t> GetVertexLabelId( const std::vector<std::pair<std::string, LabeledSelector>>& selectors) { label_id_t label_id = -1; for (auto& pair : selectors) { auto& selector = pair.second; if (selector.type() == SelectorType::kVertexId || selector.type() == SelectorType::kVertexData || selector.type() == SelectorType::kResult) { if (label_id == -1) { label_id = selector.label_id(); } else if (selector.label_id() != label_id) { RETURN_GS_ERROR(vineyard::ErrorCode::kInvalidOperationError, "Vertex label is not same"); } } } if (label_id == -1) { RETURN_GS_ERROR(vineyard::ErrorCode::kInvalidOperationError, "Can not found vertex label from selectors"); } return label_id; } private: label_id_t label_id_; prop_id_t property_id_; }; } // namespace gs #endif // ANALYTICAL_ENGINE_CORE_CONTEXT_SELECTOR_H_