thrift/compiler/parse/parsing_driver.h (272 lines of code) (raw):

/* * Copyright (c) Meta Platforms, Inc. and affiliates. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #pragma once #include <cstddef> #include <limits> #include <memory> #include <set> #include <stack> #include <string> #include <unordered_set> #include <vector> #include <boost/optional.hpp> #include <thrift/compiler/ast/diagnostic_context.h> #include <thrift/compiler/ast/node_list.h> #include <thrift/compiler/ast/t_const_value.h> #include <thrift/compiler/ast/t_exception.h> #include <thrift/compiler/ast/t_field.h> #include <thrift/compiler/ast/t_interaction.h> #include <thrift/compiler/ast/t_named.h> #include <thrift/compiler/ast/t_node.h> #include <thrift/compiler/ast/t_package.h> #include <thrift/compiler/ast/t_program.h> #include <thrift/compiler/ast/t_program_bundle.h> #include <thrift/compiler/ast/t_scope.h> #include <thrift/compiler/ast/t_union.h> #include <thrift/compiler/parse/t_ref.h> // This is a macro because of a difference between the OSS and internal builds. #ifndef LOCATION_HH #define LOCATION_HH "thrift/compiler/parse/location.hh" #endif #include LOCATION_HH namespace apache { namespace thrift { namespace compiler { class lex_handler; class lexer; namespace yy { class parser; } // Parsing only representations. struct t_annotations { std::map<std::string, annotation_value> strings; std::map<std::string, std::shared_ptr<const t_const>> objects; }; using t_doc = boost::optional<std::string>; // TODO (partisan): Rename to t_stmt_attrs. struct t_def_attrs { t_doc doc; std::unique_ptr<node_list<t_const>> struct_annotations; }; template <typename T> class t_ref; enum class parsing_mode { INCLUDES = 1, PROGRAM = 2, }; struct parsing_params { // Default values are taken from the original global variables. parsing_params() noexcept {} // Disable aggregate initialization /** * Strictness level */ int strict = 127; /** * Whether or not negative field keys are accepted. * * When a field does not have a user-specified key, thrift automatically * assigns a negative value. However, this is fragile since changes to the * file may unintentionally change the key numbering, resulting in a new * protocol that is not backwards compatible. * * When allow_neg_field_keys is enabled, users can explicitly specify * negative keys. This way they can write a .thrift file with explicitly * specified keys that is still backwards compatible with older .thrift files * that did not specify key values. */ bool allow_neg_field_keys = false; /** * Whether or not 64-bit constants will generate a warning. * * Some languages don't support 64-bit constants, but many do, so we can * suppress this warning for projects that don't use any non-64-bit-safe * languages. */ bool allow_64bit_consts = false; /** * Which experimental features should be allowed. * * 'all' can be used to enable all experimental features. */ std::unordered_set<std::string> allow_experimental_features; /** * Search path for inclusions */ std::vector<std::string> incl_searchpath; }; class parsing_driver { private: class lex_handler_impl; std::unique_ptr<lex_handler_impl> lex_handler_; std::unique_ptr<lexer> lexer_; int get_lineno() const; std::string get_text() const; public: parsing_params params; /** * The last parsed doctext comment. */ t_doc doctext; /** * The location of the last parsed doctext comment. */ int doctext_lineno; /** * The parsing pass that we are on. We do different things on each pass. */ parsing_mode mode; /** * The master program parse tree. This is accessed from within the parser code * to build up the program elements. */ t_program* program; std::unique_ptr<t_program_bundle> program_bundle; /** * Global scope cache for faster compilations */ t_scope* scope_cache; /** * A global map that holds a pointer to all programs already cached */ std::map<std::string, t_program*> program_cache; parsing_driver( diagnostic_context& ctx, std::string path, parsing_params parse_params); ~parsing_driver(); const lexer& get_lexer() const { return *lexer_; } lexer& get_lexer() { return *lexer_; } /** * Parses a program and returns the resulted AST. * Diagnostic messages (warnings, debug messages, etc.) are reported via the * context provided in the constructor. */ std::unique_ptr<t_program_bundle> parse(); /** * Bison's type. */ using YYSTYPE = int; YYSTYPE yylval_ = 0; /** * Bison's structure to store location. */ using YYLTYPE = apache::thrift::compiler::yy::location; YYLTYPE yylloc_; /** * Diagnostic message callbacks. */ // TODO(afuller): Remove these, and have the parser call the functions on ctx_ // directly. template <typename... Args> void debug(Args&&... args) { ctx_.debug(get_lineno(), get_text(), std::forward<Args>(args)...); } template <typename... Args> void verbose(Args&&... args) { ctx_.info(get_lineno(), get_text(), std::forward<Args>(args)...); } template <typename... Args> void yyerror(Args&&... args) { ctx_.report( diagnostic_level::parse_error, get_lineno(), get_text(), std::forward<Args>(args)...); } template <typename... Args> void warning(Args&&... args) { ctx_.warning(get_lineno(), get_text(), std::forward<Args>(args)...); } template <typename... Args> void warning_strict(Args&&... args) { ctx_.warning_strict(get_lineno(), get_text(), std::forward<Args>(args)...); } template <typename... Args> [[noreturn]] void failure(Args&&... args) { ctx_.failure(get_lineno(), get_text(), std::forward<Args>(args)...); end_parsing(); } [[noreturn]] void end_parsing(); /** * Gets the directory path of a filename. */ static std::string directory_name(const std::string& filename); /** * Finds the appropriate file path for the given include filename. */ std::string find_include_file(const std::string& filename); /** * Check the type of the parsed const information against its declared type. */ void validate_const_type(t_const* c); /** * Check that the constant name does not refer to an ambiguous enum. * An ambiguous enum is one that is redefined but not referred to by * ENUM_NAME.ENUM_VALUE. */ void validate_not_ambiguous_enum(const std::string& name); /** * Clears any previously stored doctext string. * Also prints a warning if we are discarding information. */ void clear_doctext(); /** Return any doctext previously push-ed */ t_doc pop_doctext(); /** Strip comment chars and align leading whitespace on multiline doctext */ t_doc strip_doctext(const char* text); /** update doctext of given node */ void set_doctext(t_node& node, t_doc doctext) const; /** * Cleans up text commonly found in doxygen-like comments. * * Warning: if you mix tabs and spaces in a non-uniform way, * you will get what you deserve. */ t_doc clean_up_doctext(std::string docstring); // Checks if the given experimental features is enabled, and reports a failure // and returns false iff not. bool require_experimental_feature(const char* feature); /** * Hands a pointer to be deleted when the parsing driver itself destructs. */ template <typename T> void delete_at_the_end(T* ptr) { deleters_.push_back(deleter{ptr}); } // Returns the source range object containing the location information. source_range get_source_range(const YYLTYPE& loc) const; void reset_locations(); /* * To fix Bison's default location * (result's begin set to end of prev token and result's end set * to begin of next token) */ void avoid_tokens_loc( YYLTYPE& result_loc, const std::vector<std::pair<bool, YYLTYPE>>& last_loc_overrides, const std::vector<std::pair<bool, YYLTYPE>>& next_loc_overrides) { for (const auto& loc_override : last_loc_overrides) { if (!loc_override.first) { break; } result_loc.begin = loc_override.second.begin; } for (const auto& loc_override : next_loc_overrides) { if (!loc_override.first) { break; } result_loc.end = loc_override.second.end; } } // Populate the annotation on the given node. static void set_annotations( t_node* node, std::unique_ptr<t_annotations> annotations); std::unique_ptr<t_const> new_struct_annotation( std::unique_ptr<t_const_value> const_struct); std::unique_ptr<t_throws> new_throws( std::unique_ptr<t_field_list> exceptions); // Creates a reference to a known type, potentally with additional // annotations. t_type_ref new_type_ref( const t_type& type, std::unique_ptr<t_annotations> annotations); t_type_ref new_type_ref(t_type&& type, std::unique_ptr<t_annotations>) = delete; // Creates a reference to a newly instantiated templated type. t_type_ref new_type_ref( std::unique_ptr<t_templated_type> type, std::unique_ptr<t_annotations> annotations); // Creates a reference to a named type. t_type_ref new_type_ref( std::string name, std::unique_ptr<t_annotations> annotations, bool is_const = false); // Tries to set the given fields, reporting a failure on a collsion. // TODO(afuller): Disallow auto-id allocation. void set_fields(t_structured& tstruct, t_field_list&& fields); void set_functions( t_interface& node, std::unique_ptr<t_function_list> functions); // Populate the attributes on the given node. void set_attributes( t_named& node, std::unique_ptr<t_def_attrs> attrs, std::unique_ptr<t_annotations> annots, const YYLTYPE& loc) const; // Adds a definition to the program. t_ref<t_named> add_def(std::unique_ptr<t_named> node); void add_include(std::string name); void set_package(std::string name); t_field_id to_field_id(int64_t int_const) { return narrow_int<t_field_id>(int_const, "field ids"); } int32_t to_enum_value(int64_t int_const) { return narrow_int<int32_t>(int_const, "enum values"); } std::unique_ptr<t_const_value> to_const_value(int64_t int_const); int64_t to_int(uint64_t val, bool negative = false); const t_service* find_service(const std::string& name); const t_const* find_const(const std::string& name); std::unique_ptr<t_const_value> copy_const_value(const std::string& name); void set_parsed_definition(); void validate_header_location(); void validate_header_annotations( std::unique_ptr<t_def_attrs> statement_attrs, std::unique_ptr<t_annotations> annotations); void set_program_annotations( std::unique_ptr<t_def_attrs> statement_attrs, std::unique_ptr<t_annotations> annotations, const YYLTYPE& loc); private: class deleter { public: template <typename T> explicit deleter(T* ptr) : ptr_(ptr), delete_([](const void* ptr) { delete static_cast<const T*>(ptr); }) {} deleter(const deleter&) = delete; deleter& operator=(const deleter&) = delete; deleter(deleter&& rhs) noexcept : ptr_{rhs.ptr_}, delete_{rhs.delete_} { rhs.ptr_ = nullptr; rhs.delete_ = nullptr; } deleter& operator=(deleter&& rhs) { std::swap(ptr_, rhs.ptr_); std::swap(delete_, rhs.delete_); return *this; } ~deleter() { if (!!ptr_) { delete_(ptr_); } } private: const void* ptr_; void (*delete_)(const void*); }; void compute_location_impl( YYLTYPE& yylloc, YYSTYPE& yylval, const char* text); std::set<std::string> already_parsed_paths_; std::set<std::string> circular_deps_; std::unique_ptr<yy::parser> parser_; std::vector<deleter> deleters_; diagnostic_context& ctx_; std::unordered_set<std::string> programs_that_parsed_definition_; /** * Parse a single .thrift file. The file to parse is stored in params.program. */ void parse_file(); // Returns true if the node should be // added to the program. Otherwise, the driver itself // takes ownership of node. template <typename T> bool should_add_node(std::unique_ptr<T>& node) { if (mode != parsing_mode::PROGRAM) { delete_at_the_end(node.release()); return false; } return true; } // Adds an unnamed typedef to the program // TODO(afuller): Remove the need for these by an explicit t_type_ref node // that can annotatable. const t_type* add_unnamed_typedef( std::unique_ptr<t_typedef> node, std::unique_ptr<t_annotations> annotations); // Adds an placeholder typedef to the program // TODO(afuller): Remove the need for these by adding a explicit t_type_ref // node that can be resolved in a second passover the ast. const t_type* add_placeholder_typedef( std::unique_ptr<t_placeholder_typedef> node, std::unique_ptr<t_annotations> annotations); std::string scoped_name(const t_named& node) { return scoped_name(node.name()); } std::string scoped_name(const std::string& name) { return program->name() + "." + name; } std::string scoped_name(const t_named& owner, const t_named& node) { return program->name() + "." + owner.get_name() + "." + node.get_name(); } // Automatic numbering for field ids. // // Field id are assigned starting from -1 and working their way down. // // TODO(afuller): Move auto field ids to a post parse phase (or remove the // feature entirely). void allocate_field_id(t_field_id& next_id, t_field& field); void maybe_allocate_field_id(t_field_id& next_id, t_field& field); template <typename T> T narrow_int(int64_t int_const, const char* name) { using limits = std::numeric_limits<T>; if (int_const < limits::min() || int_const > limits::max()) { failure([&](auto& o) { o << "Integer constant (" << int_const << ") outside the range of " << name << " ([" << limits::min() << ", " << limits::max() << "])."; }); } return int_const; } }; } // namespace compiler } // namespace thrift } // namespace apache