source/interprocedural_analyses/taint/modelQuery/modelQuery.ml (755 lines of code) (raw):

(*
 * Copyright (c) Meta Platforms, Inc. and affiliates.
 *
 * This source code is licensed under the MIT license found in the
 * LICENSE file in the root directory of this source tree.
 *)

open Core
open Pyre
open Ast
open Analysis
open Interprocedural
open Taint
module ModelQuery = ModelParser.Internal.ModelQuery

module ModelParser = struct
  include ModelParser.Internal
  include ModelParser
end

(** Serializes the models produced by model queries to a file as JSON. *)
module DumpModelQueryResults : sig
  (** [dump ~path ~models] logs a warning naming the absolute [path], then writes a JSON list to
      [path] with one object per entry of [models]; each object has a [callable] key (the external
      target name) and a [model] key (the externalized model). *)
  val dump : path:PyrePath.t -> models:Model.t Target.Map.t -> unit
end = struct
  let dump ~path ~models =
    Log.warning "Emitting the model query results to `%s`" (PyrePath.absolute path);
    let content =
      let to_json (callable, model) =
        `Assoc
          [
            "callable", `String (Target.external_target_name callable);
            ( "model",
              `List
                (Taint.Reporting.externalize ~filename_lookup:(fun _ -> None) callable None model)
            );
          ]
      in
      models
      |> Map.to_alist
      |> fun models -> `List (List.map models ~f:to_json) |> Yojson.Safe.pretty_to_string
    in
    path |> File.create ~content |> File.write
end

(** Compares two call arguments with [Expression.Call.Argument.location_insensitive_compare] after
    sanitizing them: the keyword name (when present) is passed through [Identifier.sanitized], and
    so is the value when it is a bare identifier. Any other value is compared untouched. *)
let sanitized_location_insensitive_compare left right =
  let sanitize_decorator_argument ({ Expression.Call.Argument.name; value } as argument) =
    let new_name =
      match name with
      | None -> None
      | Some ({ Node.value = argument_name; _ } as previous_name) ->
          Some { previous_name with value = Identifier.sanitized argument_name }
    in
    let new_value =
      match value with
      | { Node.value = Expression.Expression.Name (Expression.Name.Identifier argument_value); _ }
        as previous_value ->
          {
            previous_value with
            value =
              Expression.Expression.Name
                (Expression.Name.Identifier (Identifier.sanitized argument_value));
          }
      | _ -> value
    in
    { argument with name = new_name; value = new_value }
  in
  let left_sanitized = sanitize_decorator_argument left in
  let right_sanitized = sanitize_decorator_argument right in
  Expression.Call.Argument.location_insensitive_compare left_sanitized right_sanitized


(** Set of call arguments ordered by [sanitized_location_insensitive_compare]. *)
module SanitizedCallArgumentSet = Set.Make (struct
  type t = Expression.Call.Argument.t [@@deriving sexp]

  let compare = sanitized_location_insensitive_compare
end)

(** [is_ancestor ~resolution ~is_transitive ancestor_class child_class] tells whether
    [ancestor_class] is an ancestor of [child_class].

    - When [is_transitive] is true, this defers to [GlobalResolution.is_transitive_successor] and
      answers [false] if the class hierarchy raises [ClassHierarchy.Untracked].
    - Otherwise, [ancestor_class] must be [child_class] itself or one of its immediate parents. *)
let is_ancestor ~resolution ~is_transitive ancestor_class child_class =
  if is_transitive then
    try
      GlobalResolution.is_transitive_successor
        ~placeholder_subclass_extends_all:false
        resolution
        ~predecessor:child_class
        ~successor:ancestor_class
    with
    | ClassHierarchy.Untracked _ -> false
  else
    let parents = GlobalResolution.immediate_parents ~resolution child_class in
    List.mem (child_class :: parents) ancestor_class ~equal:String.equal


(** Turns a name constraint into a predicate on strings: exact equality for [Equals], and
    [Re2.matches] on the pattern for [Matches]. *)
let matches_name_constraint ~name_constraint =
  match name_constraint with
  | ModelQuery.Equals string -> String.equal string
  | ModelQuery.Matches pattern -> Re2.matches pattern


(** Tells whether the expression [decorator] satisfies both the name constraint and the optional
    arguments constraint. Returns [false] when [Statement.Decorator.from_expression] cannot
    interpret the expression as a decorator.

    Argument matching:
    - no constraint: always matches;
    - [Contains]: the constraint positional arguments must be a prefix of the decorator positional
      arguments, and the constraint keyword arguments a subset of the decorator keyword arguments;
    - [Equals]: positional arguments must be pairwise equal and keyword arguments must form equal
      sets.

    A decorator without an argument list only matches an empty list of constraint arguments. All
    argument comparisons go through [sanitized_location_insensitive_compare]. *)
let matches_decorator_constraint ~name_constraint ~arguments_constraint decorator =
  let decorator_name_matches { Statement.Decorator.name = { Node.value = decorator_name; _ }; _ } =
    matches_name_constraint ~name_constraint (Reference.show decorator_name)
  in
  let decorator_arguments_matches { Statement.Decorator.arguments = decorator_arguments; _ } =
    (* Splits a list of arguments into (positional, keyword). *)
    let split_arguments =
      List.partition_tf ~f:(fun { Expression.Call.Argument.name; _ } ->
          match name with
          | None -> true
          | _ -> false)
    in
    let positional_arguments_equal left right =
      List.equal (fun l r -> Int.equal (sanitized_location_insensitive_compare l r) 0) left right
    in
    match arguments_constraint, decorator_arguments with
    | None, _ -> true
    | Some (ModelQuery.ArgumentsConstraint.Contains constraint_arguments), None ->
        List.is_empty constraint_arguments
    | Some (ModelQuery.ArgumentsConstraint.Contains constraint_arguments), Some arguments ->
        let constraint_positional_arguments, constraint_keyword_arguments =
          split_arguments constraint_arguments
        in
        let decorator_positional_arguments, decorator_keyword_arguments =
          split_arguments arguments
        in
        List.length constraint_positional_arguments <= List.length decorator_positional_arguments
        && positional_arguments_equal
             constraint_positional_arguments
             (List.take
                decorator_positional_arguments
                (List.length constraint_positional_arguments))
        && SanitizedCallArgumentSet.is_subset
             (SanitizedCallArgumentSet.of_list constraint_keyword_arguments)
             ~of_:(SanitizedCallArgumentSet.of_list decorator_keyword_arguments)
    | Some (ModelQuery.ArgumentsConstraint.Equals constraint_arguments), None ->
        List.is_empty constraint_arguments
    | Some (ModelQuery.ArgumentsConstraint.Equals constraint_arguments), Some arguments ->
        let constraint_positional_arguments, constraint_keyword_arguments =
          split_arguments constraint_arguments
        in
        let decorator_positional_arguments, decorator_keyword_arguments =
          split_arguments arguments
        in
        (* Since equality comparison is more costly, check the lists are the same lengths first. *)
        Int.equal
          (List.length constraint_positional_arguments)
          (List.length decorator_positional_arguments)
        && positional_arguments_equal
             constraint_positional_arguments
             decorator_positional_arguments
        && SanitizedCallArgumentSet.equal
             (SanitizedCallArgumentSet.of_list constraint_keyword_arguments)
             (SanitizedCallArgumentSet.of_list decorator_keyword_arguments)
  in
  match Statement.Decorator.from_expression decorator with
  | None -> false
  | Some decorator -> decorator_name_matches decorator && decorator_arguments_matches decorator


(** Tells whether [annotation] satisfies [annotation_constraint].

    - [IsAnnotatedTypeConstraint] matches a call expression whose callee is an attribute access
      on a base that is itself an attribute access named [Annotated].
    - [AnnotationNameConstraint] applies the name constraint to [Expression.show annotation].
    - Anything else does not match. *)
let matches_annotation_constraint ~annotation_constraint ~annotation =
  let open Expression in
  match annotation_constraint, annotation with
  | ( ModelQuery.IsAnnotatedTypeConstraint,
      {
        Node.value =
          Expression.Call
            {
              Call.callee =
                {
                  Node.value =
                    Name
                      (Name.Attribute
                        {
                          base =
                            { Node.value = Name (Name.Attribute { attribute = "Annotated"; _ }); _ };
                          _;
                        });
                  _;
                };
              _;
            };
        _;
      } ) ->
      true
  | ModelQuery.AnnotationNameConstraint name_constraint, annotation_expression ->
      matches_name_constraint ~name_constraint (Expression.show annotation_expression)
  | _ -> false


(** Tells whether a normalized parameter, given as a [(root, name, parameter node)] triple,
    satisfies a parameter constraint.

    - [AnnotationConstraint]: false when the parameter has no annotation.
    - [NameConstraint]: applied to the sanitized parameter name.
    - [IndexConstraint]: true only for a positional parameter root at that position.
    - [AnyOf] / [AllOf] / [Not]: boolean combinators, evaluated recursively. *)
let rec normalized_parameter_matches_constraint
    ~resolution
    ~parameter:
      ((root, parameter_name, { Node.value = { Expression.Parameter.annotation; _ }; _ }) as
      parameter)
  = function
  | ModelQuery.ParameterConstraint.AnnotationConstraint annotation_constraint ->
      annotation
      >>| (fun annotation -> matches_annotation_constraint ~annotation_constraint ~annotation)
      |> Option.value ~default:false
  | ModelQuery.ParameterConstraint.NameConstraint name_constraint ->
      matches_name_constraint ~name_constraint (Identifier.sanitized parameter_name)
  | ModelQuery.ParameterConstraint.IndexConstraint index -> (
      match root with
      | AccessPath.Root.PositionalParameter { position; _ } when position = index -> true
      | _ -> false)
  | ModelQuery.ParameterConstraint.AnyOf constraints ->
      List.exists constraints ~f:(normalized_parameter_matches_constraint ~resolution ~parameter)
  | ModelQuery.ParameterConstraint.Not query_constraint ->
      not (normalized_parameter_matches_constraint ~resolution ~parameter query_constraint)
  | ModelQuery.ParameterConstraint.AllOf constraints ->
      List.for_all constraints ~f:(normalized_parameter_matches_constraint ~resolution ~parameter)


(** Tells whether [callable] satisfies [query_constraint].

    Constraints that inspect the definition (decorators, return annotation, parameters) answer
    [false] when the definition cannot be found; in that case an error is also logged. The lookup
    is wrapped in [Memo.unit].

    @raise Failure on a constraint kind that is not handled for callables. *)
let rec callable_matches_constraint query_constraint ~resolution ~callable =
  let get_callable_type =
    Memo.unit (fun () ->
        let callable_type = Target.get_module_and_definition ~resolution callable >>| snd in
        if Option.is_none callable_type then
          Log.error "Could not find callable type for callable: `%s`" (Target.show callable);
        callable_type)
  in
  match query_constraint with
  | ModelQuery.DecoratorConstraint { name_constraint; arguments_constraint } -> (
      match get_callable_type () with
      | Some
          {
            Node.value =
              {
                Statement.Define.signature =
                  { Statement.Define.Signature.decorators = _ :: _ as decorators; _ };
                _;
              };
            _;
          } ->
          List.exists decorators ~f:(fun decorator ->
              matches_decorator_constraint ~name_constraint ~arguments_constraint decorator)
      | _ -> false)
  | ModelQuery.NameConstraint name_constraint ->
      matches_name_constraint ~name_constraint (Target.external_target_name callable)
  | ModelQuery.ReturnConstraint annotation_constraint -> (
      let callable_type = get_callable_type () in
      match callable_type with
      | Some
          {
            Node.value =
              {
                Statement.Define.signature = { Statement.Define.Signature.return_annotation; _ };
                _;
              };
            _;
          } ->
          return_annotation
          >>| (fun annotation -> matches_annotation_constraint ~annotation_constraint ~annotation)
          |> Option.value ~default:false
      | _ -> false)
  | ModelQuery.AnyParameterConstraint parameter_constraint -> (
      let callable_type = get_callable_type () in
      match callable_type with
      | Some
          {
            Node.value =
              { Statement.Define.signature = { Statement.Define.Signature.parameters; _ }; _ };
            _;
          } ->
          AccessPath.Root.normalize_parameters parameters
          |> List.exists ~f:(fun parameter ->
                 normalized_parameter_matches_constraint ~resolution ~parameter parameter_constraint)
      | _ -> false)
  | ModelQuery.AnyOf constraints ->
      List.exists constraints ~f:(callable_matches_constraint ~resolution ~callable)
  | ModelQuery.AllOf constraints ->
      List.for_all constraints ~f:(callable_matches_constraint ~resolution ~callable)
  | ModelQuery.Not query_constraint ->
      not (callable_matches_constraint ~resolution ~callable query_constraint)
  | ModelQuery.ParentConstraint (NameSatisfies name_constraint) ->
      Target.class_name callable
      >>| matches_name_constraint ~name_constraint
      |> Option.value ~default:false
  | ModelQuery.ParentConstraint (Extends { class_name; is_transitive }) ->
      Target.class_name callable
      >>| is_ancestor ~resolution ~is_transitive class_name
      |> Option.value ~default:false
  | _ -> failwith "impossible case"


(** Expands the [productions] of a model query into a list of
    [(annotation kind, taint annotation)] pairs for [callable].

    Returns the empty list when the definition of [callable] cannot be found.

    @raise Failure if [productions] contains an [AttributeTaint] production. *)
let apply_callable_productions ~resolution ~productions ~callable =
  let definition = Target.get_module_and_definition ~resolution callable in
  match definition with
  | None -> []
  | Some
      ( _,
        {
          Node.value =
            {
              Statement.Define.signature =
                { Statement.Define.Signature.parameters; return_annotation; _ };
              _;
            };
          _;
        } ) ->
      (* Converts one production into a taint annotation, given the type annotation of the
         parameter or return value it applies to. [parameter] is only provided for
         [ParameterTaint] productions, where it is used to resolve placeholder via-features. *)
      let production_to_taint ?(parameter = None) ~production annotation =
        let open Expression in
        (* Extracts [subkind] from an annotation that is a call on an [Annotated] attribute whose
           single argument is a two-element tuple with a second element of the form
           [pattern(subkind)]. *)
        let get_subkind_from_annotation ~pattern annotation =
          let get_annotation_of_type annotation =
            match annotation >>| Node.value with
            | Some (Expression.Call { Call.callee = { Node.value = callee; _ }; arguments }) -> (
                match callee with
                | Name
                    (Name.Attribute
                      {
                        base =
                          { Node.value = Name (Name.Attribute { attribute = "Annotated"; _ }); _ };
                        _;
                      }) -> (
                    match arguments with
                    | [
                     {
                       Call.Argument.value = { Node.value = Expression.Tuple [_; annotation]; _ };
                       _;
                     };
                    ] ->
                        Some annotation
                    | _ -> None)
                | _ -> None)
            | _ -> None
          in
          match get_annotation_of_type annotation with
          | Some
              {
                Node.value =
                  Expression.Call
                    {
                      Call.callee = { Node.value = Name (Name.Identifier callee_name); _ };
                      arguments =
                        [
                          {
                            Call.Argument.value = { Node.value = Name (Name.Identifier subkind); _ };
                            _;
                          };
                        ];
                    };
                _;
              } ->
              if String.equal callee_name pattern then
                Some subkind
              else
                None
          | _ -> None
        in
        let update_placeholder_via_feature ~actual_parameter =
          (* If we see a via_feature on the $global attribute symbolic parameter in the taint for an
             actual parameter, we replace it with the actual parameter. *)
          let open Features in
          function
          | ViaFeature.ViaTypeOf
              {
                parameter =
                  AccessPath.Root.PositionalParameter
                    { position = 0; name = "$global"; positional_only = false };
                tag;
              } ->
              ViaFeature.ViaTypeOf { parameter = actual_parameter; tag }
          | ViaFeature.ViaValueOf
              {
                parameter =
                  AccessPath.Root.PositionalParameter
                    { position = 0; name = "$global"; positional_only = false };
                tag;
              } ->
              ViaFeature.ViaValueOf { parameter = actual_parameter; tag }
          | feature -> feature
        in
        (* Rewrites placeholder via-features in source, sink, tito and add-feature-to-argument
           annotations; other annotations, or a missing [parameter], are returned unchanged. *)
        let update_placeholder_via_features taint_annotation =
          match parameter, taint_annotation with
          | Some actual_parameter, ModelParser.Source source ->
              let via_features =
                List.map ~f:(update_placeholder_via_feature ~actual_parameter) source.via_features
              in
              ModelParser.Source { source with via_features }
          | Some actual_parameter, ModelParser.Sink sink ->
              let via_features =
                List.map ~f:(update_placeholder_via_feature ~actual_parameter) sink.via_features
              in
              ModelParser.Sink { sink with via_features }
          | Some actual_parameter, ModelParser.Tito tito ->
              let via_features =
                List.map ~f:(update_placeholder_via_feature ~actual_parameter) tito.via_features
              in
              ModelParser.Tito { tito with via_features }
          | Some actual_parameter, ModelParser.AddFeatureToArgument annotation ->
              let via_features =
                List.map
                  ~f:(update_placeholder_via_feature ~actual_parameter)
                  annotation.via_features
              in
              ModelParser.AddFeatureToArgument { annotation with via_features }
          | _ -> taint_annotation
        in
        match production with
        | ModelQuery.TaintAnnotation taint_annotation ->
            Some (update_placeholder_via_features taint_annotation)
        | ModelQuery.ParametricSourceFromAnnotation { source_pattern; kind } ->
            get_subkind_from_annotation ~pattern:source_pattern annotation
            >>| fun subkind ->
            ModelParser.Source
              {
                source = Sources.ParametricSource { source_name = kind; subkind };
                breadcrumbs = [];
                via_features = [];
                path = [];
                leaf_names = [];
                leaf_name_provided = false;
                trace_length = None;
              }
        | ModelQuery.ParametricSinkFromAnnotation { sink_pattern; kind } ->
            get_subkind_from_annotation ~pattern:sink_pattern annotation
            >>| fun subkind ->
            ModelParser.Sink
              {
                sink = Sinks.ParametricSink { sink_name = kind; subkind };
                breadcrumbs = [];
                via_features = [];
                path = [];
                leaf_names = [];
                leaf_name_provided = false;
                trace_length = None;
              }
      in
      let normalized_parameters = AccessPath.Root.normalize_parameters parameters in
      let apply_production = function
        | ModelQuery.ReturnTaint productions ->
            List.filter_map productions ~f:(fun production ->
                production_to_taint return_annotation ~production
                >>| fun taint -> ModelParser.ReturnAnnotation, taint)
        | ModelQuery.NamedParameterTaint { name; taint = productions } -> (
            (* First normalized parameter whose sanitized name equals [name], if any. *)
            let parameter =
              List.find_map
                normalized_parameters
                ~f:(fun
                     ( root,
                       parameter_name,
                       { Node.value = { Expression.Parameter.annotation; _ }; _ } )
                   ->
                  if Identifier.equal_sanitized parameter_name name then
                    Some (root, annotation)
                  else
                    None)
            in
            match parameter with
            | Some (parameter, annotation) ->
                List.filter_map productions ~f:(fun production ->
                    production_to_taint annotation ~production
                    >>| fun taint -> ModelParser.ParameterAnnotation parameter, taint)
            | None -> [])
        | ModelQuery.PositionalParameterTaint { index; taint = productions } -> (
            (* First positional parameter whose position equals [index], if any. *)
            let parameter =
              List.find_map
                normalized_parameters
                ~f:(fun (root, _, { Node.value = { Expression.Parameter.annotation; _ }; _ }) ->
                  match root with
                  | AccessPath.Root.PositionalParameter { position; _ } when position = index ->
                      Some (root, annotation)
                  | _ -> None)
            in
            match parameter with
            | Some (parameter, annotation) ->
                List.filter_map productions ~f:(fun production ->
                    production_to_taint annotation ~production
                    >>| fun taint -> ModelParser.ParameterAnnotation parameter, taint)
            | None -> [])
        | ModelQuery.AllParametersTaint { excludes; taint } ->
            (* Every (parameter, production) pair, minus parameters whose sanitized name is listed
               in [excludes]. *)
            let apply_parameter_production
                ( (root, parameter_name, { Node.value = { Expression.Parameter.annotation; _ }; _ }),
                  production )
              =
              if
                (not (List.is_empty excludes))
                && List.mem excludes ~equal:String.equal (Identifier.sanitized parameter_name)
              then
                None
              else
                production_to_taint annotation ~production
                >>| fun taint -> ModelParser.ParameterAnnotation root, taint
            in
            List.cartesian_product normalized_parameters taint
            |> List.filter_map ~f:apply_parameter_production
        | ModelQuery.ParameterTaint { where; taint; _ } ->
            (* Every (parameter, production) pair where the parameter satisfies all [where]
               constraints. *)
            let apply_parameter_production
                ( ((root, _, { Node.value = { Expression.Parameter.annotation; _ }; _ }) as
                  parameter),
                  production )
              =
              if
                List.for_all
                  where
                  ~f:(normalized_parameter_matches_constraint ~resolution ~parameter)
              then
                let parameter, _, _ = parameter in
                production_to_taint annotation ~production ~parameter:(Some parameter)
                >>| fun taint -> ModelParser.ParameterAnnotation root, taint
              else
                None
            in
            List.cartesian_product normalized_parameters taint
            |> List.filter_map ~f:apply_parameter_production
        | ModelQuery.AttributeTaint _ -> failwith "impossible case"
      in
      List.concat_map productions ~f:apply_production


(** Applies a single model query rule to [callable].

    The rule applies when its kind agrees with the target ([FunctionModel] for function targets,
    [MethodModel] for method targets) and every constraint of its query is satisfied; the result is
    then the output of [apply_callable_productions]. Otherwise the result is the empty list. When
    [verbose] is set, a message is logged for each match. *)
let apply_callable_query_rule
    ~verbose
    ~resolution
    ~rule:{ ModelQuery.rule_kind; query; productions; name }
    ~callable
  =
  let kind_matches =
    match callable, rule_kind with
    | `Function _, ModelQuery.FunctionModel
    | `Method _, ModelQuery.MethodModel ->
        true
    | _ -> false
  in
  if kind_matches && List.for_all ~f:(callable_matches_constraint ~resolution ~callable) query then begin
    if verbose then
      Log.info
        "Target `%a` matches all constraints for the model query rule%s."
        Target.pretty_print
        (callable :> Target.t)
        (name |> Option.map ~f:(Format.sprintf " `%s`") |> Option.value ~default:"");
    apply_callable_productions ~resolution ~productions ~callable
  end
  else
    []


(** Tells whether the attribute [name], with optional type [annotation], satisfies
    [query_constraint]. Parent constraints are evaluated against the prefix of [name] and answer
    [false] when [name] has no prefix.

    @raise Failure on a constraint kind that is not handled for attributes. *)
let rec attribute_matches_constraint query_constraint ~resolution ~name ~annotation =
  let attribute_class_name = Reference.prefix name >>| Reference.show in
  match query_constraint with
  | ModelQuery.NameConstraint name_constraint ->
      matches_name_constraint ~name_constraint (Reference.show name)
  | ModelQuery.AnnotationConstraint annotation_constraint ->
      annotation
      >>| (fun annotation -> matches_annotation_constraint ~annotation_constraint ~annotation)
      |> Option.value ~default:false
  | ModelQuery.AnyOf constraints ->
      List.exists constraints ~f:(attribute_matches_constraint ~resolution ~name ~annotation)
  | ModelQuery.AllOf constraints ->
      List.for_all constraints ~f:(attribute_matches_constraint ~resolution ~name ~annotation)
  | ModelQuery.Not query_constraint ->
      not (attribute_matches_constraint ~resolution ~name ~annotation query_constraint)
  | ModelQuery.ParentConstraint (NameSatisfies name_constraint) ->
      attribute_class_name
      >>| matches_name_constraint ~name_constraint
      |> Option.value ~default:false
  | ModelQuery.ParentConstraint (Extends { class_name; is_transitive }) ->
      attribute_class_name
      >>| is_ancestor ~resolution ~is_transitive class_name
      |> Option.value ~default:false
  | _ -> failwith "impossible case"


(** Collects the taint annotations carried by [AttributeTaint] productions. Inside an
    [AttributeTaint], anything other than a plain [TaintAnnotation] is dropped.

    @raise Failure if [productions] contains a production other than [AttributeTaint]. *)
let apply_attribute_productions ~productions =
  let production_to_taint = function
    | ModelQuery.TaintAnnotation taint_annotation -> Some taint_annotation
    | _ -> None
  in
  let apply_production = function
    | ModelQuery.AttributeTaint productions -> List.filter_map productions ~f:production_to_taint
    | _ -> failwith "impossible case"
  in
  List.concat_map productions ~f:apply_production


(** Applies a single model query rule to the attribute [name].

    The rule applies when its kind is [AttributeModel] and every constraint of its query is
    satisfied; the result is then the output of [apply_attribute_productions]. Otherwise the result
    is the empty list. When [verbose] is set, a message is logged for each match. *)
let apply_attribute_query_rule
    ~verbose
    ~resolution
    ~rule:{ ModelQuery.rule_kind; query; productions; name = rule_name }
    ~name
    ~annotation
  =
  let kind_matches =
    match rule_kind with
    | ModelQuery.AttributeModel -> true
    | _ -> false
  in
  if
    kind_matches
    && List.for_all ~f:(attribute_matches_constraint ~resolution ~name ~annotation) query
  then begin
    if verbose then
      Log.info
        "Attribute `%s` matches all constraints for the model query rule%s."
        (Reference.show name)
        (rule_name |> Option.map ~f:(Format.sprintf " `%s`") |> Option.value ~default:"");
    apply_attribute_productions ~productions
  end
  else
    []


(** Lists the [(qualified name, annotation)] pairs of the [Simple] attributes of [class_name].

    Regular attributes (generated attributes excluded) are unioned with constructor attributes;
    when both maps define the same attribute, the regular one is kept. Returns the empty list when
    the class definition cannot be found. *)
let get_class_attributes ~global_resolution ~class_name =
  let class_summary =
    GlobalResolution.class_definition global_resolution (Type.Primitive class_name) >>| Node.value
  in
  match class_summary with
  | None -> []
  | Some ({ name = class_name_reference; _ } as class_summary) ->
      let attributes, constructor_attributes =
        ( ClassSummary.attributes ~include_generated_attributes:false class_summary,
          ClassSummary.constructor_attributes class_summary )
      in
      let all_attributes =
        Identifier.SerializableMap.union (fun _ x _ -> Some x) attributes constructor_attributes
      in
      let get_name_and_annotation_from_attributes attribute_name attribute accumulator =
        match attribute with
        | {
         Node.value =
           {
             ClassSummary.Attribute.kind =
               ClassSummary.Attribute.Simple { ClassSummary.Attribute.annotation; _ };
             _;
           };
         _;
        } ->
            (Reference.create ~prefix:class_name_reference attribute_name, annotation)
            :: accumulator
        | _ -> accumulator
      in
      Identifier.SerializableMap.fold get_name_and_annotation_from_attributes all_attributes []


(** Runs every model query rule in [rules] and merges the generated models into [models].

    - Rules of kind [AttributeModel] are applied to the attributes of all classes of
      [environment]; all other rules are applied to the function and method targets found in
      [callables].
    - Both passes go through [Scheduler.map_reduce]. Models generated for the same target are
      combined with [Model.join], as are generated models and pre-existing [models].
    - A rule whose annotations fail model creation is reported with [Log.error] and contributes
      nothing for that target.
    - When [configuration.dump_model_query_results_path] is set, per-match logging is enabled and
      the generated models (before merging with [models]) are dumped to that path.

    When [rules] is empty, [models] is returned unchanged. *)
let apply_all_rules
    ~resolution
    ~scheduler
    ~configuration
    ~rule_filter
    ~rules
    ~callables
    ~stubs
    ~environment
    ~models
  =
  let global_resolution = Resolution.global_resolution resolution in
  if List.is_empty rules then
    models
  else (
    let sources_to_keep, sinks_to_keep =
      ModelParser.compute_sources_and_sinks_to_keep ~configuration ~rule_filter
    in
    (* Per-match logging is tied to whether a dump of the query results was requested. *)
    let verbose = Option.is_some configuration.dump_model_query_results_path in
    let merge_models new_models models =
      Map.merge_skewed new_models models ~combine:(fun ~key:_ left right ->
          Model.join left right)
    in
    (* Records [model] for [target], joining it with the model already present, if any. *)
    let add_model models ~target ~model =
      let model =
        match Target.Map.find models target with
        | Some existing_model -> Model.join existing_model model
        | None -> model
      in
      Target.Map.set models ~key:target ~data:model
    in
    let attribute_rules, callable_rules =
      List.partition_tf
        ~f:(fun { ModelQuery.rule_kind; _ } ->
          match rule_kind with
          | ModelQuery.AttributeModel -> true
          | _ -> false)
        rules
    in

    (* Generate models for functions and methods. *)
    let apply_rules_for_callable models callable =
      let taint_to_model =
        List.concat_map callable_rules ~f:(fun rule ->
            apply_callable_query_rule ~verbose ~resolution:global_resolution ~rule ~callable)
      in
      if not (List.is_empty taint_to_model) then (
        match
          ModelParser.create_callable_model_from_annotations
            ~resolution
            ~callable
            ~sources_to_keep
            ~sinks_to_keep
            ~is_obscure:(Hash_set.mem stubs (callable :> Target.t))
            taint_to_model
        with
        | Ok model -> add_model models ~target:(callable :> Target.t) ~model
        | Error error ->
            Log.error
              "Error while executing model query: %s"
              (ModelVerificationError.display error);
            models)
      else
        models
    in
    (* Only function and method targets are eligible for callable rules. *)
    let callables =
      List.filter_map callables ~f:(function
          | `Function _ as callable -> Some (callable :> Target.callable_t)
          | `Method _ as callable -> Some (callable :> Target.callable_t)
          | _ -> None)
    in
    let callable_models =
      Scheduler.map_reduce
        scheduler
        ~policy:
          (Scheduler.Policy.fixed_chunk_count
             ~minimum_chunk_size:500
             ~preferred_chunks_per_worker:1
             ())
        ~initial:Target.Map.empty
        ~map:(fun models callables ->
          List.fold callables ~init:models ~f:apply_rules_for_callable)
        ~reduce:merge_models
        ~inputs:callables
        ()
    in

    (* Generate models for attributes. *)
    let apply_rules_for_attribute models (name, annotation) =
      let taint_to_model =
        List.concat_map attribute_rules ~f:(fun rule ->
            apply_attribute_query_rule
              ~verbose
              ~resolution:global_resolution
              ~rule
              ~name
              ~annotation)
      in
      if not (List.is_empty taint_to_model) then (
        let callable = Target.create_object name in
        match
          ModelParser.create_attribute_model_from_annotations
            ~resolution
            ~name
            ~sources_to_keep
            ~sinks_to_keep
            taint_to_model
        with
        | Ok model -> add_model models ~target:(callable :> Target.t) ~model
        | Error error ->
            Log.error
              "Error while executing model query: %s"
              (ModelVerificationError.display error);
            models)
      else
        models
    in
    let attribute_models =
      (* Enumerating the attributes of all classes is skipped when no attribute rule exists. *)
      if not (List.is_empty attribute_rules) then
        let all_classes =
          TypeEnvironment.ReadOnly.global_resolution environment
          |> GlobalResolution.unannotated_global_environment
          |> UnannotatedGlobalEnvironment.ReadOnly.all_classes
        in
        let attributes =
          List.concat_map all_classes ~f:(fun class_name ->
              get_class_attributes ~global_resolution ~class_name)
        in
        Scheduler.map_reduce
          scheduler
          ~policy:
            (Scheduler.Policy.fixed_chunk_count
               ~minimum_chunk_size:500
               ~preferred_chunks_per_worker:1
               ())
          ~initial:Target.Map.empty
          ~map:(fun models attributes ->
            List.fold attributes ~init:models ~f:apply_rules_for_attribute)
          ~reduce:merge_models
          ~inputs:attributes
          ()
      else
        Target.Map.empty
    in
    let new_models = merge_models callable_models attribute_models in
    begin
      match configuration.dump_model_query_results_path with
      | Some path -> DumpModelQueryResults.dump ~path ~models:new_models
      | None -> ()
    end;
    merge_models new_models models)