source/interprocedural_analyses/taint/taintAnalysis.ml (385 lines of code) (raw):
(*
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*)
open Core
open Pyre
open Taint
module Target = Interprocedural.Target
(* Registers the Taint analysis with the interprocedural analysis framework. *)
include Taint.Result.Register (struct
include Taint.Result
(* Early sanity check, run before the analysis proper so that broken model files or a broken taint
   configuration are reported quickly: verifies the syntax of every model source found under
   [taint_model_paths], then builds a throwaway taint configuration (all optional settings left as
   [None]) and passes it through [TaintConfiguration.abort_on_error]. The configuration value
   itself is discarded. The elapsed time is reported through [Statistics.performance]. *)
let initialize_configuration
    ~static_analysis_configuration:
      { Configuration.StaticAnalysis.configuration = { taint_model_paths; _ }; _ }
  =
  Log.info "Verifying model syntax and configuration.";
  let timer = Timer.start () in
  let check_syntax (path, source) = ModelParser.verify_model_syntax ~path ~source in
  let model_sources = ModelParser.get_model_sources ~paths:taint_model_paths in
  List.iter model_sources ~f:check_syntax;
  (* Only the validation performed while building the configuration matters here. *)
  let (_ : TaintConfiguration.t) =
    TaintConfiguration.abort_on_error
      (TaintConfiguration.create
         ~rule_filter:None
         ~find_missing_flows:None
         ~dump_model_query_results_path:None
         ~maximum_trace_length:None
         ~maximum_tito_depth:None
         ~taint_model_paths)
  in
  Statistics.performance
    ~name:"Verified model syntax and configuration"
    ~phase_name:"Verifying model syntax and configuration"
    ~timer
    ()
(* Everything needed to apply model queries after the model sources have been parsed. Produced by
   [parse_models_and_queries_from_sources] and consumed by [generate_models_from_queries]. *)
type model_query_data = {
(* Model query rules collected while parsing the model sources. *)
queries: ModelParser.Internal.ModelQuery.rule list;
(* The taint configuration the model sources were parsed with. *)
taint_configuration: TaintConfiguration.t;
}
(* Result of [parse_models_and_queries_from_sources]. *)
type parse_sources_result = {
(* Initial models and skipped overrides, in the shape expected by the interprocedural framework. *)
initialize_result: Model.t Interprocedural.AnalysisResult.initialize_result;
(* Model query data; [None] when no taint model paths were configured. *)
query_data: model_query_data option;
}
(* Extends the already-parsed [initial_models] with the models produced by applying the model
   query rules in [queries], then post-processes them according to the [find_missing_flows]
   setting:

   - no setting (or a string [TaintConfiguration.missing_flows_kind_from_string] maps to [None]):
     models are returned as produced by the queries;
   - [Type]: [Model.remove_sinks] is applied to every model;
   - [Obscure]: as for [Type], and additionally every stub that has no model or whose model is
     obscure receives an obscure sink and has its obscureness removed.

   [skip_overrides] is passed through unchanged. *)
let generate_models_from_queries
    ~scheduler
    ~static_analysis_configuration:
      { Configuration.StaticAnalysis.rule_filter; find_missing_flows; _ }
    ~environment
    ~callables
    ~stubs
    ~initialize_result:{ Interprocedural.AnalysisResult.initial_models = models; skip_overrides }
    { queries; taint_configuration }
  =
  let global_resolution = Analysis.TypeEnvironment.ReadOnly.global_resolution environment in
  let resolution =
    Analysis.TypeCheck.resolution
      global_resolution
      (* TODO(T65923817): Eliminate the need of creating a dummy context here *)
      (module Analysis.TypeCheck.DummyContext)
  in
  let models_with_query_results =
    (* Query rules are applied to functions and methods only, gathered from both the callables
       and the stubs; every other kind of target is dropped. *)
    let callables =
      Hash_set.fold stubs ~init:callables ~f:(fun accumulated stub -> stub :: accumulated)
      |> List.filter_map ~f:(function
             | (`Function _ | `Method _) as callable -> Some (callable :> Target.callable_t)
             | _ -> None)
    in
    TaintModelQuery.ModelQuery.apply_all_rules
      ~resolution
      ~scheduler
      ~configuration:taint_configuration
      ~rule_filter
      ~rules:queries
      ~callables
      ~stubs
      ~environment
      ~models
  in
  let strip_sinks models = Target.Map.map models ~f:Model.remove_sinks in
  let with_obscure_sinks models =
    (* A stub qualifies when it has no model at all, or when its model is obscure. The check is
       made against the models passed in, not against the map being built up. *)
    let needs_obscure_sink callable =
      match Target.Map.find models callable with
      | Some model -> Model.is_obscure model
      | None -> true
    in
    let attach_obscure_sink accumulated callable =
      let current =
        Option.value (Target.Map.find accumulated callable) ~default:Model.empty_model
      in
      let updated =
        current
        |> Model.add_obscure_sink ~resolution ~call_target:callable
        |> Model.remove_obscureness
      in
      Target.Map.set accumulated ~key:callable ~data:updated
    in
    Hash_set.filter stubs ~f:needs_obscure_sink
    |> Hash_set.fold ~init:models ~f:attach_obscure_sink
  in
  let models =
    match find_missing_flows >>= TaintConfiguration.missing_flows_kind_from_string with
    | None -> models_with_query_results
    | Some Type -> strip_sinks models_with_query_results
    | Some Obscure -> models_with_query_results |> strip_sinks |> with_obscure_sinks
  in
  { Interprocedural.AnalysisResult.initial_models = models; skip_overrides }
(* Parses every model source found under [taint_model_paths] and combines the resulting
   user-written models with the class models inferred by [ClassModels.infer].

   Along the way it creates the taint configuration from the static analysis settings, registers
   it with [TaintConfiguration.register], and registers any model verification errors with
   [ModelVerificationError.register].

   Returns a [parse_sources_result]:
   - when [taint_model_paths] is empty: only the inferred class models, an empty [skip_overrides]
     set and no query data;
   - otherwise: user models joined with the inferred models, the parsed [skip_overrides], and the
     parsed queries together with the taint configuration.

   If verification errors are found and [verify_models] is set, the errors are printed as JSON
   and the process exits with the [ModelVerificationError] exit code. *)
let parse_models_and_queries_from_sources
~scheduler
~static_analysis_configuration:
{
Configuration.StaticAnalysis.verify_models;
configuration = { taint_model_paths; _ };
rule_filter;
find_missing_flows;
dump_model_query_results;
maximum_trace_length;
maximum_tito_depth;
_;
}
~environment
~callables
~stubs
=
let resolution =
Analysis.TypeCheck.resolution
(Analysis.TypeEnvironment.ReadOnly.global_resolution environment)
(* TODO(T65923817): Eliminate the need of creating a dummy context here *)
(module Analysis.TypeCheck.DummyContext)
in
(* Parses the given [(path, source)] pairs through the scheduler and returns the tuple
   [(models, errors, skip_overrides, queries)]. *)
let create_models ~taint_configuration sources =
(* Map step: parse each source of the chunk and fold its output into the accumulated state. *)
let map state sources =
List.fold
sources
~init:state
~f:(fun (accumulated_models, errors, skip_overrides, queries) (path, source) ->
let {
ModelParser.models;
errors = new_errors;
skip_overrides = new_skip_overrides;
queries = new_queries;
}
=
ModelParser.parse
~resolution
~path
~source
~configuration:taint_configuration
~callables
~stubs
?rule_filter
()
in
(* A target modelled in several sources gets the join of its models. *)
let merged_models =
Target.Map.merge accumulated_models models ~f:(fun ~key:_ -> function
| `Both (left, right) -> Some (Model.join left right)
| `Left model
| `Right model ->
Some model)
in
(* [List.rev_append] reverses the newly added elements, so errors and queries do not end up in
   source order. *)
( merged_models,
List.rev_append new_errors errors,
Set.union skip_overrides new_skip_overrides,
List.rev_append new_queries queries ))
in
(* Reduce step: combine the partial results of two chunks. *)
let reduce
(models_left, errors_left, skip_overrides_left, queries_left)
(models_right, errors_right, skip_overrides_right, queries_right)
=
(* NOTE(review): the map step joins with [Model.join] while this uses [Result.join ~iteration:0];
   presumably the two agree for models -- confirm. *)
let merge_models ~key:_ = function
| `Left model
| `Right model ->
Some model
| `Both (left, right) -> Some (Result.join ~iteration:0 left right)
in
( Target.Map.merge models_left models_right ~f:merge_models,
List.rev_append errors_left errors_right,
Set.union skip_overrides_left skip_overrides_right,
List.rev_append queries_left queries_right )
in
Scheduler.map_reduce
scheduler
~policy:(Scheduler.Policy.legacy_fixed_chunk_count ())
~initial:(Target.Map.empty, [], Ast.Reference.Set.empty, [])
~map
~reduce
~inputs:sources
()
in
(* Builds and registers the taint configuration, parses all model sources, and reports
   verification errors. *)
let add_models_and_queries_from_sources () =
let find_missing_flows =
find_missing_flows >>= TaintConfiguration.missing_flows_kind_from_string
in
let taint_configuration =
TaintConfiguration.create
~rule_filter
~find_missing_flows
~dump_model_query_results_path:dump_model_query_results
~maximum_trace_length
~maximum_tito_depth
~taint_model_paths
|> TaintConfiguration.abort_on_error
in
TaintConfiguration.register taint_configuration;
let models, errors, skip_overrides, queries =
ModelParser.get_model_sources ~paths:taint_model_paths |> create_models ~taint_configuration
in
ModelVerificationError.register errors;
let () =
if not (List.is_empty errors) then
(* Exit or log errors, depending on whether models need to be verified. *)
if not verify_models then begin
Log.error "Found %d model verification errors!" (List.length errors);
List.iter errors ~f:(fun error -> Log.error "%s" (ModelVerificationError.display error))
end
else begin
Yojson.Safe.pretty_to_string
(`Assoc ["errors", `List (List.map errors ~f:ModelVerificationError.to_json)])
|> Log.print "%s";
exit (Taint.ExitStatus.exit_code Taint.ExitStatus.ModelVerificationError)
end
in
{
initialize_result =
{ Interprocedural.AnalysisResult.initial_models = models; skip_overrides };
query_data = Some { queries; taint_configuration };
}
in
(* This runs even when [taint_model_paths] is empty, so the taint configuration is created and
   registered in that case too. *)
let ({ initialize_result = { initial_models = user_models; _ }; _ } as result) =
add_models_and_queries_from_sources ()
in
let initial_models = ClassModels.infer ~environment ~user_models in
match taint_model_paths with
| [] ->
(* No model paths: keep only the inferred class models and drop the query data. *)
{
initialize_result =
{
Interprocedural.AnalysisResult.initial_models;
skip_overrides = Ast.Reference.Set.empty;
};
query_data = None;
}
| _ ->
(* Targets present on both sides get the join of the user model and the inferred model. *)
let merged_models =
Target.Map.merge user_models initial_models ~f:(fun ~key:_ -> function
| `Both (left, right) -> Some (Model.join left right)
| `Left model
| `Right model ->
Some model)
in
{
result with
initialize_result = { result.initialize_result with initial_models = merged_models };
}
(* Builds the initial taint models in two timed phases: parsing the model sources, then, when the
   parse step returned query data, generating additional models from the model queries. Without
   query data the parsed result is returned as is. *)
let initialize_models ~scheduler ~static_analysis_configuration ~environment ~callables ~stubs =
  let callables = (callables :> Target.t list) in
  let stubs = Target.HashSet.of_list (stubs :> Target.t list) in
  Log.info "Parsing taint models...";
  let parsing_timer = Timer.start () in
  let callable_set = Target.HashSet.of_list callables in
  let { initialize_result; query_data } =
    parse_models_and_queries_from_sources
      ~scheduler
      ~static_analysis_configuration
      ~environment
      ~callables:(Some callable_set)
      ~stubs
  in
  Statistics.performance
    ~name:"Parsed taint models"
    ~phase_name:"Parsing taint models"
    ~timer:parsing_timer
    ();
  match query_data with
  | None -> initialize_result
  | Some query_data ->
      Log.info "Generating models from model queries...";
      let query_timer = Timer.start () in
      let with_generated_models =
        generate_models_from_queries
          ~scheduler
          ~static_analysis_configuration
          ~environment
          ~callables
          ~stubs
          ~initialize_result
          query_data
      in
      Statistics.performance
        ~name:"Generated models from model queries"
        ~phase_name:"Generating models from model queries"
        ~timer:query_timer
        ();
      with_generated_models
(* Analyzes one define: runs the forward analysis, then the backward analysis (which is given the
   sinks triggered by the forward pass), assembles a model from the two with the given
   [sanitizers] and [modes], and applies the sanitizers to it.

   When [modes] contains [SkipAnalysis], the forward and backward parts of the model are taken
   from [empty_model] instead of the computed ones; both analyses have still been run at that
   point.

   Returns the [result] produced by the forward analysis paired with the sanitized model. Each
   phase is timed with the profiler, which is only active when the define requests a performance
   dump. *)
let analyze ~environment ~callable ~qualifier ~define ~sanitizers ~modes existing_model =
  let define_value = Ast.Node.value define in
  let profiler =
    match Ast.Statement.Define.dump_perf define_value with
    | true -> TaintProfiler.create ()
    | false -> TaintProfiler.none
  in
  let call_graph_of_define =
    Interprocedural.CallGraph.SharedMemory.get_or_compute
      ~callable
      ~environment
      ~define:define_value
  in
  let forward_taint, result, triggered_sinks =
    let run_forward () =
      ForwardAnalysis.run
        ~profiler
        ~environment
        ~qualifier
        ~define
        ~call_graph_of_define
        ~existing_model
    in
    TaintProfiler.track_duration ~profiler ~name:"Forward analysis" ~f:run_forward
  in
  let backward_taint =
    let run_backward () =
      BackwardAnalysis.run
        ~profiler
        ~environment
        ~qualifier
        ~define
        ~call_graph_of_define
        ~existing_model
        ~triggered_sinks
    in
    TaintProfiler.track_duration ~profiler ~name:"Backward analysis" ~f:run_backward
  in
  let unsanitized =
    if Model.ModeSet.contains Model.Mode.SkipAnalysis modes then
      { Model.forward = empty_model.forward; backward = empty_model.backward; sanitizers; modes }
    else
      { Model.forward = forward_taint; backward = backward_taint; sanitizers; modes }
  in
  let sanitized =
    TaintProfiler.track_duration ~profiler ~name:"Sanitize" ~f:(fun () ->
        Model.apply_sanitizers unsanitized)
  in
  TaintProfiler.dump profiler;
  result, sanitized
(* Flushes the fixpoint model cache, the fixpoint result cache and the environment's local
   annotations cache, in that order, to keep shared memory usage down. *)
let invalidate_caches ~environment =
  let () = Interprocedural.FixpointState.invalidate_model_cache () in
  let () = Interprocedural.FixpointState.invalidate_result_cache () in
  let () =
    Analysis.TypeEnvironment.ReadOnly.invalidate_local_annotations_cache environment
  in
  ()
(* Entry point handed to the interprocedural framework for analyzing one callable.

   Caches are invalidated first. The qualifier passed to the per-define analysis is replaced by
   the module of the original definition when the define's name is recorded as an inlined
   decorator name and its global location can be found; otherwise the given [qualifier] is used.

   - An existing model whose modes contain [SkipAnalysis] is returned untouched with an empty
     result list.
   - Any other existing model is re-analyzed with its own sanitizers and modes.
   - With no existing model, the analysis starts from [empty_model] with empty sanitizers and
     modes. *)
let analyze
    ~environment
    ~callable
    ~qualifier
    ~define:
      ({ Ast.Node.value = { Ast.Statement.Define.signature = { name; _ }; _ }; _ } as define)
    ~existing
  =
  invalidate_caches ~environment;
  let define_qualifier = Ast.Reference.delocalize name in
  let open Analysis in
  let open Ast in
  let original_location =
    let global_resolution = TypeEnvironment.ReadOnly.global_resolution environment in
    let annotated_global_environment =
      GlobalResolution.annotated_global_environment global_resolution
    in
    (* Pysa inlines decorators when a function is decorated. However, we want issues and models to
       point to the lines in the module where the decorator was defined, not the module where it
       was inlined. So, look up the originating module, if any, and use that as the module
       qualifier. *)
    InlineDecorator.InlinedNameToOriginalName.get define_qualifier
    >>= AnnotatedGlobalEnvironment.ReadOnly.get_global_location annotated_global_environment
  in
  let qualifier =
    match original_location with
    | Some { Location.WithModule.module_reference; _ } -> module_reference
    | None -> qualifier
  in
  match existing with
  | None ->
      analyze
        ~callable
        ~environment
        ~qualifier
        ~define
        ~sanitizers:Model.Sanitizers.empty
        ~modes:Model.ModeSet.empty
        empty_model
  | Some ({ Model.sanitizers; modes; _ } as model) ->
      if Model.ModeSet.contains Model.Mode.SkipAnalysis modes then begin
        Log.info "Skipping taint analysis of %a" Target.pretty_print callable;
        [], model
      end
      else
        analyze ~callable ~environment ~qualifier ~define ~sanitizers ~modes model
(* The [report] member of the module passed to [Taint.Result.Register]; an alias for
   [Taint.Reporting.report]. *)
let report = Taint.Reporting.report
end)