source/command/analyzeCommand.ml (331 lines of code) (raw):
(*
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*)
open Core
(* The analyze command uses the same exit code scheme as the check command. *)
module ExitStatus = struct
  (** Outcome of an analyze run: either a status borrowed from the check command or a
      taint-analysis-specific status. *)
  type t =
    | CheckStatus of CheckCommand.ExitStatus.t
    | PysaStatus of Taint.ExitStatus.t

  (** [exit_code status] is the process exit code for [status], obtained by delegating to the
      module that owns the wrapped status. *)
  let exit_code status =
    match status with
    | CheckStatus check_status ->
        (* 1-9 are reserved for CheckCommand.ExitStatus *)
        CheckCommand.ExitStatus.exit_code check_status
    | PysaStatus pysa_status ->
        (* 10-19 are reserved for Taint.ExitStatus *)
        Taint.ExitStatus.exit_code pysa_status
end
(** Options accepted by the analyze command, their JSON decoder, and the conversion into the
    record consumed by the static analysis. *)
module AnalyzeConfiguration = struct
  type t = {
    base: CommandStartup.BaseConfiguration.t;
    dump_call_graph: PyrePath.t option;
    dump_model_query_results: PyrePath.t option;
    find_missing_flows: string option;
    inline_decorators: bool;
    maximum_tito_depth: int option;
    maximum_trace_length: int option;
    no_verify: bool;
    repository_root: PyrePath.t option;
    rule_filter: int list option;
    save_results_to: PyrePath.t option;
    strict: bool;
    taint_model_paths: PyrePath.t list;
    use_cache: bool;
  }
  [@@deriving sexp, compare, hash]

  (** [of_yojson json] decodes an analyze configuration from [json].

      The shared base configuration is decoded first; its error, if any, is returned as is.
      Boolean members default to [false] and [taint_model_paths] defaults to the empty list.
      Any exception raised while decoding is turned into [Result.Error] carrying a message. *)
  let of_yojson json =
    let open Yojson.Safe.Util in
    let open JsonParsing in
    (* Members are read one after the other, so the first offending member is the one that
       determines the reported error. *)
    let decode_with base =
      let dump_call_graph = optional_path_member "dump_call_graph" json in
      let dump_model_query_results = optional_path_member "dump_model_query_results" json in
      let find_missing_flows = optional_string_member "find_missing_flows" json in
      let inline_decorators = bool_member "inline_decorators" ~default:false json in
      let maximum_tito_depth = optional_int_member "maximum_tito_depth" json in
      let maximum_trace_length = optional_int_member "maximum_trace_length" json in
      let no_verify = bool_member "no_verify" ~default:false json in
      let repository_root = optional_path_member "repository_root" json in
      let rule_filter =
        match member "rule_filter" json with
        | `Null -> None
        | rules -> Some (convert_each to_int rules)
      in
      let save_results_to = optional_path_member "save_results_to" json in
      let strict = bool_member "strict" ~default:false json in
      let taint_model_paths = path_list_member "taint_model_paths" ~default:[] json in
      let use_cache = bool_member "use_cache" ~default:false json in
      Result.Ok
        {
          base;
          dump_call_graph;
          dump_model_query_results;
          find_missing_flows;
          inline_decorators;
          maximum_tito_depth;
          maximum_trace_length;
          no_verify;
          repository_root;
          rule_filter;
          save_results_to;
          strict;
          taint_model_paths;
          use_cache;
        }
    in
    try
      match CommandStartup.BaseConfiguration.of_yojson json with
      | Result.Ok base -> decode_with base
      | Result.Error message -> Result.Error message
    with
    | Type_error (message, _) -> Result.Error message
    | Undefined (message, _) -> Result.Error message
    | other_exception -> Result.Error (Exn.to_string other_exception)


  (** [analysis_configuration_of analyze_configuration] builds the
      [Configuration.StaticAnalysis] record for an analyze run.

      The nested analysis configuration is created with external-source analysis, error traces
      and type-error storage all turned off, and with the shallow incremental style. When no
      local root is given, the global root is used in its place. Model verification is enabled
      unless [no_verify] is set. *)
  let analysis_configuration_of
      {
        base;
        dump_call_graph;
        dump_model_query_results;
        find_missing_flows;
        maximum_tito_depth;
        maximum_trace_length;
        no_verify;
        rule_filter;
        save_results_to;
        strict;
        taint_model_paths;
        use_cache;
        (* Not part of the static analysis configuration. *)
        inline_decorators = _;
        repository_root = _;
      }
    =
    (* Every base field is listed explicitly, including the ignored ones. *)
    let {
      CommandStartup.BaseConfiguration.source_paths;
      search_paths;
      excludes;
      checked_directory_allowlist;
      checked_directory_blocklist;
      extensions;
      log_path;
      global_root;
      local_root;
      debug;
      python_version =
        {
          Configuration.PythonVersion.major = python_major_version;
          minor = python_minor_version;
          micro = python_micro_version;
        };
      parallel;
      number_of_workers;
      shared_memory =
        {
          Configuration.SharedMemory.heap_size = shared_memory_heap_size;
          dependency_table_power = shared_memory_dependency_table_power;
          hash_table_power = shared_memory_hash_table_power;
        };
      enable_type_comments;
      remote_logging = _;
      profiling_output = _;
      memory_profiling_output = _;
    }
      =
      base
    in
    let configuration =
      Configuration.Analysis.create
        ~parallel
        ~analyze_external_sources:false
        ~filter_directories:checked_directory_allowlist
        ~ignore_all_errors:checked_directory_blocklist
        ~number_of_workers
        ~local_root:(Option.value local_root ~default:global_root)
        ~project_root:global_root
        ~search_paths:(List.map search_paths ~f:SearchPath.normalize)
        ~taint_model_paths
        ~strict
        ~debug
        ~show_error_traces:false
        ~excludes
        ~extensions
        ~store_type_errors:false
        ~incremental_style:Configuration.Analysis.Shallow
        ~log_directory:(PyrePath.absolute log_path)
        ~python_major_version
        ~python_minor_version
        ~python_micro_version
        ~shared_memory_heap_size
        ~shared_memory_dependency_table_power
        ~shared_memory_hash_table_power
        ~enable_type_comments
        ~source_paths:(Configuration.SourcePaths.to_search_paths source_paths)
        ()
    in
    {
      Configuration.StaticAnalysis.configuration;
      result_json_path = save_results_to;
      verify_models = not no_verify;
      dump_call_graph;
      dump_model_query_results;
      rule_filter;
      find_missing_flows;
      use_cache;
      maximum_trace_length;
      maximum_tito_depth;
    }
end
(** [with_performance_tracking f] runs [f ()] and returns its result.

    A timer is started before the call. Once [f] has returned, the timer and the garbage
    collector's minor-collection, major-collection and compaction counters are handed to
    [Statistics.performance] under the name ["analyze"]. If [f] raises, the exception
    propagates and nothing is reported. *)
let with_performance_tracking f =
  let timer = Timer.start () in
  let result = f () in
  (* Only collection counters are read here. [quick_stat] provides them without the heap
     traversal that [stat] performs to fill in the live/free figures, which are unused. *)
  let { Caml.Gc.minor_collections; major_collections; compactions; _ } =
    Caml.Gc.quick_stat ()
  in
  Statistics.performance
    ~name:"analyze"
    ~timer
    ~integers:
      [
        "gc_minor_collections", minor_collections;
        "gc_major_collections", major_collections;
        "gc_compactions", compactions;
      ]
    ();
  result
(** [run_taint_analysis ~static_analysis_configuration ~inline_decorators ~build_system
    ~repository_root ()] runs the taint analysis inside a scheduler, wrapped in
    [with_performance_tracking].

    The steps run in this order: analysis configuration initialization, collection of the
    decorators to skip, cache loading, type checking, class hierarchy and class interval
    construction, initial callable discovery, model initialization, and finally the analysis
    itself. Paths in the results are looked up relative to [repository_root] when it is given,
    and relative to the configuration's local root otherwise. *)
let run_taint_analysis
    ~static_analysis_configuration
    ~inline_decorators
    ~build_system
    ~repository_root
    ()
  =
  let { Configuration.StaticAnalysis.configuration; use_cache; _ } =
    static_analysis_configuration
  in
  let analyze_with scheduler =
    let analysis_kind = TaintAnalysis.abstract_kind in
    Interprocedural.FixpointAnalysis.initialize_configuration
      ~static_analysis_configuration
      analysis_kind;

    (* Collect decorators to skip before type-checking because decorator inlining happens in an
       early phase of type-checking and needs to know which decorators to skip. *)
    Service.StaticAnalysis.parse_and_save_decorators_to_skip ~inline_decorators configuration;

    let cache =
      Service.StaticAnalysis.Cache.load ~scheduler ~configuration ~enabled:use_cache
    in
    let environment = Service.StaticAnalysis.type_check ~scheduler ~configuration ~cache in
    let qualifiers =
      Analysis.ModuleTracker.tracked_explicit_modules
        (Analysis.TypeEnvironment.module_tracker environment)
    in
    let read_only_environment = Analysis.TypeEnvironment.read_only environment in
    let class_hierarchy_graph =
      Service.StaticAnalysis.build_class_hierarchy_graph
        ~scheduler
        ~cache
        ~environment:read_only_environment
        ~qualifiers
    in
    (* Called for its effect only; the returned value is not used. *)
    let _ = Service.StaticAnalysis.build_class_intervals class_hierarchy_graph in
    let initial_callables =
      Service.StaticAnalysis.fetch_initial_callables
        ~scheduler
        ~configuration
        ~cache
        ~environment:read_only_environment
        ~qualifiers
    in
    let { Service.StaticAnalysis.callables_with_dependency_information; stubs; _ } =
      initial_callables
    in
    let { Interprocedural.AnalysisResult.initial_models; skip_overrides } =
      Interprocedural.FixpointAnalysis.initialize_models
        analysis_kind
        ~static_analysis_configuration
        ~scheduler
        ~environment:(Analysis.TypeEnvironment.read_only environment)
        ~callables:(List.map callables_with_dependency_information ~f:fst)
        ~stubs
    in
    let ast_environment =
      Analysis.TypeEnvironment.ReadOnly.ast_environment
        (Analysis.TypeEnvironment.read_only environment)
    in
    (* Resolves a path reference through the build system, then makes it relative to the
       chosen root. Yields [None] when either step has no answer. *)
    let filename_lookup path_reference =
      Server.RequestHandler.instantiate_path
        ~build_system
        ~configuration
        ~ast_environment
        path_reference
      |> Option.bind ~f:(fun full_path ->
             let root = Option.value repository_root ~default:configuration.local_root in
             PyrePath.get_relative_to_root ~root ~path:(PyrePath.create_absolute full_path))
    in
    Service.StaticAnalysis.analyze
      ~scheduler
      ~analysis:analysis_kind
      ~static_analysis_configuration
      ~cache
      ~filename_lookup
      ~environment
      ~qualifiers
      ~initial_callables
      ~initial_models
      ~skip_overrides
      ()
  in
  with_performance_tracking (fun () ->
      Scheduler.with_scheduler ~configuration ~f:analyze_with)
(** [run_analyze analyze_configuration] sets up the build system for the configured source
    paths, runs the taint analysis within it, and resolves to the check command's [Ok] status
    once the analysis has returned. *)
let run_analyze
    ({
       AnalyzeConfiguration.base = { CommandStartup.BaseConfiguration.source_paths; _ };
       inline_decorators;
       repository_root;
       _;
     } as analyze_configuration)
  =
  let analyze_within build_system =
    let static_analysis_configuration =
      AnalyzeConfiguration.analysis_configuration_of analyze_configuration
    in
    run_taint_analysis
      ~static_analysis_configuration
      ~build_system
      ~inline_decorators
      ~repository_root
      ();
    Lwt.return (ExitStatus.CheckStatus CheckCommand.ExitStatus.Ok)
  in
  Server.BuildSystem.with_build_system source_paths ~f:analyze_within
(** [run_analyze configuration_file] is the command entry point. It reads and decodes the
    configuration file, sets up global state, runs the analysis, flushes statistics, and exits
    the process with the resulting exit code.

    A configuration that cannot be read or decoded is logged and reported as [PyreError]. An
    exception escaping the analysis is converted to a status by [CheckCommand.on_exception]. *)
let run_analyze configuration_file =
  let parsed_configuration =
    CommandStartup.read_and_parse_json configuration_file ~f:AnalyzeConfiguration.of_yojson
  in
  let exit_status =
    match parsed_configuration with
    | Result.Ok analyze_configuration ->
        let {
          CommandStartup.BaseConfiguration.global_root;
          local_root;
          debug;
          remote_logging;
          profiling_output;
          memory_profiling_output;
          _;
        }
          =
          analyze_configuration.AnalyzeConfiguration.base
        in
        CommandStartup.setup_global_states
          ~global_root
          ~local_root
          ~debug
          ~additional_logging_sections:[]
          ~remote_logging
          ~profiling_output
          ~memory_profiling_output
          ();
        let status_of_exception exn =
          Lwt.return (ExitStatus.CheckStatus (CheckCommand.on_exception exn))
        in
        (* [run_analyze] here is the earlier definition that takes a parsed configuration. *)
        Lwt_main.run
          (Lwt.catch (fun () -> run_analyze analyze_configuration) status_of_exception)
    | Result.Error message ->
        Log.error "%s" message;
        ExitStatus.CheckStatus CheckCommand.ExitStatus.PyreError
  in
  Statistics.flush ();
  exit (ExitStatus.exit_code exit_status)
(** Command-line definition of the analyze command: it takes a single anonymous [filename]
    argument and passes it to [run_analyze]. *)
let command =
  let run_on_file filename () = run_analyze filename in
  let analyze_parameter =
    Command.Param.(map (anon ("filename" %: Filename.arg_type)) ~f:run_on_file)
  in
  Command.basic ~summary:"Runs taint analysis" analyze_parameter