(* source/interprocedural_analyses/taint/taintConfiguration.ml *)

(*
 * Copyright (c) Meta Platforms, Inc. and affiliates.
 *
 * This source code is licensed under the MIT license found in the
 * LICENSE file in the root directory of this source tree.
 *)

open Core
open Pyre
module SharedMemory = Memory
module Json = Yojson.Safe
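
(* This module holds the taint analysis configuration: the known sources, sinks, transforms,
   features and rules, plus analysis-wide options. The configuration is parsed from the
   `.config` JSON files found under the taint model paths, validated, and cached in shared
   memory so it can be retrieved later with `get`. *)
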
module Rule = struct
  type t = {
    sources: Sources.t list;
    sinks: Sinks.t list;
    transforms: TaintTransform.t list;
    code: int;
    name: string;
    message_format: string; (* format *)
  }
  [@@deriving compare, show]
end

type literal_string_sink = {
  pattern: Re2.t;
  sink_kind: Sinks.t;
}

type implicit_sinks = {
  conditional_test: Sinks.t list;
  literal_string_sinks: literal_string_sink list;
}

let empty_implicit_sinks = { conditional_test = []; literal_string_sinks = [] }

type literal_string_source = {
  pattern: Re2.t;
  source_kind: Sources.t;
}

type implicit_sources = { literal_strings: literal_string_source list }

let empty_implicit_sources = { literal_strings = [] }

type analysis_model_constraints = {
  maximum_model_width: int;
  maximum_return_access_path_length: int;
  maximum_overrides_to_analyze: int option;
  maximum_trace_length: int option;
  maximum_tito_depth: int option;
}

let default_analysis_model_constraints =
  {
    maximum_model_width = 25;
    maximum_return_access_path_length = 10;
    maximum_overrides_to_analyze = None;
    maximum_trace_length = None;
    maximum_tito_depth = None;
  }

type partial_sink_converter = (Sources.t list * Sinks.t) list String.Map.Tree.t

type missing_flows_kind =
  (* Find missing flows through obscure models. *)
  | Obscure
  (* Find missing flows due to missing type information. *)
  | Type
[@@deriving compare, show]

let missing_flows_kind_from_string = function
  | "obscure" -> Some Obscure
  | "type" -> Some Type
  | _ -> None

type t = {
  sources: AnnotationParser.source_or_sink list;
  sinks: AnnotationParser.source_or_sink list;
  transforms: TaintTransform.t list;
  features: string list;
  rules: Rule.t list;
  implicit_sinks: implicit_sinks;
  implicit_sources: implicit_sources;
  partial_sink_converter: partial_sink_converter;
  partial_sink_labels: string list String.Map.Tree.t;
  matching_sources: Sources.Set.t Sinks.Map.t;
  matching_sinks: Sinks.Set.t Sources.Map.t;
  possible_tito_transforms: TaintTransforms.Set.t;
  find_missing_flows: missing_flows_kind option;
  dump_model_query_results_path: PyrePath.t option;
  analysis_model_constraints: analysis_model_constraints;
  lineage_analysis: bool;
}

let empty =
  {
    sources = [];
    sinks = [];
    transforms = [];
    features = [];
    rules = [];
    partial_sink_converter = String.Map.Tree.empty;
    implicit_sinks = empty_implicit_sinks;
    implicit_sources = empty_implicit_sources;
    partial_sink_labels = String.Map.Tree.empty;
    matching_sources = Sinks.Map.empty;
    matching_sinks = Sources.Map.empty;
    possible_tito_transforms = TaintTransforms.Set.empty;
    find_missing_flows = None;
    dump_model_query_results_path = None;
    analysis_model_constraints = default_analysis_model_constraints;
    lineage_analysis = false;
  }

module ConfigurationSharedMemory =
  SharedMemory.WithCache.Make
    (SharedMemory.SingletonKey)
    (struct
      type nonrec t = t

      let prefix = Prefix.make ()

      let description = "Taint configuration"

      let unmarshall value = Marshal.from_string value 0
    end)

module Error = struct
  type kind =
    | FileNotFound
    | FileRead
    | InvalidJson of string
    | NoConfigurationFound
    | UnexpectedJsonType of {
        json: Json.t;
        message: string;
        section: string option;
      }
    | MissingKey of {
        key: string;
        section: string;
      }
    | UnknownKey of {
        key: string;
        section: string;
      }
    | UnsupportedSource of string
    | UnsupportedSink of string
    | UnsupportedTransform of string
    | UnexpectedCombinedSourceRule of Json.t
    | PartialSinkDuplicate of string
    | InvalidLabelMultiSink of {
        label: string;
        sink: string;
        labels: string list;
      }
    | InvalidMultiSink of string
    | RuleCodeDuplicate of int
    | OptionDuplicate of string
    | SourceDuplicate of string
    | SinkDuplicate of string
    | TransformDuplicate of string
    | FeatureDuplicate of string
  [@@deriving equal]

  type t = {
    kind: kind;
    path: PyrePath.t option;
  }
  [@@deriving equal]

  let create ~path ~kind = { kind; path = Some path }

  let pp_kind formatter = function
    | FileNotFound -> Format.fprintf formatter "File not found"
    | FileRead -> Format.fprintf formatter "Could not read file"
    | InvalidJson error -> Format.fprintf formatter "%s" error
    | NoConfigurationFound ->
        Format.fprintf formatter "No `.config` was found in the taint directories"
    | UnexpectedJsonType { json; message; section } ->
        let json =
          match json with
          | `Null -> ""
          | _ -> Format.sprintf ": `%s`" (Json.to_string json)
        in
        let section =
          match section with
          | Some section -> Format.sprintf " for section `%s`" section
          | None -> ""
        in
        Format.fprintf formatter "%s%s%s" message json section
    | MissingKey { key; section } ->
        Format.fprintf formatter "Required key `%s` is missing in section `%s`" key section
    | UnknownKey { key; section } ->
        Format.fprintf formatter "Unknown key `%s` encountered in section `%s`" key section
    | UnsupportedSource source -> Format.fprintf formatter "Unsupported taint source `%s`" source
    | UnsupportedSink sink -> Format.fprintf formatter "Unsupported taint sink `%s`" sink
    | UnsupportedTransform transform ->
        Format.fprintf formatter "Unsupported taint transform `%s`" transform
    | UnexpectedCombinedSourceRule json ->
        Format.fprintf
          formatter
          "Combined source rules must be of the form {\"a\": [\"SourceA\"], \"b\": [\"SourceB\"]}, \
           got `%s`"
          (Json.to_string json)
    | PartialSinkDuplicate partial_sink ->
        Format.fprintf
          formatter
          "Partial sinks must be unique - an entry for `%s` already exists"
          partial_sink
    | InvalidLabelMultiSink { label; sink; labels } ->
        Format.fprintf
          formatter
          "`%s` is an invalid label for multi sink `%s` (choices: `%s`)"
          label
          sink
          (String.concat labels ~sep:", ")
    | InvalidMultiSink sink -> Format.fprintf formatter "`%s` is not a multi sink" sink
    | RuleCodeDuplicate code ->
        Format.fprintf formatter "Multiple rules share the same code `%d`" code
    | OptionDuplicate name ->
        Format.fprintf formatter "Multiple values were passed in for option `%s`" name
    | SourceDuplicate name -> Format.fprintf formatter "Duplicate entry for source: `%s`" name
    | SinkDuplicate name -> Format.fprintf formatter "Duplicate entry for sink: `%s`" name
    | TransformDuplicate name ->
        Format.fprintf formatter "Duplicate entry for transform: `%s`" name
    | FeatureDuplicate name -> Format.fprintf formatter "Duplicate entry for feature: `%s`" name

  let code = function
    | FileNotFound -> 1
    | FileRead -> 2
    | InvalidJson _ -> 3
    | NoConfigurationFound -> 4
    | UnexpectedJsonType _ -> 5
    | MissingKey _ -> 6
    | UnknownKey _ -> 7
    | UnsupportedSource _ -> 8
    | UnsupportedSink _ -> 9
    | UnexpectedCombinedSourceRule _ -> 10
    | PartialSinkDuplicate _ -> 11
    | InvalidLabelMultiSink _ -> 12
    | InvalidMultiSink _ -> 13
    | RuleCodeDuplicate _ -> 14
    | OptionDuplicate _ -> 15
    | SourceDuplicate _ -> 16
    | SinkDuplicate _ -> 17
    | FeatureDuplicate _ -> 18
    | UnsupportedTransform _ -> 19
    | TransformDuplicate _ -> 20

  let show_kind = Format.asprintf "%a" pp_kind

  let pp formatter = function
    | { path = None; kind } -> pp_kind formatter kind
    | { path = Some path; kind } ->
        Format.fprintf formatter "%a: %a" PyrePath.pp path pp_kind kind

  let show = Format.asprintf "%a" pp

  let to_json { path; kind } =
    let path =
      match path with
      | None -> `Null
      | Some path -> `String (PyrePath.absolute path)
    in
    `Assoc ["description", `String (show_kind kind); "path", path; "code", `Int (code kind)]
end
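
(* For reference, `to_json` renders an error as a JSON object such as
   {"description": "Required key `name` is missing in section `sources`", "path": null, "code": 6}
   ("path" names the offending `.config` file when the error is tied to one); `abort_on_error`
   below prints the collected errors as a JSON list under an "errors" key. *)
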
(* Given a rule to find flows of the form:
 * source -> T1 -> T2 -> T3 -> ... -> Tn -> sink
 * Following are different ways we can find matching flows:
 * source -> T1:T2:T3:...:Tn:sink
 * T1:source -> T2:T3:...:Tn:sink
 * T2:T1:source -> T3:...:Tn:sink
 * ...
 * Tn:...:T3:T2:T1:source -> sink
 *)
let transform_splits transforms =
  let rec split ~result ~prefix ~suffix =
    let result = (prefix, suffix) :: result in
    match suffix with
    | [] -> result
    | next :: suffix -> split ~result ~prefix:(next :: prefix) ~suffix
  in
  split ~result:[] ~prefix:[] ~suffix:transforms
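
(* For example, `transform_splits [T1; T2]` evaluates to
   [([T2; T1], []); ([T1], [T2]); ([], [T1; T2])]:
   each pair holds the transforms already moved to the source side (most recent first) and the
   suffix left on the sink side. *)
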
let matching_kinds_from_rules rules =
  let add_sources_sinks (matching_sources, matching_sinks) (sources, sinks) =
    let sinks_set = Sinks.Set.of_list sinks in
    let sources_set = Sources.Set.of_list sources in
    let update_matching_sources matching_sources sink =
      Sinks.Map.update
        sink
        (function
          | None -> Some sources_set
          | Some sources -> Some (Sources.Set.union sources sources_set))
        matching_sources
    in
    let update_matching_sinks matching_sinks source =
      Sources.Map.update
        source
        (function
          | None -> Some sinks_set
          | Some sinks -> Some (Sinks.Set.union sinks sinks_set))
        matching_sinks
    in
    let matching_sources = List.fold ~f:update_matching_sources ~init:matching_sources sinks in
    let matching_sinks = List.fold ~f:update_matching_sinks ~init:matching_sinks sources in
    matching_sources, matching_sinks
  in
  let add_rule sofar { Rule.sources; sinks; transforms; _ } =
    let update sofar (source_transforms, sink_transforms) =
      let sources =
        if List.is_empty source_transforms then
          sources
        else
          List.map sources ~f:(fun base ->
              Sources.Transform
                {
                  base;
                  global = TaintTransforms.of_named_transforms source_transforms;
                  local = TaintTransforms.empty;
                })
      in
      let sinks =
        if List.is_empty sink_transforms then
          sinks
        else
          List.map sinks ~f:(fun base ->
              Sinks.Transform
                {
                  base;
                  global = TaintTransforms.of_named_transforms sink_transforms;
                  local = TaintTransforms.empty;
                })
      in
      add_sources_sinks sofar (sources, sinks)
    in
    transform_splits transforms |> List.fold ~init:sofar ~f:update
  in
  List.fold ~f:add_rule ~init:(Sinks.Map.empty, Sources.Map.empty) rules

(* For a TITO to extend to an actual issue, the transforms in it must be a substring
   (contiguous subsequence) of transforms appearing in a rule. In addition to optimization,
   this is used for ensuring termination. We do not consider arbitrarily long transform
   sequences in the analysis. *)
let possible_tito_transforms_from_rules rules =
  let rec suffixes l = l :: Option.value_map (List.tl l) ~default:[] ~f:suffixes in
  let prefixes l = List.rev l |> suffixes |> List.map ~f:List.rev in
  let substrings l = List.concat_map (prefixes l) ~f:suffixes in
  List.concat_map rules ~f:(fun { Rule.transforms; _ } -> substrings transforms)
  |> List.map ~f:TaintTransforms.of_named_transforms
  |> TaintTransforms.Set.of_list
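
(* For example, a rule whose transforms are [T1; T2] contributes the contiguous subsequences
   [], [T1], [T2] and [T1; T2] (but not [T2; T1]) to the set of possible TITO transforms. *)
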
module PartialSinkConverter = struct
  let mangle { Sinks.kind; label } = Format.sprintf "%s$%s" kind label

  let add map ~first_sources ~first_sinks ~second_sources ~second_sinks =
    let add map (first_sink, second_sink) =
      (* Trigger second sink when the first sink matches a source, and vice versa. *)
      String.Map.Tree.add_multi
        map
        ~key:(mangle first_sink)
        ~data:(first_sources, Sinks.TriggeredPartialSink second_sink)
      |> String.Map.Tree.add_multi
           ~key:(mangle second_sink)
           ~data:(second_sources, Sinks.TriggeredPartialSink first_sink)
    in
    List.cartesian_product first_sinks second_sinks |> List.fold ~f:add ~init:map

  let merge left right =
    String.Map.Tree.merge
      ~f:
        (fun ~key:_ -> function
          | `Left value
          | `Right value ->
              Some value
          | `Both (left, right) -> Some (left @ right))
      left
      right

  let get_triggered_sink sink_to_sources ~partial_sink ~source =
    match mangle partial_sink |> String.Map.Tree.find sink_to_sources with
    | Some source_and_sink_list ->
        List.find source_and_sink_list ~f:(fun (supported_sources, _) ->
            List.exists supported_sources ~f:(Sources.equal source))
        >>| snd
    | _ -> None
end
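
(* `parse` consumes a list of `(path, json)` pairs, one per `.config` file. The JSON below is an
   illustrative sketch of the accepted shape; the kind names and regexps are placeholders rather
   than required values:
   {
     "sources": [{ "name": "UserControlled" }, { "name": "Dictionary", "kind": "parametric" }],
     "sinks": [{ "name": "RemoteCodeExecution" }],
     "transforms": [{ "name": "SanitizeHtml" }],
     "features": [{ "name": "string_concat_lhs" }],
     "rules": [
       {
         "name": "Possible shell injection",
         "code": 5001,
         "sources": ["UserControlled"],
         "sinks": ["RemoteCodeExecution"],
         "message_format": "Data from [{$sources}] may reach [{$sinks}]"
       }
     ],
     "implicit_sinks": {
       "conditional_test": ["RemoteCodeExecution"],
       "literal_strings": [{ "kind": "RemoteCodeExecution", "regexp": "select.*from" }]
     },
     "implicit_sources": {
       "literal_strings": [{ "kind": "UserControlled", "regexp": "grpc://.*" }]
     },
     "options": { "maximum_trace_length": 10, "maximum_tito_depth": 5 },
     "lineage_analysis": false
   }
   Multiple `.config` files are merged: the list sections are concatenated, while each integer
   option may be set in at most one file (otherwise `OptionDuplicate` is reported). *)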
let parse source_jsons =
  let open Result in
  let json_exception_to_error ~path ?section f =
    try f () with
    | Json.Util.Type_error (message, json)
    | Json.Util.Undefined (message, json) ->
        Error [Error.create ~path ~kind:(Error.UnexpectedJsonType { json; message; section })]
  in
  let json_bool_member ~path key value ~default =
    json_exception_to_error ~path ~section:key (fun () ->
        Json.Util.member key value
        |> Yojson.Safe.Util.to_bool_option
        |> Option.value ~default
        |> Result.return)
  in
  let json_string_member ~path key value =
    json_exception_to_error ~path ~section:key (fun () ->
        Json.Util.member key value |> Json.Util.to_string |> Result.return)
  in
  let json_integer_member ~path key value =
    json_exception_to_error ~path ~section:key (fun () ->
        Json.Util.member key value |> Json.Util.to_int |> Result.return)
  in
  let member name json =
    try Json.Util.member name json with
    | Not_found -> `Null
  in
  let array_member ~path ?section name json =
    match member name json with
    | `Null -> Ok []
    | json ->
        json_exception_to_error ~path ?section (fun () -> Json.Util.to_list json |> Result.return)
  in
  let json_string_list ~path ?section json =
    json_exception_to_error ~path ?section (fun () ->
        Json.Util.to_list json |> List.map ~f:Json.Util.to_string |> Result.return)
  in
  let parse_kind ~path ?section json =
    match member "kind" json with
    | `Null -> Ok AnnotationParser.Named
    | `String "parametric" -> Ok AnnotationParser.Parametric
    | json ->
        Error
          [
            Error.create
              ~path
              ~kind:(Error.UnexpectedJsonType { json; message = "Unexpected kind"; section });
          ]
  in
  let check_keys ~path ~section ~required_keys ~valid_keys ~current_keys =
    let valid_keys_hash_set = String.Hash_set.of_list valid_keys in
    let current_keys_hash_set = String.Hash_set.of_list current_keys in
    let check_required_key_present key =
      if not (Hash_set.mem current_keys_hash_set key) then
        Error (Error.create ~path ~kind:(Error.MissingKey { key; section }))
      else
        Ok ()
    in
    let check_key_is_valid key =
      if not (Hash_set.mem valid_keys_hash_set key) then
        Error (Error.create ~path ~kind:(Error.UnknownKey { key; section }))
      else
        Ok ()
    in
    List.map current_keys ~f:check_key_is_valid
    |> List.rev_append (List.map required_keys ~f:check_required_key_present)
    |> Result.combine_errors_unit
  in
  let parse_source_or_sink_annotation ~path ~section json =
    check_keys
      ~path
      ~section
      ~required_keys:["name"]
      ~current_keys:(Json.Util.keys json)
      ~valid_keys:["name"; "kind"; "comment"]
    >>= fun () ->
    json_string_member ~path "name" json
    >>= fun name -> parse_kind ~path json >>| fun kind -> { AnnotationParser.name; kind }
  in
  let parse_source_annotations (path, json) =
    array_member ~path "sources" json
    >>= fun json ->
    List.map ~f:(parse_source_or_sink_annotation ~path ~section:"sources") json
    |> Result.combine_errors
    |> Result.map_error ~f:List.concat
  in
  let parse_sink_annotations (path, json) =
    array_member ~path "sinks" json
    >>= fun json ->
    List.map ~f:(parse_source_or_sink_annotation ~path ~section:"sinks") json
    |> Result.combine_errors
    |> Result.map_error ~f:List.concat
  in
  let parse_transform ~path ~section json =
    check_keys
      ~path
      ~section
      ~required_keys:["name"]
      ~current_keys:(Json.Util.keys json)
      ~valid_keys:["name"; "comment"]
    >>= fun () ->
    json_string_member ~path "name" json >>= fun name -> Ok (TaintTransform.Named name)
  in
  let parse_transforms (path, json) =
    array_member ~path "transforms" json
    >>= fun json ->
    List.map ~f:(parse_transform ~path ~section:"transforms") json
    |> Result.combine_errors
    |> Result.map_error ~f:List.concat
  in
  let parse_features (path, json) =
    array_member ~path "features" json
    >>= fun json ->
    List.map ~f:(json_string_member ~path "name") json
    |> Result.combine_errors
    |> Result.map_error ~f:List.concat
  in
  let seen_rules = Int.Hash_set.create () in
  let validate_code_uniqueness ~path code =
    if Hash_set.mem seen_rules code then
      Error [Error.create ~path ~kind:(Error.RuleCodeDuplicate code)]
    else (
      Hash_set.add seen_rules code;
      Ok ())
  in
  let parse_source_reference ~path ~allowed_sources source =
    AnnotationParser.parse_source ~allowed:allowed_sources source
    |> Result.map_error ~f:(fun _ -> Error.create ~path ~kind:(Error.UnsupportedSource source))
  in
  let parse_sink_reference ~path ~allowed_sinks sink =
    AnnotationParser.parse_sink ~allowed:allowed_sinks sink
    |> Result.map_error ~f:(fun _ -> Error.create ~path ~kind:(Error.UnsupportedSink sink))
  in
  let parse_transform_reference ~path ~allowed_transforms transform =
    AnnotationParser.parse_transform ~allowed:allowed_transforms transform
    |> Result.map_error ~f:(fun _ ->
           Error.create ~path ~kind:(Error.UnsupportedTransform transform))
  in
  let parse_rules ~allowed_sources ~allowed_sinks ~allowed_transforms (path, json) =
    let parse_rule json =
      let required_keys = ["name"; "code"; "sources"; "sinks"; "message_format"] in
      let valid_keys = "oncall" :: "comment" :: "transforms" :: required_keys in
      check_keys
        ~path
        ~section:"rules"
        ~required_keys
        ~valid_keys
        ~current_keys:(Json.Util.keys json)
      >>= fun () ->
      Json.Util.member "sources" json
      |> json_string_list ~path ~section:"rules"
      >>= fun sources ->
      List.map ~f:(parse_source_reference ~path ~allowed_sources) sources
      |> Result.combine_errors
      >>= fun sources ->
      Json.Util.member "sinks" json
      |> json_string_list ~path ~section:"rules"
      >>= fun sinks ->
      List.map ~f:(parse_sink_reference ~path ~allowed_sinks) sinks
      |> Result.combine_errors
      >>= fun sinks ->
      (match member "transforms" json with
      | `Null -> Ok []
      | transforms -> json_string_list ~path ~section:"rules" transforms)
      >>= fun transforms ->
      List.map ~f:(parse_transform_reference ~path ~allowed_transforms) transforms
      |> Result.combine_errors
      >>= fun transforms ->
      json_string_member ~path "name" json
      >>= fun name ->
      json_string_member ~path "message_format" json
      >>= fun message_format ->
      json_integer_member ~path "code" json
      >>= fun code ->
      validate_code_uniqueness ~path code
      >>| fun () -> { Rule.sources; sinks; transforms; name; code; message_format }
    in
    array_member ~path "rules" json
    >>= fun rules ->
    List.map ~f:parse_rule rules |> Result.combine_errors |> Result.map_error ~f:List.concat
  in
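
  (* Combined source rules flag flows where two different sources reach the two halves of a
     partial sink (for instance, two arguments of the same call). A hypothetical entry, using
     the keys read below:
     {
       "name": "User data compared against secret",
       "code": 5100,
       "message_format": "...",
       "sources": { "a": ["UserControlled"], "b": ["Secrets"] },
       "partial_sink": "UserSecretComparison"
     }
     Each of the two labels ("a" and "b" here) produces an ordinary rule whose sink is the
     corresponding `TriggeredPartialSink`, and the pair is recorded in the partial sink
     converter so that matching one half triggers the other. *)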
  let parse_combined_source_rules ~allowed_sources (path, json) =
    let parse_combined_source_rule sofar json =
      sofar
      >>= fun (rules, partial_sink_converter, partial_sink_labels) ->
      json_string_member ~path "name" json
      >>= fun name ->
      json_string_member ~path "message_format" json
      >>= fun message_format ->
      json_integer_member ~path "code" json
      >>= fun code ->
      validate_code_uniqueness ~path code
      >>= fun () ->
      let sources = Json.Util.member "sources" json in
      let keys = Json.Util.keys sources in
      match keys with
      | [first; second] ->
          let parse_sources sources =
            (match sources with
            | `String source -> Ok [source]
            | `List _ -> json_string_list ~path sources
            | _ -> Error [Error.create ~path ~kind:(Error.UnexpectedCombinedSourceRule json)])
            >>= fun sources ->
            List.map ~f:(parse_source_reference ~path ~allowed_sources) sources
            |> Result.combine_errors
          in
          Json.Util.member first sources
          |> parse_sources
          >>= fun first_sources ->
          Json.Util.member second sources
          |> parse_sources
          >>= fun second_sources ->
          json_string_member ~path "partial_sink" json
          >>= fun partial_sink ->
          if String.Map.Tree.mem partial_sink_labels partial_sink then
            Error [Error.create ~path ~kind:(Error.PartialSinkDuplicate partial_sink)]
          else
            let partial_sink_labels =
              String.Map.Tree.set partial_sink_labels ~key:partial_sink ~data:[first; second]
            in
            let create_partial_sink label sink =
              match String.Map.Tree.find partial_sink_labels sink with
              | Some labels when not (List.mem ~equal:String.equal labels label) ->
                  Error
                    [Error.create ~path ~kind:(Error.InvalidLabelMultiSink { label; sink; labels })]
              | None -> Error [Error.create ~path ~kind:(Error.InvalidMultiSink sink)]
              | _ -> Ok { Sinks.kind = sink; label }
            in
            create_partial_sink first partial_sink
            >>= fun first_sink ->
            create_partial_sink second partial_sink
            >>| fun second_sink ->
            ( {
                Rule.sources = first_sources;
                sinks = [Sinks.TriggeredPartialSink first_sink];
                transforms = [];
                name;
                code;
                message_format;
              }
              :: {
                   Rule.sources = second_sources;
                   sinks = [Sinks.TriggeredPartialSink second_sink];
                   transforms = [];
                   name;
                   code;
                   message_format;
                 }
              :: rules,
              PartialSinkConverter.add
                partial_sink_converter
                ~first_sources
                ~first_sinks:[first_sink]
                ~second_sources
                ~second_sinks:[second_sink],
              partial_sink_labels )
      | _ -> Error [Error.create ~path ~kind:(Error.UnexpectedCombinedSourceRule json)]
    in
    array_member ~path "combined_source_rules" json
    >>= List.fold
          ~init:(Ok ([], String.Map.Tree.empty, String.Map.Tree.empty))
          ~f:parse_combined_source_rule
  in
  let parse_implicit_sinks ~allowed_sinks (path, json) =
    match member "implicit_sinks" json with
    | `Null -> Ok empty_implicit_sinks
    | implicit_sinks ->
        check_keys
          ~path
          ~section:"implicit_sinks"
          ~required_keys:[]
          ~valid_keys:["conditional_test"; "literal_strings"]
          ~current_keys:(Json.Util.keys implicit_sinks)
        >>= fun () ->
        (match member "conditional_test" implicit_sinks with
        | `Null -> Ok []
        | conditional_test ->
            json_string_list ~path conditional_test
            >>= fun sinks ->
            List.map ~f:(parse_sink_reference ~path ~allowed_sinks) sinks
            |> Result.combine_errors)
        >>= fun conditional_test ->
        array_member ~path "literal_strings" implicit_sinks
        >>= fun literal_strings ->
        List.map
          ~f:(fun json ->
            json_string_member ~path "kind" json
            >>= fun sink ->
            parse_sink_reference ~path ~allowed_sinks sink
            |> Result.map_error ~f:(fun error -> [error])
            >>= fun sink_kind ->
            json_string_member ~path "regexp" json
            >>| fun pattern -> { sink_kind; pattern = Re2.create_exn pattern })
          literal_strings
        |> Result.combine_errors
        |> Result.map_error ~f:List.concat
        >>| fun literal_string_sinks -> { conditional_test; literal_string_sinks }
  in
  let parse_implicit_sources ~allowed_sources (path, json) =
    match member "implicit_sources" json with
    | `Null -> Ok { literal_strings = [] }
    | implicit_sources ->
        check_keys
          ~path
          ~section:"implicit_sources"
          ~required_keys:[]
          ~valid_keys:["conditional_test"; "literal_strings"]
          ~current_keys:(Json.Util.keys implicit_sources)
        >>= fun () ->
        array_member ~path "literal_strings" implicit_sources
        >>= fun literal_strings ->
        List.map
          ~f:(fun json ->
            json_string_member ~path "kind" json
            >>= fun source ->
            parse_source_reference ~path ~allowed_sources source
            |> Result.map_error ~f:(fun error -> [error])
            >>= fun source_kind ->
            json_string_member ~path "regexp" json
            >>| fun pattern -> { source_kind; pattern = Re2.create_exn pattern })
          literal_strings
        |> Result.combine_errors
        |> Result.map_error ~f:List.concat
        >>| fun literal_strings -> { literal_strings }
  in
  List.map source_jsons ~f:parse_source_annotations
  |> Result.combine_errors
  |> Result.map_error ~f:List.concat
  >>| List.concat
  >>= fun sources ->
  List.map source_jsons ~f:parse_sink_annotations
  |> Result.combine_errors
  |> Result.map_error ~f:List.concat
  >>| List.concat
  >>= fun sinks ->
  List.map source_jsons ~f:parse_transforms
  |> Result.combine_errors
  |> Result.map_error ~f:List.concat
  >>| List.concat
  >>= fun transforms ->
  List.map source_jsons ~f:parse_features
  |> Result.combine_errors
  |> Result.map_error ~f:List.concat
  >>| List.concat
  >>= fun features ->
  List.map
    source_jsons
    ~f:(parse_rules ~allowed_sources:sources ~allowed_sinks:sinks ~allowed_transforms:transforms)
  |> Result.combine_errors
  |> Result.map_error ~f:List.concat
  >>| List.concat
  >>= fun rules ->
  List.map source_jsons ~f:(parse_combined_source_rules ~allowed_sources:sources)
  |> Result.combine_errors
  |> Result.map_error ~f:List.concat
  >>| List.unzip3
  >>= fun (generated_combined_rules, partial_sink_converters, partial_sink_labels) ->
  let generated_combined_rules = List.concat generated_combined_rules in
  let partial_sink_converter =
    List.fold partial_sink_converters ~init:String.Map.Tree.empty ~f:PartialSinkConverter.merge
  in
  let partial_sink_labels =
    List.fold partial_sink_labels ~init:String.Map.Tree.empty ~f:PartialSinkConverter.merge
  in
  let merge_implicit_sinks left right =
    {
      conditional_test = left.conditional_test @ right.conditional_test;
      literal_string_sinks = left.literal_string_sinks @ right.literal_string_sinks;
    }
  in
  List.map source_jsons ~f:(parse_implicit_sinks ~allowed_sinks:sinks)
  |> Result.combine_errors
  |> Result.map_error ~f:List.concat
  >>| List.fold ~init:empty_implicit_sinks ~f:merge_implicit_sinks
  >>= fun implicit_sinks ->
  let parse_integer_option name =
    let parse_single_json (path, json) =
      match member "options" json with
      | `Null -> Ok None
      | options -> (
          match member name options with
          | `Null -> Ok None
          | `Int value -> Ok (Some value)
          | json ->
              Error
                (Error.create
                   ~path
                   ~kind:
                     (Error.UnexpectedJsonType
                        { json; message = "Expected integer, got"; section = Some "options" })))
    in
    List.map source_jsons ~f:parse_single_json
    |> Result.combine_errors
    >>| List.filter_map ~f:Fn.id
    >>= function
    | [] -> Ok None
    | [value] -> Ok (Some value)
    | _ -> Error [{ Error.path = None; kind = Error.OptionDuplicate name }]
  in
  parse_integer_option "maximum_overrides_to_analyze"
  >>= fun maximum_overrides_to_analyze ->
  parse_integer_option "maximum_trace_length"
  >>= fun maximum_trace_length ->
  parse_integer_option "maximum_tito_depth"
  >>= fun maximum_tito_depth ->
  let merge_implicit_sources left right =
    { literal_strings = left.literal_strings @ right.literal_strings }
  in
  List.map source_jsons ~f:(parse_implicit_sources ~allowed_sources:sources)
  |> Result.combine_errors
  |> Result.map_error ~f:List.concat
  >>| List.fold ~init:empty_implicit_sources ~f:merge_implicit_sources
  >>= fun implicit_sources ->
  let parse_lineage_analysis (path, json) =
    json_bool_member ~path "lineage_analysis" json ~default:false
  in
  List.map ~f:parse_lineage_analysis source_jsons
  |> Result.combine_errors
  |> Result.map_error ~f:List.concat
  >>| List.exists ~f:Fn.id
  >>| fun lineage_analysis ->
  let rules = List.rev_append rules generated_combined_rules in
  let matching_sources, matching_sinks = matching_kinds_from_rules rules in
  let possible_tito_transforms = possible_tito_transforms_from_rules rules in
  {
    sources;
    sinks;
    transforms;
    features;
    rules;
    partial_sink_converter;
    implicit_sinks;
    implicit_sources;
    partial_sink_labels;
    matching_sources;
    matching_sinks;
    possible_tito_transforms;
    find_missing_flows = None;
    dump_model_query_results_path = None;
    analysis_model_constraints =
      {
        default_analysis_model_constraints with
        maximum_overrides_to_analyze;
        maximum_trace_length;
        maximum_tito_depth;
      };
    lineage_analysis;
  }

let validate ({ sources; sinks; transforms; features; _ } as configuration) =
  let ensure_list_unique ~get_name ~get_error elements =
    let seen = String.Hash_set.create () in
    let ensure_unique element =
      let element = get_name element in
      if Hash_set.mem seen element then
        Error [{ Error.path = None; kind = get_error element }]
      else (
        Hash_set.add seen element;
        Ok ())
    in
    List.map elements ~f:ensure_unique
    |> Result.combine_errors_unit
    |> Result.map_error ~f:List.concat
  in
  Result.combine_errors_unit
    [
      ensure_list_unique
        ~get_name:(fun { AnnotationParser.name; _ } -> name)
        ~get_error:(fun name -> Error.SourceDuplicate name)
        sources;
      ensure_list_unique
        ~get_name:(fun { AnnotationParser.name; _ } -> name)
        ~get_error:(fun name -> Error.SinkDuplicate name)
        sinks;
      ensure_list_unique
        ~get_name:TaintTransform.show
        ~get_error:(fun name -> Error.TransformDuplicate name)
        transforms;
      ensure_list_unique
        ~get_name:Fn.id
        ~get_error:(fun name -> Error.FeatureDuplicate name)
        features;
    ]
  |> Result.map_error ~f:List.concat
  |> Result.map ~f:(fun () -> configuration)

let abort_on_error = function
  | Ok configuration -> configuration
  | Error errors ->
      Yojson.Safe.pretty_to_string (`Assoc ["errors", `List (List.map errors ~f:Error.to_json)])
      |> Log.print "%s";
      exit (ExitStatus.exit_code ExitStatus.TaintConfigurationError)

let exception_on_error = function
  | Ok configuration -> configuration
  | Error (error :: _) -> Error.show error |> failwith
  | Error _ -> failwith "unreachable"

let register configuration =
  let key = SharedMemory.SingletonKey.key in
  let () =
    if ConfigurationSharedMemory.mem key then
      ConfigurationSharedMemory.remove_batch (ConfigurationSharedMemory.KeySet.singleton key)
  in
  ConfigurationSharedMemory.add key configuration

let default =
  let sources =
    List.map
      ~f:(fun name -> { AnnotationParser.name; kind = Named })
      ["Demo"; "Test"; "UserControlled"; "PII"; "Secrets"; "Cookies"]
  in
  let sinks =
    List.map
      ~f:(fun name -> { AnnotationParser.name; kind = Named })
      [
        "Demo";
        "FileSystem";
        "GetAttr";
        "Logging";
        "RemoteCodeExecution";
        "SQL";
        "Test";
        "XMLParser";
        "XSS";
      ]
  in
  let transforms =
    List.map ~f:(fun name -> TaintTransform.Named name) ["DemoTransform"; "TestTransform"]
  in
  let rules =
    [
      {
        Rule.sources = [Sources.NamedSource "UserControlled"];
        sinks = [Sinks.NamedSink "RemoteCodeExecution"];
        transforms = [];
        code = 5001;
        name = "Possible shell injection.";
        message_format =
          "Possible remote code execution due to [{$sources}] data reaching [{$sinks}] sink(s)";
      };
      {
        sources = [Sources.NamedSource "Test"; Sources.NamedSource "UserControlled"];
        sinks = [Sinks.NamedSink "Test"];
        transforms = [];
        code = 5002;
        name = "Test flow.";
        message_format = "Data from [{$sources}] source(s) may reach [{$sinks}] sink(s)";
      };
      {
        sources = [Sources.NamedSource "UserControlled"];
        sinks = [Sinks.NamedSink "SQL"];
        transforms = [];
        code = 5005;
        name = "User controlled data to SQL execution.";
        message_format = "Data from [{$sources}] source(s) may reach [{$sinks}] sink(s)";
      };
      {
        sources =
          [Sources.NamedSource "Cookies"; Sources.NamedSource "PII"; Sources.NamedSource "Secrets"];
        sinks = [Sinks.NamedSink "Logging"];
        transforms = [];
        code = 5006;
        name = "Restricted data being logged.";
        message_format = "Data from [{$sources}] source(s) may reach [{$sinks}] sink(s)";
      };
      {
        sources = [Sources.NamedSource "UserControlled"];
        sinks = [Sinks.NamedSink "XMLParser"];
        transforms = [];
        code = 5007;
        name = "User data to XML Parser.";
        message_format = "Data from [{$sources}] source(s) may reach [{$sinks}] sink(s)";
      };
      {
        sources = [Sources.NamedSource "UserControlled"];
        sinks = [Sinks.NamedSink "XSS"];
        transforms = [];
        code = 5008;
        name = "XSS";
        message_format = "Possible XSS due to [{$sources}] data reaching [{$sinks}] sink(s)";
      };
      {
        sources = [Sources.NamedSource "Demo"];
        sinks = [Sinks.NamedSink "Demo"];
        transforms = [];
        code = 5009;
        name = "Demo flow.";
        message_format = "Data from [{$sources}] source(s) may reach [{$sinks}] sink(s)";
      };
      {
        sources = [Sources.NamedSource "UserControlled"];
        sinks = [Sinks.NamedSink "GetAttr"];
        transforms = [];
        code = 5010;
        name = "User data to getattr.";
        message_format = "Attacker may control at least one argument to getattr(,).";
      };
      {
        sources = [Sources.NamedSource "Test"];
        sinks = [Sinks.NamedSink "Test"];
        transforms = [TaintTransform.Named "TestTransform"];
        code = 5011;
        name = "Flow with one transform.";
        message_format =
          "Data from [{$sources}] source(s) via [{$transforms}] may reach [{$sinks}] sink(s)";
      };
      {
        sources = [Sources.NamedSource "Test"];
        sinks = [Sinks.NamedSink "Test"];
        transforms = [TaintTransform.Named "TestTransform"; TaintTransform.Named "DemoTransform"];
        code = 5011;
        name = "Flow with two transforms.";
        message_format =
          "Data from [{$sources}] source(s) via [{$transforms}] may reach [{$sinks}] sink(s)";
      };
      {
        sources = [Sources.NamedSource "Demo"];
        sinks = [Sinks.NamedSink "Demo"];
        transforms = [];
        code = 6001;
        name = "Duplicate demo flow.";
        message_format =
          "Possible remote code execution due to [{$sources}] data reaching [{$sinks}] sink(s)";
      };
    ]
  in
  let matching_sources, matching_sinks = matching_kinds_from_rules rules in
  let possible_tito_transforms = possible_tito_transforms_from_rules rules in
  {
    sources;
    sinks;
    transforms;
    features =
      [
        "copy";
        "default";
        "object";
        "special_source";
        "special_sink";
        "string_concat_lhs";
        "string_concat_rhs";
      ];
    rules;
    partial_sink_converter = String.Map.Tree.empty;
    partial_sink_labels = String.Map.Tree.empty;
    implicit_sinks = empty_implicit_sinks;
    implicit_sources = empty_implicit_sources;
    matching_sources;
    matching_sinks;
    possible_tito_transforms;
    find_missing_flows = None;
    dump_model_query_results_path = None;
    analysis_model_constraints = default_analysis_model_constraints;
    lineage_analysis = false;
  }

let obscure_flows_configuration configuration =
  let rules =
    [
      {
        Rule.sources =
          List.map
            ~f:(fun { name = source; _ } -> Sources.NamedSource source)
            configuration.sources;
        sinks = [Sinks.NamedSink "Obscure"];
        transforms = [];
        code = 9001;
        name = "Obscure flow.";
        message_format = "Data from [{$sources}] source(s) may reach an obscure model";
      };
    ]
  in
  let matching_sources, matching_sinks = matching_kinds_from_rules rules in
  { configuration with rules; matching_sources; matching_sinks; find_missing_flows = Some Obscure }

let missing_type_flows_configuration configuration =
  let rules =
    [
      {
        Rule.sources =
          List.map
            ~f:(fun { name = source; _ } -> Sources.NamedSource source)
            configuration.sources;
        sinks = [Sinks.NamedSink "UnknownCallee"];
        transforms = [];
        code = 9002;
        name = "Unknown callee flow.";
        message_format = "Data from [{$sources}] source(s) may flow to an unknown callee";
      };
    ]
  in
  let matching_sources, matching_sinks = matching_kinds_from_rules rules in
  { configuration with rules; matching_sources; matching_sinks; find_missing_flows = Some Type }

let apply_missing_flows configuration = function
  | Obscure -> obscure_flows_configuration configuration
  | Type -> missing_type_flows_configuration configuration

let get () =
  match ConfigurationSharedMemory.get SharedMemory.SingletonKey.key with
  | None -> default
  | Some configuration -> configuration

let create
    ~rule_filter
    ~find_missing_flows
    ~dump_model_query_results_path
    ~maximum_trace_length
    ~maximum_tito_depth
    ~taint_model_paths
  =
  let open Result in
  let file_paths =
    PyrePath.get_matching_files_recursively ~suffix:".config" ~paths:taint_model_paths
  in
  let parse_configuration path =
    if not (PyrePath.file_exists path) then
      Error (Error.create ~path ~kind:Error.FileNotFound)
    else
      let content =
        path
        |> File.create
        |> File.content
        |> Result.of_option ~error:(Error.create ~path ~kind:FileRead)
      in
      try content >>| Json.from_string >>| fun json -> path, json with
      | Yojson.Json_error parse_error ->
          Error (Error.create ~path ~kind:(Error.InvalidJson parse_error))
  in
  let configurations = file_paths |> List.map ~f:parse_configuration |> Result.combine_errors in
  match configurations with
  | Error errors -> Error errors
  | Ok [] -> Error [{ Error.path = None; kind = NoConfigurationFound }]
  | Ok configurations -> (
      parse configurations
      >>= validate
      >>| fun configuration ->
      let configuration =
        match find_missing_flows with
        | Some Obscure -> obscure_flows_configuration configuration
        | Some Type -> missing_type_flows_configuration configuration
        | None -> configuration
      in
      let configuration = { configuration with dump_model_query_results_path } in
      let configuration =
        match maximum_trace_length with
        | None -> configuration
        | Some _ ->
            let analysis_model_constraints =
              { configuration.analysis_model_constraints with maximum_trace_length }
            in
            { configuration with analysis_model_constraints }
      in
      let configuration =
        match maximum_tito_depth with
        | None -> configuration
        | Some _ ->
            let analysis_model_constraints =
              { configuration.analysis_model_constraints with maximum_tito_depth }
            in
            { configuration with analysis_model_constraints }
      in
      match rule_filter with
      | None -> configuration
      | Some rule_filter ->
          let codes_to_keep = Int.Set.of_list rule_filter in
          let { rules; _ } = configuration in
          let rules = List.filter rules ~f:(fun { code; _ } -> Set.mem codes_to_keep code) in
          { configuration with rules })
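
(* A hypothetical call site, to show how the pieces fit together (the path and rule code are
   illustrative only):
     create
       ~rule_filter:(Some [5001])
       ~find_missing_flows:None
       ~dump_model_query_results_path:None
       ~maximum_trace_length:None
       ~maximum_tito_depth:None
       ~taint_model_paths:[PyrePath.create_absolute "/repo/taint_models"]
     |> abort_on_error
     |> register
   `create` returns a `Result.t`; `abort_on_error` (or `exception_on_error`) unwraps it, and
   `register` stores the configuration in shared memory so that later calls to `get` see it. *)
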
let conditional_test_sinks () =
  match get () with
  | { implicit_sinks = { conditional_test; _ }; _ } -> conditional_test

let literal_string_sinks () =
  match get () with
  | { implicit_sinks = { literal_string_sinks; _ }; _ } -> literal_string_sinks

let get_triggered_sink ~partial_sink ~source =
  let { partial_sink_converter; _ } = get () in
  PartialSinkConverter.get_triggered_sink partial_sink_converter ~partial_sink ~source

let is_missing_flow_analysis kind =
  match get () with
  | { find_missing_flows; _ } ->
      Option.equal [%compare.equal: missing_flows_kind] (Some kind) find_missing_flows

let get_maximum_model_width () =
  match get () with
  | { analysis_model_constraints = { maximum_model_width; _ }; _ } -> maximum_model_width

let literal_string_sources () =
  let { implicit_sources; _ } = get () in
  implicit_sources.literal_strings

let maximum_return_access_path_width = 5

let maximum_return_access_path_depth = 3

let maximum_tito_positions = 50

let maximum_tree_depth_after_widening = 4

let maximum_tito_leaves = 5