source/interprocedural_analyses/taint/features.ml (420 lines of code) (raw):
(*
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*)
open Core
open Ast
open Analysis
open Pyre
module MakeInterner (T : sig
type t
val name : string
val pp : Format.formatter -> t -> unit
val show : t -> string
end) =
struct
module Value = struct
include T
let to_string = T.show
let prefix = Prefix.make ()
let description = T.name
let unmarshall value = Marshal.from_string value 0
end
include Memory.Interner (Value)
let name = T.name
let pp formatter id = id |> unintern |> T.pp formatter
let show id = id |> unintern |> T.show
end
module First (Kind : sig
val kind : string
end) =
struct
type t = string [@@deriving show]
let name = "first-" ^ Kind.kind
let compare = String.compare
let to_json firsts =
match firsts with
| [] -> []
| _ :: _ ->
let first_name = "first-" ^ Kind.kind in
let element_to_json element = `Assoc [first_name, `String element] in
`Assoc ["has", `String first_name] :: List.map firsts ~f:element_to_json
end
module FirstIndex = First (struct
let kind = "index"
end)
module FirstIndexInterned = MakeInterner (FirstIndex)
module FirstIndexSet = struct
include Abstract.SetDomain.Make (FirstIndexInterned)
let number_regexp = Str.regexp "[0-9]+"
let add_first index indices =
let is_numeric name = Str.string_match number_regexp name 0 in
let to_first_name = function
| Abstract.TreeDomain.Label.Index name when is_numeric name -> Some "<numeric>"
| Index name -> Some name
| Field _ -> None
| AnyIndex -> Some "<unknown>"
in
if is_bottom indices then
to_first_name index
>>| FirstIndexInterned.intern
>>| singleton
|> Option.value ~default:bottom
else
indices
let sequence_join new_indices existing_indices =
if is_bottom existing_indices then
new_indices
else
existing_indices
end
module FirstField = First (struct
let kind = "field"
end)
module FirstFieldInterned = MakeInterner (FirstField)
module FirstFieldSet = struct
include Abstract.SetDomain.Make (FirstFieldInterned)
let add_first field fields =
if is_bottom fields then
field |> FirstFieldInterned.intern |> singleton
else
fields
let sequence_join new_fields existing_fields =
if is_bottom existing_fields then
new_fields
else
existing_fields
end
module TitoPosition = struct
let name = "tito positions"
type t = Location.t [@@deriving show, compare]
let max_count () = TaintConfiguration.maximum_tito_positions
end
module TitoPositionSet = Abstract.ToppedSetDomain.Make (TitoPosition)
module LeafName = struct
let name = "leaf names"
type t = {
leaf: string;
port: string option;
}
[@@deriving compare]
let pp formatter { leaf; port } =
match port with
| None -> Format.fprintf formatter "LeafName(%s)" leaf
| Some port -> Format.fprintf formatter "LeafName(%s, port=%s)" leaf port
let show = Format.asprintf "%a" pp
let to_json { leaf; port } =
let port_assoc =
match port with
| Some port -> ["port", `String port]
| None -> []
in
`Assoc (port_assoc @ ["name", `String leaf])
end
module LeafNameInterned = MakeInterner (LeafName)
module LeafNameSet = Abstract.SetDomain.Make (LeafNameInterned)
module Breadcrumb = struct
let name = "breadcrumbs"
type t =
| FormatString (* Via f"{something}" *)
| ObscureModel
| ObscureUnknownCallee
| Lambda
| SimpleVia of string (* Declared breadcrumbs *)
| ViaValue of {
tag: string option;
value: string;
}
(* Via inferred from ViaValueOf. *)
| ViaType of {
tag: string option;
value: string;
}
(* Via inferred from ViaTypeOf. *)
| Tito
| Type of string (* Type constraint *)
| Broadening (* Taint tree was collapsed for various reasons *)
| WidenBroadening (* Taint tree was collapsed during widening *)
| TitoBroadening (* Taint tree was collapsed when applying tito *)
| IssueBroadening (* Taint tree was collapsed when matching sources and sinks *)
[@@deriving compare]
let pp formatter breadcrumb =
let pp_via_value_or_type header tag value =
match tag with
| None -> Format.fprintf formatter "%s[%s]" header value
| Some tag -> Format.fprintf formatter "%s[%s, tag=%s]" header value tag
in
match breadcrumb with
| FormatString -> Format.fprintf formatter "FormatString"
| ObscureModel -> Format.fprintf formatter "ObscureModel"
| ObscureUnknownCallee -> Format.fprintf formatter "ObscureUnknownCallee"
| Lambda -> Format.fprintf formatter "Lambda"
| SimpleVia name -> Format.fprintf formatter "SimpleVia[%s]" name
| ViaValue { tag; value } -> pp_via_value_or_type "ViaValue" tag value
| ViaType { tag; value } -> pp_via_value_or_type "ViaType" tag value
| Tito -> Format.fprintf formatter "Tito"
| Type name -> Format.fprintf formatter "Type[%s]" name
| Broadening -> Format.fprintf formatter "Broadening"
| WidenBroadening -> Format.fprintf formatter "WidenBroadening"
| TitoBroadening -> Format.fprintf formatter "TitoBroadening"
| IssueBroadening -> Format.fprintf formatter "IssueBroadening"
let show = Format.asprintf "%a" pp
let to_json ~on_all_paths breadcrumb =
let prefix = if on_all_paths then "always-" else "" in
let via_value_or_type_annotation ~via_kind ~tag ~value =
match tag with
| None -> `Assoc [Format.sprintf "%svia-%s" prefix via_kind, `String value]
| Some tag -> `Assoc [Format.sprintf "%svia-%s-%s" prefix tag via_kind, `String value]
in
match breadcrumb with
| FormatString -> `Assoc [prefix ^ "via", `String "format-string"]
| ObscureModel -> `Assoc [prefix ^ "via", `String "obscure:model"]
| ObscureUnknownCallee -> `Assoc [prefix ^ "via", `String "obscure:unknown-callee"]
| Lambda -> `Assoc [prefix ^ "via", `String "lambda"]
| SimpleVia name -> `Assoc [prefix ^ "via", `String name]
| ViaValue { tag; value } -> via_value_or_type_annotation ~via_kind:"value" ~tag ~value
| ViaType { tag; value } -> via_value_or_type_annotation ~via_kind:"type" ~tag ~value
| Tito -> `Assoc [prefix ^ "via", `String "tito"]
| Type name -> `Assoc [prefix ^ "type", `String name]
| Broadening -> `Assoc [prefix ^ "via", `String "broadening"]
| WidenBroadening -> `Assoc [prefix ^ "via", `String "widen-broadening"]
| TitoBroadening -> `Assoc [prefix ^ "via", `String "tito-broadening"]
| IssueBroadening -> `Assoc [prefix ^ "via", `String "issue-broadening"]
let simple_via ~allowed name =
if List.mem allowed name ~equal:String.equal then
Ok (SimpleVia name)
else
Error (Format.sprintf "Unrecognized Via annotation `%s`" name)
end
module BreadcrumbInterned = MakeInterner (Breadcrumb)
module BreadcrumbSet = Abstract.OverUnderSetDomain.Make (BreadcrumbInterned)
module ViaFeature = struct
let name = "via features"
type t =
| ViaValueOf of {
parameter: AccessPath.Root.t;
tag: string option;
}
| ViaTypeOf of {
parameter: AccessPath.Root.t;
tag: string option;
}
[@@deriving compare]
let pp formatter simple =
let pp_via_value_or_type header parameter tag =
match tag with
| None -> Format.fprintf formatter "%s[%a]" header AccessPath.Root.pp parameter
| Some tag ->
Format.fprintf formatter "%s[%a, tag=%s]" header AccessPath.Root.pp parameter tag
in
match simple with
| ViaValueOf { parameter; tag } -> pp_via_value_or_type "ViaValueOf" parameter tag
| ViaTypeOf { parameter; tag } -> pp_via_value_or_type "ViaTypeOf" parameter tag
let show = Format.asprintf "%a" pp
let via_value_of_breadcrumb ?tag ~arguments =
let open Ast.Expression.Call.Argument in
let extract_constant_value arguments =
List.find_map
~f:(fun argument -> Interprocedural.CallResolution.extract_constant_name argument.value)
arguments
in
let argument_kind = function
| { value = { Node.value = Starred (Once _); _ }; _ } -> "args"
| { value = { Node.value = Starred (Twice _); _ }; _ } -> "kwargs"
| { value = _; name = Some _ } -> "named"
| { value = _; name = None } -> "positional"
in
let generate_kind arguments =
List.map ~f:argument_kind arguments
|> List.sort ~compare:String.compare
|> String.concat ~sep:"_or_"
in
let feature =
match arguments with
| [] -> "<missing>"
| arguments -> (
match extract_constant_value arguments with
| Some value -> value
| None -> Format.asprintf "<unknown:%s>" (generate_kind arguments))
in
Breadcrumb.ViaValue { value = feature; tag } |> BreadcrumbInterned.intern
let via_type_of_breadcrumb ?tag ~resolution ~argument =
let feature =
argument
>>| Resolution.resolve_expression resolution
>>| snd
>>| Type.weaken_literals
|> Option.value ~default:Type.Top
|> Type.show
in
Breadcrumb.ViaType { value = feature; tag } |> BreadcrumbInterned.intern
let via_type_of_breadcrumb_for_object ?tag ~resolution ~object_target =
let feature =
object_target
|> Reference.create
|> Resolution.resolve_reference resolution
|> Type.weaken_literals
|> Type.show
in
Breadcrumb.ViaType { value = feature; tag } |> BreadcrumbInterned.intern
let to_json via =
let to_json_via_value_or_type kind parameter tag =
let json = ["kind", `String kind; "parameter", `String (AccessPath.Root.show parameter)] in
let json =
match tag with
| Some tag -> ("tag", `String tag) :: json
| None -> json
in
`Assoc json
in
match via with
| ViaValueOf { parameter; tag } -> to_json_via_value_or_type "ViaValueOf" parameter tag
| ViaTypeOf { parameter; tag } -> to_json_via_value_or_type "ViaTypeOf" parameter tag
end
module ViaFeatureSet = Abstract.SetDomain.Make (ViaFeature)
module ReturnAccessPath = struct
let name = "return access paths"
type t = Abstract.TreeDomain.Label.path [@@deriving show { with_path = false }, compare]
let less_or_equal ~left ~right = Abstract.TreeDomain.Label.is_prefix ~prefix:right left
let common_prefix = function
| head :: tail -> List.fold ~init:head ~f:Abstract.TreeDomain.Label.common_prefix tail
| [] -> []
let widen set =
if List.length set > TaintConfiguration.maximum_return_access_path_width then
[common_prefix set]
else
let truncate = function
| p when List.length p > TaintConfiguration.maximum_return_access_path_depth ->
List.take p TaintConfiguration.maximum_return_access_path_depth
| x -> x
in
List.map ~f:truncate set
let to_json path = `String (Abstract.TreeDomain.Label.show_path path)
end
module ReturnAccessPathSet = struct
module T = Abstract.ElementSetDomain.Make (ReturnAccessPath)
include T
let join left right =
let set = T.join left right in
if T.count set > TaintConfiguration.maximum_return_access_path_width then
set |> T.elements |> ReturnAccessPath.common_prefix |> T.singleton
else
set
end
(* We need to make all breadcrumb creation lazy because the shared memory might
* not be initialized yet. *)
let memoize closure () = Lazy.force closure
let memoize_breadcrumb_interned breadcrumb =
memoize (lazy (breadcrumb |> BreadcrumbInterned.intern))
let obscure_model = memoize_breadcrumb_interned Breadcrumb.ObscureModel
let obscure_unknown_callee = memoize_breadcrumb_interned Breadcrumb.ObscureUnknownCallee
let lambda = memoize_breadcrumb_interned Breadcrumb.Lambda
let tito = memoize_breadcrumb_interned Breadcrumb.Tito
let format_string = memoize_breadcrumb_interned Breadcrumb.FormatString
let type_scalar = memoize_breadcrumb_interned (Breadcrumb.Type "scalar")
let type_bool = memoize_breadcrumb_interned (Breadcrumb.Type "bool")
let type_integer = memoize_breadcrumb_interned (Breadcrumb.Type "integer")
let type_enumeration = memoize_breadcrumb_interned (Breadcrumb.Type "enumeration")
let broadening = memoize_breadcrumb_interned Breadcrumb.Broadening
let issue_broadening = memoize_breadcrumb_interned Breadcrumb.IssueBroadening
let memoize_breadcrumb_set breadcrumbs =
memoize
(lazy
(breadcrumbs
|> List.map ~f:(fun breadcrumb ->
Abstract.Domain.Part (BreadcrumbSet.Element, BreadcrumbInterned.intern breadcrumb))
|> BreadcrumbSet.create))
let widen_broadening_set =
memoize_breadcrumb_set [Breadcrumb.Broadening; Breadcrumb.WidenBroadening]
let tito_broadening_set = memoize_breadcrumb_set [Breadcrumb.Broadening; Breadcrumb.TitoBroadening]
let issue_broadening_set =
memoize_breadcrumb_set [Breadcrumb.Broadening; Breadcrumb.IssueBroadening]
let type_bool_scalar_set = memoize_breadcrumb_set [Breadcrumb.Type "scalar"; Breadcrumb.Type "bool"]
let type_breadcrumbs ~resolution annotation =
let matches_at_leaves ~f annotation =
let rec matches_at_leaves ~f annotation =
match annotation with
| Type.Any
| Type.Bottom ->
false
| Type.Union [Type.NoneType; annotation]
| Type.Union [annotation; Type.NoneType]
| Type.Parametric { name = "typing.Awaitable"; parameters = [Single annotation] } ->
matches_at_leaves ~f annotation
| Type.Tuple (Concatenation concatenation) ->
Type.OrderedTypes.Concatenation.extract_sole_unbounded_annotation concatenation
>>| (fun element -> matches_at_leaves ~f element)
|> Option.value ~default:(f annotation)
| Type.Tuple (Type.OrderedTypes.Concrete annotations) ->
List.for_all annotations ~f:(matches_at_leaves ~f)
| annotation -> f annotation
in
annotation >>| matches_at_leaves ~f |> Option.value ~default:false
in
let is_boolean =
matches_at_leaves annotation ~f:(fun left ->
GlobalResolution.less_or_equal resolution ~left ~right:Type.bool)
in
let is_integer =
matches_at_leaves annotation ~f:(fun left ->
GlobalResolution.less_or_equal resolution ~left ~right:Type.integer)
in
let is_float =
matches_at_leaves annotation ~f:(fun left ->
GlobalResolution.less_or_equal resolution ~left ~right:Type.float)
in
let is_enumeration =
matches_at_leaves annotation ~f:(fun left ->
GlobalResolution.less_or_equal resolution ~left ~right:Type.enumeration)
in
let is_scalar = is_boolean || is_integer || is_float || is_enumeration in
let add_if condition breadcrumb features =
if condition then
BreadcrumbSet.add (breadcrumb ()) features
else
features
in
BreadcrumbSet.bottom
|> add_if is_scalar type_scalar
|> add_if is_boolean type_bool
|> add_if is_integer type_integer
|> add_if is_enumeration type_enumeration
let expand_via_features ~resolution ~callees ~arguments via_features =
let expand_via_feature via_feature breadcrumbs =
let match_all_arguments_to_parameter parameter =
AccessPath.match_actuals_to_formals arguments [parameter]
|> List.filter_map ~f:(fun (argument, matches) ->
if not (List.is_empty matches) then
Some argument
else
None)
in
let match_argument_to_parameter parameter =
match match_all_arguments_to_parameter parameter with
| [] -> None
| argument :: _ -> Some argument.value
in
match via_feature with
| ViaFeature.ViaValueOf { parameter; tag } ->
let arguments = match_all_arguments_to_parameter parameter in
BreadcrumbSet.add (ViaFeature.via_value_of_breadcrumb ?tag ~arguments) breadcrumbs
| ViaFeature.ViaTypeOf { parameter; tag } ->
let breadcrumb =
match callees with
| [`Object object_target] ->
ViaFeature.via_type_of_breadcrumb_for_object ?tag ~resolution ~object_target
| _ ->
ViaFeature.via_type_of_breadcrumb
?tag
~resolution
~argument:(match_argument_to_parameter parameter)
in
BreadcrumbSet.add breadcrumb breadcrumbs
in
ViaFeatureSet.fold
ViaFeatureSet.Element
~f:expand_via_feature
~init:BreadcrumbSet.empty
via_features