source/interprocedural_analyses/taint/backwardAnalysis.ml (1,940 lines of code) (raw):
(*
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*)
open Core
open Analysis
open Ast
open Expression
open Pyre
open Domains
module CallGraph = Interprocedural.CallGraph
module ClassInterval = Interprocedural.ClassInterval
module type FUNCTION_CONTEXT = sig
val qualifier : Reference.t
val definition : Statement.Define.t Node.t
val debug : bool
val profiler : TaintProfiler.t
val environment : TypeEnvironment.ReadOnly.t
val call_graph_of_define : CallGraph.DefineCallGraph.t
val triggered_sinks : ForwardAnalysis.triggered_sinks
val caller_class_interval : ClassInterval.t
end
module State (FunctionContext : FUNCTION_CONTEXT) = struct
type t = { taint: BackwardState.t }
let bottom = { taint = BackwardState.bottom }
let pp formatter { taint } = BackwardState.pp formatter taint
let show = Format.asprintf "%a" pp
let less_or_equal ~left:{ taint = left; _ } ~right:{ taint = right; _ } =
BackwardState.less_or_equal ~left ~right
let join { taint = left } { taint = right; _ } =
let taint = BackwardState.join left right in
{ taint }
let widen ~previous:{ taint = prev; _ } ~next:{ taint = next; _ } ~iteration =
let taint = BackwardState.widen ~iteration ~prev ~next in
{ taint }
let profiler = FunctionContext.profiler
let log format =
if FunctionContext.debug then
Log.dump format
else
Log.log ~section:`Taint format
let get_call_callees ~location ~call =
let callees =
match
CallGraph.DefineCallGraph.resolve_call FunctionContext.call_graph_of_define ~location ~call
with
| Some callees -> callees
| None ->
(* That can happen if that statement is not reachable by the forward analysis. *)
Log.warning
"Could not find callees for `%a` at `%a:%a` in the call graph. This is most likely \
dead code."
Expression.pp
(Node.create_with_default_location (Expression.Call call))
Reference.pp
FunctionContext.qualifier
Location.pp
location;
CallGraph.CallCallees.create_unresolved Type.Any
in
log
"Resolved callees for call `%a` at %a:@,%a"
Expression.pp
(Node.create_with_default_location (Expression.Call call))
Location.pp
location
CallGraph.CallCallees.pp
callees;
callees
let get_attribute_access_callees ~location ~attribute =
let callees =
CallGraph.DefineCallGraph.resolve_attribute_access
FunctionContext.call_graph_of_define
~location
~attribute
in
let () =
match callees with
| Some callees ->
log
"Resolved attribute access callees for `%s` at %a:@,%a"
attribute
Location.pp
location
CallGraph.AttributeAccessCallees.pp
callees
| _ -> ()
in
callees
let get_format_string_callees ~location =
CallGraph.DefineCallGraph.resolve_format_string FunctionContext.call_graph_of_define ~location
let global_resolution = TypeEnvironment.ReadOnly.global_resolution FunctionContext.environment
let local_annotations =
TypeEnvironment.ReadOnly.get_local_annotations
FunctionContext.environment
(Node.value FunctionContext.definition |> Statement.Define.name)
let is_constructor () =
let { Node.value = { Statement.Define.signature = { name; _ }; _ }; _ } =
FunctionContext.definition
in
match Reference.last name with
| "__init__" -> true
| _ -> false
let first_parameter () =
let { Node.value = { Statement.Define.signature = { parameters; _ }; _ }; _ } =
FunctionContext.definition
in
match parameters with
| { Node.value = { Parameter.name; _ }; _ } :: _ -> Some (AccessPath.Root.Variable name)
| _ -> None
(* This is where we can observe access paths reaching into LocalReturn and record the extraneous
paths for more precise tito. *)
let initial_taint =
(* We handle constructors and property setters specially and track effects. *)
if
is_constructor ()
|| Statement.Define.is_property_setter (Node.value FunctionContext.definition)
then
match first_parameter () with
| Some root ->
BackwardState.assign
~root
~path:[]
(BackwardState.Tree.create_leaf Domains.local_return_taint)
BackwardState.bottom
| _ -> BackwardState.bottom
else
BackwardState.assign
~root:AccessPath.Root.LocalResult
~path:[]
(BackwardState.Tree.create_leaf Domains.local_return_taint)
BackwardState.bottom
let transform_non_leaves path taint =
let f prefix = prefix @ path in
match path with
| Abstract.TreeDomain.Label.AnyIndex :: _ -> taint
| _ -> BackwardTaint.transform Features.ReturnAccessPathSet.Element Map ~f taint
let read_tree = BackwardState.Tree.read ~transform_non_leaves
let get_taint access_path { taint; _ } =
match access_path with
| None -> BackwardState.Tree.empty
| Some { AccessPath.root; path } -> BackwardState.read ~transform_non_leaves ~root ~path taint
let store_taint ?(weak = false) ~root ~path taint { taint = state_taint } =
{ taint = BackwardState.assign ~weak ~root ~path taint state_taint }
let analyze_definition ~define:_ state = state
type call_target_result = {
arguments_taint: BackwardState.Tree.t list;
self_taint: BackwardState.Tree.t option;
callee_taint: BackwardState.Tree.t option;
state: t;
}
let join_call_target_results
{
arguments_taint = left_arguments_taint;
self_taint = left_self_taint;
callee_taint = left_callee_taint;
state = left_state;
}
{
arguments_taint = right_arguments_taint;
self_taint = right_self_taint;
callee_taint = right_callee_taint;
state = right_state;
}
=
let arguments_taint =
List.map2_exn left_arguments_taint right_arguments_taint ~f:BackwardState.Tree.join
in
let join_option left right =
match left, right with
| Some left, Some right -> Some (BackwardState.Tree.join left right)
| Some left, None -> Some left
| None, Some right -> Some right
| None, None -> None
in
let self_taint = join_option left_self_taint right_self_taint in
let callee_taint = join_option left_callee_taint right_callee_taint in
let state = join left_state right_state in
{ arguments_taint; self_taint; callee_taint; state }
let apply_call_target
~resolution
~call_location
~self
~callee
~arguments
~return_type_breadcrumbs
~state:initial_state
~call_taint
({
CallGraph.CallTarget.target;
implicit_self;
implicit_dunder_call;
collapse_tito = _;
index = _;
receiver_type;
} as call_target)
=
let arguments =
if implicit_self && not implicit_dunder_call then
{ Call.Argument.name = None; value = Option.value_exn self } :: arguments
else if implicit_self && implicit_dunder_call then
{ Call.Argument.name = None; value = callee } :: arguments
else
arguments
in
let triggered_taint =
match Hashtbl.find FunctionContext.triggered_sinks call_location with
| Some items ->
List.fold
items
~f:(fun state (root, sink) ->
let new_taint =
BackwardTaint.singleton sink Frame.initial |> BackwardState.Tree.create_leaf
in
BackwardState.assign ~root ~path:[] new_taint state)
~init:BackwardState.bottom
| None -> BackwardState.bottom
in
let ({ Model.modes; _ } as taint_model) =
TaintProfiler.track_model_fetch
~profiler
~analysis:TaintProfiler.Backward
~call_target:target
~f:(fun () -> CallModel.at_callsite ~resolution ~call_target:target ~arguments)
in
log
"Backward analysis of call to `%a` with arguments (%a)@,Call site model:@,%a"
Interprocedural.Target.pretty_print
target
Ast.Expression.pp_expression_argument_list
arguments
Model.pp
taint_model;
let taint_model =
{
taint_model with
backward =
{
taint_model.backward with
sink_taint = BackwardState.join taint_model.backward.sink_taint triggered_taint;
};
}
in
let call_taint =
if Model.ModeSet.contains Obscure modes then
BackwardState.Tree.add_local_breadcrumbs (Lazy.force return_type_breadcrumbs) call_taint
else
call_taint
in
let get_argument_taint ~resolution ~argument:{ Call.Argument.value = argument; _ } =
let global_sink =
GlobalModel.from_expression
~resolution
~call_graph:FunctionContext.call_graph_of_define
~qualifier:FunctionContext.qualifier
~expression:argument
~interval:FunctionContext.caller_class_interval
|> GlobalModel.get_sinks
|> Issue.SinkTreeWithHandle.join
in
let access_path = AccessPath.of_expression argument in
get_taint access_path initial_state |> BackwardState.Tree.join global_sink
in
let convert_tito_path_to_taint ~kind (tito_path, tito_taint) argument_taint =
let breadcrumbs = BackwardTaint.joined_breadcrumbs tito_taint in
let tito_depth =
BackwardTaint.fold TraceLength.Self tito_taint ~f:TraceLength.join ~init:TraceLength.bottom
in
let taint_to_propagate =
match Sinks.discard_transforms kind with
| Sinks.LocalReturn -> call_taint
(* Attach nodes shouldn't affect analysis. *)
| Sinks.Attach -> BackwardState.Tree.empty
| Sinks.ParameterUpdate n -> (
match List.nth arguments n with
| None -> BackwardState.Tree.empty
| Some argument -> get_argument_taint ~resolution ~argument)
| _ -> failwith "unexpected tito sink"
in
let taint_to_propagate =
match kind with
| Sinks.Transform { local; global; _ } ->
(* Apply tito transforms and source- and sink-specific sanitizers. *)
let transforms = TaintTransforms.merge ~local ~global in
let sanitize_transforms = TaintTransforms.get_sanitize_transforms transforms in
let named_transforms = TaintTransforms.get_named_transforms transforms in
let sanitized_tito_sinks =
Sinks.extract_sanitized_sinks_from_transforms sanitize_transforms
in
let sanitized_tito_sources = SanitizeTransform.Set.filter_sources sanitize_transforms in
let sanitized_tito_sinks_transforms =
SanitizeTransform.Set.filter_sinks sanitize_transforms
in
taint_to_propagate
|> BackwardState.Tree.sanitize sanitized_tito_sinks
|> BackwardState.Tree.apply_sanitize_transforms sanitized_tito_sources
|> BackwardState.Tree.apply_sanitize_sink_transforms sanitized_tito_sinks_transforms
|> BackwardState.Tree.apply_named_transforms named_transforms
|> BackwardState.Tree.transform BackwardTaint.kind Filter ~f:Issue.sink_can_match_rule
| _ -> taint_to_propagate
in
let compute_tito_depth kind depth =
match Sinks.discard_transforms kind with
| Sinks.LocalReturn -> max depth (1 + tito_depth)
| _ -> depth
in
CallModel.return_paths ~kind ~tito_taint
|> List.fold
~f:(fun taint return_path ->
read_tree return_path taint_to_propagate
|> BackwardState.Tree.collapse
~transform:(BackwardTaint.add_local_breadcrumbs (Features.tito_broadening_set ()))
|> BackwardTaint.add_local_breadcrumbs breadcrumbs
|> BackwardTaint.transform
TraceLength.Self
(Context (BackwardTaint.kind, Map))
~f:compute_tito_depth
|> BackwardState.Tree.create_leaf
|> BackwardState.Tree.prepend tito_path
|> BackwardState.Tree.join taint)
~init:argument_taint
in
let convert_tito_tree_to_taint ~argument ~kind ~tito_tree taint_tree =
BackwardState.Tree.fold
BackwardState.Tree.Path
tito_tree
~init:BackwardState.Tree.bottom
~f:(convert_tito_path_to_taint ~kind)
|> BackwardState.Tree.transform Features.TitoPositionSet.Element Add ~f:argument.Node.location
|> BackwardState.Tree.add_local_breadcrumb (Features.tito ())
|> BackwardState.Tree.join taint_tree
in
let is_self_call = Ast.Expression.is_self_call ~callee in
let receiver_class_interval = ClassInterval.SharedMemory.of_type receiver_type in
let analyze_argument
(arguments_taint, state)
{ CallModel.ArgumentMatches.argument; sink_matches; tito_matches; sanitize_matches }
=
let location =
Location.with_module ~module_reference:FunctionContext.qualifier argument.Node.location
in
let sink_taint =
CallModel.sink_trees_of_argument
~resolution
~transform_non_leaves
~model:taint_model
~location
~call_target
~arguments
~sink_matches
~is_self_call
~caller_class_interval:FunctionContext.caller_class_interval
~receiver_class_interval
|> Issue.SinkTreeWithHandle.join
in
let taint_in_taint_out =
CallModel.taint_in_taint_out_mapping
~transform_non_leaves
~model:taint_model
~tito_matches
~sanitize_matches
|> CallModel.TaintInTaintOutMap.fold
~init:BackwardState.Tree.empty
~f:(convert_tito_tree_to_taint ~argument)
in
let taint = BackwardState.Tree.join sink_taint taint_in_taint_out in
let state =
match AccessPath.of_expression argument with
| Some { AccessPath.root; path } ->
let breadcrumbs_to_add =
BackwardState.Tree.filter_by_kind ~kind:Sinks.AddFeatureToArgument sink_taint
|> BackwardTaint.accumulated_breadcrumbs
in
if Features.BreadcrumbSet.is_bottom breadcrumbs_to_add then
state
else
let taint =
BackwardState.read state.taint ~root ~path
|> BackwardState.Tree.add_local_breadcrumbs breadcrumbs_to_add
in
{ taint = BackwardState.assign ~root ~path taint state.taint }
| None -> state
in
taint :: arguments_taint, state
in
let arguments_taint, state =
CallModel.match_actuals_to_formals ~model:taint_model ~arguments
|> List.rev
|> List.fold ~f:analyze_argument ~init:([], initial_state)
in
(* Extract the taint for self. *)
let self_taint, callee_taint, arguments_taint =
if implicit_self && not implicit_dunder_call then
match arguments_taint with
| self_taint :: arguments_taint -> Some self_taint, None, arguments_taint
| _ -> failwith "missing taint for self argument"
else if implicit_self && implicit_dunder_call then
match arguments_taint with
| callee_taint :: arguments_taint -> None, Some callee_taint, arguments_taint
| _ -> failwith "missing taint for callee argument"
else
None, None, arguments_taint
in
{ arguments_taint; self_taint; callee_taint; state }
let apply_obscure_call ~callee ~arguments ~state:initial_state ~call_taint =
log
"Backward analysis of obscure call to `%a` with arguments (%a)"
Expression.pp
callee
Ast.Expression.pp_expression_argument_list
arguments;
let obscure_taint =
BackwardState.Tree.collapse
~transform:(BackwardTaint.add_local_breadcrumbs (Features.tito_broadening_set ()))
call_taint
|> BackwardTaint.add_local_breadcrumb (Features.obscure_unknown_callee ())
|> BackwardState.Tree.create_leaf
in
let compute_argument_taint { Call.Argument.value = argument; _ } =
let taint = obscure_taint in
let taint =
match argument.Node.value with
| Starred (Starred.Once _)
| Starred (Starred.Twice _) ->
BackwardState.Tree.prepend [Abstract.TreeDomain.Label.AnyIndex] taint
| _ -> taint
in
let taint =
BackwardState.Tree.transform
Features.TitoPositionSet.Element
Add
~f:argument.Node.location
taint
in
taint
in
let arguments_taint = List.map ~f:compute_argument_taint arguments in
{ arguments_taint; self_taint = None; callee_taint = Some obscure_taint; state = initial_state }
let apply_constructor_targets
~resolution
~call_location
~callee
~arguments
~new_targets
~init_targets
~return_type_breadcrumbs
~state:initial_state
~call_taint
=
(* Call `__init__`. Add the `self` implicit argument. *)
let { arguments_taint = init_arguments_taint; self_taint; callee_taint = _; state } =
match init_targets with
| [] ->
{
arguments_taint = List.map arguments ~f:(fun _ -> BackwardState.Tree.bottom);
self_taint = Some call_taint;
callee_taint = None;
state = initial_state;
}
| init_targets ->
let call_expression =
Expression.Call { Call.callee; arguments } |> Node.create ~location:call_location
in
List.map init_targets ~f:(fun target ->
apply_call_target
~resolution
~call_location
~self:(Some call_expression)
~callee
~arguments
~return_type_breadcrumbs
~state:initial_state
~call_taint
target)
|> List.fold
~f:join_call_target_results
~init:
{
arguments_taint = List.map arguments ~f:(fun _ -> BackwardState.Tree.bottom);
self_taint = None;
callee_taint = None;
state = bottom;
}
in
let self_taint = Option.value_exn self_taint in
(* Call `__new__`. *)
let call_target_result =
match new_targets with
| []
| [
{
CallGraph.CallTarget.target =
`Method { Interprocedural.Target.class_name = "object"; method_name = "__new__" };
_;
};
] ->
{ arguments_taint = init_arguments_taint; self_taint = None; callee_taint = None; state }
| new_targets ->
(* Add the `cls` implicit argument. *)
let {
arguments_taint = new_arguments_taint;
self_taint = callee_taint;
callee_taint = _;
state;
}
=
List.map new_targets ~f:(fun target ->
apply_call_target
~resolution
~call_location
~self:(Some callee)
~callee
~arguments
~return_type_breadcrumbs
~state
~call_taint:self_taint
target)
|> List.fold
~f:join_call_target_results
~init:
{
arguments_taint = List.map arguments ~f:(fun _ -> BackwardState.Tree.bottom);
self_taint = None;
callee_taint = None;
state = bottom;
}
in
{
arguments_taint =
List.map2_exn init_arguments_taint new_arguments_taint ~f:BackwardState.Tree.join;
self_taint = None;
callee_taint;
state;
}
in
call_target_result
let apply_callees_and_return_arguments_taint
~resolution
~callee
~call_location
~arguments
~state:initial_state
~call_taint
{
CallGraph.CallCallees.call_targets;
new_targets;
init_targets;
return_type;
higher_order_parameter = _;
unresolved;
}
=
let call_taint =
(* Add index breadcrumb if appropriate. *)
match callee.Node.value, arguments with
| Expression.Name (Name.Attribute { attribute = "get"; _ }), index :: _ ->
let label = AccessPath.get_index index.Call.Argument.value in
BackwardState.Tree.add_local_first_index label call_taint
| _ -> call_taint
in
let call_targets, unresolved, call_taint =
(* Specially handle super.__init__ calls and explicit calls to superclass' `__init__` in
constructors for tito. *)
match Node.value callee with
| Name (Name.Attribute { base; attribute; _ })
when is_constructor ()
&& String.equal attribute "__init__"
&& Interprocedural.CallResolution.is_super
~resolution
~define:FunctionContext.definition
base ->
(* If the super call is `object.__init__`, this is likely due to a lack of type
information for that constructor - we treat that case as obscure to not lose argument
taint for these calls. *)
let call_targets, unresolved =
match call_targets with
| [
{
CallGraph.CallTarget.target =
`Method { class_name = "object"; method_name = "__init__" };
_;
};
] ->
[], true
| _ -> call_targets, unresolved
in
let call_taint =
BackwardState.Tree.create_leaf Domains.local_return_taint
|> BackwardState.Tree.join call_taint
in
call_targets, unresolved, call_taint
| _ -> call_targets, unresolved, call_taint
in
let call_targets =
(* Special handling for the missing-flow analysis. *)
if unresolved && TaintConfiguration.is_missing_flow_analysis Type then (
let callable =
MissingFlow.unknown_callee
~location:
(Location.with_module call_location ~module_reference:FunctionContext.qualifier)
~call:(Expression.Call { Call.callee; arguments })
in
if not (Interprocedural.FixpointState.has_model callable) then
MissingFlow.register_unknown_callee_model callable;
let target =
CallGraph.CallTarget.create
~implicit_self:false
~implicit_dunder_call:false
~collapse_tito:true
~index:0
callable
in
target :: call_targets)
else
call_targets
in
(* Extract the implicit self, if any *)
let self =
match callee.Node.value with
| Expression.Name (Name.Attribute { base; _ }) -> Some base
| _ ->
(* Default to a benign self if we don't understand/retain information of what self is. *)
Expression.Constant Constant.NoneLiteral
|> Node.create ~location:callee.Node.location
|> Option.some
in
let return_type_breadcrumbs =
lazy
(let resolution = Resolution.global_resolution resolution in
Features.type_breadcrumbs ~resolution (Some return_type))
in
(* Apply regular call targets. *)
let call_target_result =
List.map
call_targets
~f:
(apply_call_target
~resolution
~call_location
~self
~callee
~arguments
~return_type_breadcrumbs
~state:initial_state
~call_taint)
|> List.fold
~f:join_call_target_results
~init:
{
arguments_taint = List.map arguments ~f:(fun _ -> BackwardState.Tree.bottom);
self_taint = None;
callee_taint = None;
state = bottom;
}
in
(* Apply an obscure call if the call was not fully resolved. *)
let call_target_result =
if unresolved then
apply_obscure_call ~callee ~arguments ~state:initial_state ~call_taint
|> join_call_target_results call_target_result
else
call_target_result
in
(* Apply constructor calls, if any. *)
let call_target_result =
match new_targets, init_targets with
| [], [] -> call_target_result
| _ ->
apply_constructor_targets
~resolution
~call_location
~callee
~arguments
~new_targets
~init_targets
~return_type_breadcrumbs
~state:initial_state
~call_taint
|> join_call_target_results call_target_result
in
call_target_result
let rec analyze_arguments ~resolution ~arguments ~arguments_taint ~state =
(* Explicitly analyze arguments from right to left (opposite of forward analysis). *)
List.zip_exn arguments arguments_taint
|> List.rev
|> List.fold
~init:state
~f:(fun state ({ Call.Argument.value = argument; _ }, argument_taint) ->
analyze_unstarred_expression ~resolution argument_taint argument state)
and analyze_callee ~resolution ~is_property_call ~callee ~self_taint ~callee_taint ~state =
(* Special case: `x.foo()` where foo is a property returning a callable. *)
let callee_is_property =
match is_property_call, callee.Node.value with
| false, Expression.Name (Name.Attribute { attribute; _ }) ->
get_attribute_access_callees ~location:callee.Node.location ~attribute |> Option.is_some
| _ -> false
in
match self_taint, callee_taint, callee_is_property with
| _, _, true
| _, Some _, _ -> (
match callee.Node.value with
| Expression.Name (Name.Attribute { base; attribute; special }) ->
(* If we are already analyzing a call of a property, then ignore properties
* to avoid infinite recursion. *)
let resolve_properties = not is_property_call in
analyze_attribute_access
~resolution
~location:callee.Node.location
~resolve_properties
~base
~attribute
~special
~base_taint:(Option.value self_taint ~default:BackwardState.Tree.bottom)
~attribute_taint:(Option.value callee_taint ~default:BackwardState.Tree.bottom)
~state
| _ ->
analyze_expression
~resolution
~taint:(Option.value callee_taint ~default:BackwardState.Tree.bottom)
~state
~expression:callee)
| Some self_taint, None, _ -> (
match callee.Node.value with
| Expression.Name (Name.Attribute { base; _ }) ->
analyze_expression ~resolution ~taint:self_taint ~state ~expression:base
| _ -> state)
| None, None, _ -> state
and analyze_attribute_access
~resolution
~location
~resolve_properties
~base
~attribute
~special
~base_taint:initial_base_taint
~attribute_taint
~state
=
let expression =
Expression.Name (Name.Attribute { base; attribute; special }) |> Node.create ~location
in
let attribute_access_callees =
if resolve_properties then get_attribute_access_callees ~location ~attribute else None
in
let base_taint_property_call, state_property_call =
match attribute_access_callees with
| Some { property_targets = _ :: _ as property_targets; return_type; _ } ->
let { arguments_taint = _; self_taint; callee_taint = _; state } =
apply_callees_and_return_arguments_taint
~resolution
~callee:expression
~call_location:location
~arguments:[]
~state
~call_taint:attribute_taint
(CallGraph.CallCallees.create ~call_targets:property_targets ~return_type ())
in
let base_taint = Option.value_exn self_taint in
base_taint, state
| _ -> BackwardState.Tree.bottom, bottom
in
let base_taint_attribute, state_attribute =
match attribute_access_callees with
| Some { is_attribute = true; _ }
| None ->
let global_model =
GlobalModel.from_expression
~resolution
~call_graph:FunctionContext.call_graph_of_define
~qualifier:FunctionContext.qualifier
~expression
~interval:FunctionContext.caller_class_interval
in
let add_tito_features taint =
let attribute_breadcrumbs =
global_model |> GlobalModel.get_tito |> BackwardState.Tree.accumulated_breadcrumbs
in
BackwardState.Tree.add_local_breadcrumbs attribute_breadcrumbs taint
in
let apply_attribute_sanitizers taint =
let sanitizer = GlobalModel.get_sanitize global_model in
let taint =
match sanitizer.sinks with
| Some All -> BackwardState.Tree.empty
| Some (Specific sanitized_sinks) ->
let sanitized_sinks_transforms =
Sinks.Set.to_sanitize_transforms_exn sanitized_sinks
in
taint
|> BackwardState.Tree.sanitize sanitized_sinks
|> BackwardState.Tree.apply_sanitize_sink_transforms sanitized_sinks_transforms
| _ -> taint
in
let taint =
match sanitizer.sources with
| Some (Specific sanitized_sources) ->
let sanitized_sources_transforms =
Sources.Set.to_sanitize_transforms_exn sanitized_sources
in
taint
|> BackwardState.Tree.apply_sanitize_transforms sanitized_sources_transforms
|> BackwardState.Tree.transform
BackwardTaint.kind
Filter
~f:Issue.sink_can_match_rule
| _ -> taint
in
taint
in
let base_taint =
attribute_taint
|> add_tito_features
|> BackwardState.Tree.prepend [Abstract.TreeDomain.Label.Index attribute]
|> apply_attribute_sanitizers
in
base_taint, state
| _ -> BackwardState.Tree.bottom, bottom
in
let base_taint =
initial_base_taint
|> BackwardState.Tree.join base_taint_property_call
|> BackwardState.Tree.join base_taint_attribute
in
let state = join state_property_call state_attribute in
analyze_expression ~resolution ~taint:base_taint ~state ~expression:base
and analyze_arguments_for_lambda_call
~resolution
~arguments
~arguments_taint
~state
~lambda_argument:
{ Call.Argument.value = { location = lambda_location; _ } as lambda_callee; _ }
{ CallGraph.HigherOrderParameter.index = lambda_index; call_targets; return_type }
=
(* If we have a lambda `fn` getting passed into `hof`, we use the following strategy:
* hof(q, fn, x, y) gets translated into (analyzed backwards)
* if rand():
* $all = {q, x, y}
* $result = fn( *all, **all)
* else:
* $result = fn
* hof(q, $result, x, y)
*)
let result_taint = List.nth_exn arguments_taint lambda_index in
let non_lambda_arguments_taint =
let arguments_taint = List.zip_exn arguments arguments_taint in
List.take arguments_taint lambda_index @ List.drop arguments_taint (lambda_index + 1)
in
(* Simulate if branch. *)
let if_branch_state, all_taint =
(* Simulate $result = fn( *all, **all) *)
let all_argument =
Expression.Name (Name.Identifier "$all") |> Node.create ~location:lambda_location
in
let arguments =
[
{
Call.Argument.value =
Expression.Starred (Starred.Once all_argument)
|> Node.create ~location:lambda_location;
name = None;
};
{
Call.Argument.value =
Expression.Starred (Starred.Twice all_argument)
|> Node.create ~location:lambda_location;
name = None;
};
]
in
let { arguments_taint; self_taint; callee_taint; state } =
apply_callees_and_return_arguments_taint
~resolution
~callee:lambda_callee
~call_location:lambda_location
~arguments
~call_taint:result_taint
~state
(CallGraph.CallCallees.create ~call_targets ~return_type ())
in
let state =
analyze_callee
~resolution
~is_property_call:false
~callee:lambda_callee
~self_taint
~callee_taint
~state
in
let all_taint =
arguments_taint
|> List.fold ~f:BackwardState.Tree.join ~init:BackwardState.Tree.bottom
|> read_tree [Abstract.TreeDomain.Label.AnyIndex]
|> BackwardState.Tree.add_local_breadcrumb (Features.lambda ())
in
state, all_taint
in
(* Simulate else branch. *)
let else_branch_state =
analyze_expression ~resolution ~taint:result_taint ~state ~expression:lambda_callee
in
(* Join both branches. *)
let state = join else_branch_state if_branch_state in
(* Analyze arguments. *)
List.fold
non_lambda_arguments_taint
~init:state
~f:(fun state ({ Call.Argument.value = argument; _ }, argument_taint) ->
let argument_taint = BackwardState.Tree.join argument_taint all_taint in
analyze_unstarred_expression ~resolution argument_taint argument state)
and apply_callees
~resolution
~is_property
~callee
~call_location
~arguments
~state:initial_state
~call_taint
callees
=
let { arguments_taint; self_taint; callee_taint; state } =
apply_callees_and_return_arguments_taint
~resolution
~callee
~call_location
~arguments
~state:initial_state
~call_taint
callees
in
let state =
match callees with
| {
higher_order_parameter =
Some ({ CallGraph.HigherOrderParameter.index; _ } as higher_order_parameter);
_;
} -> (
match List.nth arguments index with
| Some lambda_argument ->
analyze_arguments_for_lambda_call
~resolution
~arguments
~arguments_taint
~state
~lambda_argument
higher_order_parameter
| _ -> analyze_arguments ~resolution ~arguments ~arguments_taint ~state)
| _ -> analyze_arguments ~resolution ~arguments ~arguments_taint ~state
in
let state =
analyze_callee
~resolution
~is_property_call:is_property
~callee
~self_taint
~callee_taint
~state
in
state
and analyze_dictionary_entry ~resolution taint state { Dictionary.Entry.key; value } =
let key_taint = read_tree [AccessPath.dictionary_keys] taint in
let state = analyze_expression ~resolution ~taint:key_taint ~state ~expression:key in
let field_name = AccessPath.get_index key in
let value_taint = read_tree [field_name] taint in
analyze_expression ~resolution ~taint:value_taint ~state ~expression:value
and analyze_reverse_list_element ~total ~resolution taint reverse_position state expression =
let position = total - reverse_position - 1 in
let index_name = Abstract.TreeDomain.Label.Index (string_of_int position) in
let value_taint = read_tree [index_name] taint in
analyze_expression ~resolution ~taint:value_taint ~state ~expression
and generator_resolution ~resolution generators =
let resolve_generator resolution generator =
Resolution.resolve_assignment resolution (Statement.Statement.generator_assignment generator)
in
List.fold generators ~init:resolution ~f:resolve_generator
and analyze_generators ~resolution ~state generators =
let handle_generator state ({ Comprehension.Generator.conditions; _ } as generator) =
let state =
List.fold conditions ~init:state ~f:(fun state condition ->
analyze_expression
~resolution
~taint:BackwardState.Tree.empty
~state
~expression:condition)
in
let { Statement.Assign.target; value; _ } =
Statement.Statement.generator_assignment generator
in
analyze_assignment ~resolution ~target ~value state
in
List.fold ~f:handle_generator generators ~init:state
and analyze_comprehension ~resolution taint { Comprehension.element; generators; _ } state =
let resolution = generator_resolution ~resolution generators in
let element_taint = read_tree [Abstract.TreeDomain.Label.AnyIndex] taint in
let state = analyze_expression ~resolution ~taint:element_taint ~state ~expression:element in
analyze_generators ~resolution ~state generators
(* Skip through * and **. Used at call sites where * and ** are handled explicitly *)
and analyze_unstarred_expression ~resolution taint expression state =
match expression.Node.value with
| Starred (Starred.Once expression)
| Starred (Starred.Twice expression) ->
analyze_expression ~resolution ~taint ~state ~expression
| _ -> analyze_expression ~resolution ~taint ~state ~expression
and analyze_call ~resolution ~location ~taint ~state ~callee ~arguments =
match { Call.callee; arguments } with
| {
callee =
{ Node.value = Name (Name.Attribute { base; attribute = "__setitem__"; _ }); _ } as callee;
arguments = [{ Call.Argument.value = index; _ }; { Call.Argument.value; _ }] as arguments;
} ->
(* Ensure we simulate the body of __setitem__ in case the function contains taint. *)
let state =
let callees = get_call_callees ~location ~call:{ Call.callee; arguments } in
if CallGraph.CallCallees.is_partially_resolved callees then
apply_callees
~resolution
~is_property:false
~call_location:location
~state
~callee
~arguments
~call_taint:taint
callees
else
state
in
(* Handle base[index] = value. *)
analyze_assignment
~resolution
~fields:[AccessPath.get_index index]
~target:base
~value
state
| {
callee = { Node.value = Name (Name.Attribute { base; attribute = "__getitem__"; _ }); _ };
arguments = [{ Call.Argument.value = argument_value; _ }];
} ->
let index = AccessPath.get_index argument_value in
let taint =
BackwardState.Tree.prepend [index] taint |> BackwardState.Tree.add_local_first_index index
in
analyze_expression ~resolution ~taint ~state ~expression:base
(* Special case x.__iter__().__next__() as being a random index access (this pattern is the
desugaring of `for element in x`). Special case dictionary keys appropriately. *)
| {
callee =
{
Node.value =
Name
(Name.Attribute
{
base =
{
Node.value =
Call
{
callee =
{
Node.value =
Name (Name.Attribute { base; attribute = "__iter__"; _ });
_;
};
arguments = [];
};
_;
};
attribute = "__next__";
_;
});
_;
};
arguments = [];
} ->
let label =
(* For dictionaries, the default iterator is keys. *)
if Resolution.resolve_expression_to_type resolution base |> Type.is_dictionary_or_mapping
then
AccessPath.dictionary_keys
else
Abstract.TreeDomain.Label.AnyIndex
in
let taint = BackwardState.Tree.prepend [label] taint in
analyze_expression ~resolution ~taint ~state ~expression:base
(* We special-case object.__setattr__, which is sometimes used in order to work around
dataclasses being frozen post-initialization. *)
| {
callee =
{
Node.value =
Name
(Name.Attribute
{
base = { Node.value = Name (Name.Identifier "object"); _ };
attribute = "__setattr__";
_;
});
_;
};
arguments =
[
{ Call.Argument.value = self; name = None };
{
Call.Argument.value =
{
Node.value =
Expression.Constant (Constant.String { value = attribute; kind = String });
_;
};
name = None;
};
{ Call.Argument.value; name = None };
];
} ->
analyze_assignment
~resolution
~target:
(Expression.Name (Name.Attribute { base = self; attribute; special = true })
|> Node.create ~location)
~value
state
(* `getattr(a, "field", default)` should evaluate to the join of `a.field` and `default`. *)
| {
callee = { Node.value = Name (Name.Identifier "getattr"); _ };
arguments =
[
{ Call.Argument.value = base; _ };
{
Call.Argument.value =
{
Node.value =
Expression.Constant (Constant.String { StringLiteral.value = attribute; _ });
_;
};
_;
};
{ Call.Argument.value = default; _ };
];
} ->
let attribute_expression =
Expression.Name (Name.Attribute { base; attribute; special = false })
|> Node.create ~location
in
let state = analyze_expression ~resolution ~state ~expression:attribute_expression ~taint in
analyze_expression ~resolution ~state ~expression:default ~taint
(* `zip(a, b, ...)` creates a taint object whose first index has a's taint, second index has b's
taint, etc. *)
| { callee = { Node.value = Name (Name.Identifier "zip"); _ }; arguments = lists } ->
let taint = BackwardState.Tree.read [Abstract.TreeDomain.Label.AnyIndex] taint in
let analyze_zipped_list index state { Call.Argument.value; _ } =
let index_name = Abstract.TreeDomain.Label.Index (string_of_int index) in
let taint =
BackwardState.Tree.read [index_name] taint
|> BackwardState.Tree.prepend [Abstract.TreeDomain.Label.AnyIndex]
in
analyze_expression ~resolution ~state ~taint ~expression:value
in
List.foldi lists ~init:state ~f:analyze_zipped_list
(* dictionary .keys(), .values() and .items() functions are special, as they require handling of
DictionaryKeys taint. *)
| { callee = { Node.value = Name (Name.Attribute { base; attribute = "values"; _ }); _ }; _ }
when Resolution.resolve_expression_to_type resolution base |> Type.is_dictionary_or_mapping ->
let taint =
taint
|> BackwardState.Tree.read [Abstract.TreeDomain.Label.AnyIndex]
|> BackwardState.Tree.prepend [Abstract.TreeDomain.Label.AnyIndex]
in
analyze_expression ~resolution ~taint ~state ~expression:base
| { callee = { Node.value = Name (Name.Attribute { base; attribute = "keys"; _ }); _ }; _ }
when Resolution.resolve_expression_to_type resolution base |> Type.is_dictionary_or_mapping ->
let taint =
taint
|> BackwardState.Tree.read [AccessPath.dictionary_keys]
|> BackwardState.Tree.prepend [Abstract.TreeDomain.Label.AnyIndex]
in
analyze_expression ~resolution ~taint ~state ~expression:base
| {
callee =
{
Node.value =
Name
(Name.Attribute
{
base = { Node.value = Name (Name.Identifier identifier); _ } as base;
attribute = "update";
_;
});
_;
};
arguments =
[
{
Call.Argument.value =
{ Node.value = Expression.Dictionary { Dictionary.entries; keywords = [] }; _ };
_;
};
];
}
when Resolution.resolve_expression_to_type resolution base |> Type.is_dictionary_or_mapping
&& Option.is_some (Dictionary.string_literal_keys entries) ->
let entries = Option.value_exn (Dictionary.string_literal_keys entries) in
let access_path =
Some { AccessPath.root = AccessPath.Root.Variable identifier; path = [] }
in
let dict_taint =
let global_taint =
GlobalModel.from_expression
~resolution
~call_graph:FunctionContext.call_graph_of_define
~qualifier:FunctionContext.qualifier
~expression:base
~interval:FunctionContext.caller_class_interval
|> GlobalModel.get_sinks
|> Issue.SinkTreeWithHandle.join
in
BackwardState.Tree.join global_taint (get_taint access_path state)
in
let override_taint_from_update (taint, state) (key, value) =
let path = [Abstract.TreeDomain.Label.Index key] in
let value_taint =
BackwardState.Tree.read ~transform_non_leaves path dict_taint
|> BackwardState.Tree.transform
Features.TitoPositionSet.Element
Add
~f:value.Node.location
in
(* update backwards is overwriting the old key taint with bottom *)
let taint =
BackwardState.Tree.assign ~tree:taint path ~subtree:BackwardState.Tree.bottom
in
let state = analyze_expression ~resolution ~taint:value_taint ~state ~expression:value in
taint, state
in
let taint, state =
List.fold entries ~init:(dict_taint, state) ~f:override_taint_from_update
in
store_taint ~root:(AccessPath.Root.Variable identifier) ~path:[] taint state
| {
callee =
{
Node.value =
Name
(Name.Attribute
{
base = { Node.value = Name (Name.Identifier identifier); _ } as base;
attribute = "pop";
_;
});
_;
};
arguments =
[
{
Call.Argument.value =
{ Node.value = Expression.Constant (Constant.String { StringLiteral.value; _ }); _ };
_;
};
];
}
when Resolution.resolve_expression_to_type resolution base |> Type.is_dictionary_or_mapping ->
let access_path =
Some { AccessPath.root = AccessPath.Root.Variable identifier; path = [] }
in
let old_taint = get_taint access_path state in
let new_taint =
BackwardState.Tree.assign
~tree:old_taint
[Abstract.TreeDomain.Label.Index value]
~subtree:taint
in
store_taint ~root:(AccessPath.Root.Variable identifier) ~path:[] new_taint state
| { callee = { Node.value = Name (Name.Attribute { base; attribute = "items"; _ }); _ }; _ }
when Resolution.resolve_expression_to_type resolution base |> Type.is_dictionary_or_mapping ->
(* When we're faced with an assign of the form `k, v = d.items().__iter__().__next__()`, the
taint we analyze d.items() under will be {* -> {0 -> k, 1 -> v} }. We want to analyze d
itself under the taint of `{* -> v, $keys -> k}`. *)
let item_taint = BackwardState.Tree.read [Abstract.TreeDomain.Label.AnyIndex] taint in
let key_taint =
BackwardState.Tree.read [Abstract.TreeDomain.Label.create_int_index 0] item_taint
in
let value_taint =
BackwardState.Tree.read [Abstract.TreeDomain.Label.create_int_index 1] item_taint
in
let taint =
BackwardState.Tree.join
(BackwardState.Tree.prepend [AccessPath.dictionary_keys] key_taint)
(BackwardState.Tree.prepend [Abstract.TreeDomain.Label.AnyIndex] value_taint)
in
analyze_expression ~resolution ~taint ~state ~expression:base
| {
Call.callee =
{
Node.value =
Name
(Name.Attribute
{ base = { Node.value = Expression.Name name; _ }; attribute = "gather"; _ });
_;
};
arguments;
}
when String.equal "asyncio" (Name.last name) ->
analyze_expression
~resolution
~taint
~state
~expression:
{
Node.location;
value =
Expression.Tuple
(List.map arguments ~f:(fun argument -> argument.Call.Argument.value));
}
| {
Call.callee = { Node.value = Name (Name.Identifier "reveal_taint"); _ };
arguments = [{ Call.Argument.value = expression; _ }];
} ->
begin
match AccessPath.of_expression expression with
| None ->
Log.dump
"%a: Revealed backward taint for `%s`: expression is too complex"
Location.WithModule.pp
(Location.with_module location ~module_reference:FunctionContext.qualifier)
(Transform.sanitize_expression expression |> Expression.show)
| access_path ->
let taint = get_taint access_path state in
Log.dump
"%a: Revealed backward taint for `%s`: %s"
Location.WithModule.pp
(Location.with_module location ~module_reference:FunctionContext.qualifier)
(Transform.sanitize_expression expression |> Expression.show)
(BackwardState.Tree.show taint)
end;
state
| { Call.callee = { Node.value = Name (Name.Identifier "super"); _ }; arguments } -> (
match arguments with
| [_; Call.Argument.{ value = object_; _ }] ->
analyze_expression ~resolution ~taint ~state ~expression:object_
| _ -> (
(* Use implicit self *)
match first_parameter () with
| Some root -> store_taint ~weak:true ~root ~path:[] taint state
| None -> state))
| _ ->
let taint =
match Node.value callee with
| Name
(Name.Attribute
{
base = { Node.value = Expression.Constant (Constant.String _); _ };
attribute = "format";
_;
}) ->
BackwardState.Tree.add_local_breadcrumb (Features.format_string ()) taint
| _ -> taint
in
let { Call.callee; arguments } =
CallGraph.redirect_special_calls ~resolution { Call.callee; arguments }
in
let callees = get_call_callees ~location ~call:{ Call.callee; arguments } in
apply_callees
~resolution
~is_property:false
~call_location:location
~state
~callee
~arguments
~call_taint:taint
callees
and analyze_joined_string ~resolution ~taint ~state ~location substrings =
let taint =
let literal_string_sinks = TaintConfiguration.literal_string_sinks () in
if List.is_empty literal_string_sinks then
taint
else
let value =
List.map substrings ~f:(function
| Substring.Format _ -> "{}"
| Substring.Literal { Node.value; _ } -> value)
|> String.concat ~sep:""
in
List.fold
literal_string_sinks
~f:(fun taint { TaintConfiguration.sink_kind; pattern } ->
if Re2.matches pattern value then
BackwardTaint.singleton ~location sink_kind Frame.initial
|> BackwardState.Tree.create_leaf
|> BackwardState.Tree.join taint
else
taint)
~init:taint
in
let taint = BackwardState.Tree.add_local_breadcrumb (Features.format_string ()) taint in
let analyze_stringify_callee
~taint_to_join
~state_to_join
~call_target
~call_location
~base
~base_state
=
let { arguments_taint = _; self_taint; callee_taint; state = new_state } =
let callees =
CallGraph.CallCallees.create ~call_targets:[call_target] ~return_type:Type.string ()
in
let callee =
let callee_from_method_name method_name =
{
Node.value =
Expression.Name (Name.Attribute { base; attribute = method_name; special = false });
location = call_location;
}
in
match call_target.target with
| `Method { method_name; _ } -> callee_from_method_name method_name
| `OverrideTarget { method_name; _ } -> callee_from_method_name method_name
| `Function function_name ->
{ Node.value = Name (Name.Identifier function_name); location = call_location }
| `Object _ -> failwith "callees should be either methods or functions"
in
apply_callees_and_return_arguments_taint
~resolution
~callee
~call_location
~arguments:[]
~state:base_state
~call_taint:taint
callees
in
let new_taint =
match callee_taint, self_taint with
| None, Some self_taint -> BackwardState.Tree.join taint_to_join self_taint
| None, None -> taint_to_join
| Some _, None ->
failwith "Probably a rare case: callee_taint is Some and self_taint is None"
| Some _, Some _ -> failwith "callee_taint and self_taint should not co-exist"
in
new_taint, join state_to_join new_state
in
let analyze_nested_expression state = function
| Substring.Format ({ Node.location = expression_location; _ } as expression) ->
let new_taint, new_state =
match get_format_string_callees ~location:expression_location with
| Some { CallGraph.FormatStringCallees.call_targets } ->
List.fold
call_targets
~init:(taint, state)
~f:(fun (taint_to_join, state_to_join) call_target ->
analyze_stringify_callee
~taint_to_join
~state_to_join
~call_target
~call_location:expression_location
~base:expression
~base_state:state)
| _ -> taint, state
in
analyze_expression ~resolution ~taint:new_taint ~state:new_state ~expression
| Substring.Literal _ -> state
in
List.fold (List.rev substrings) ~f:analyze_nested_expression ~init:state
and analyze_expression ~resolution ~taint ~state ~expression:({ Node.location; _ } as expression) =
log
"Backward analysis of expression: `%a` with backward taint: %a"
Expression.pp
expression
BackwardState.Tree.pp
taint;
match expression.Node.value with
| Await expression -> analyze_expression ~resolution ~taint ~state ~expression
| BooleanOperator { left; operator = _; right } ->
analyze_expression ~resolution ~taint ~state ~expression:right
|> fun state -> analyze_expression ~resolution ~taint ~state ~expression:left
| ComparisonOperator ({ left; operator = _; right } as comparison) -> (
match ComparisonOperator.override ~location comparison with
| Some override -> analyze_expression ~resolution ~taint ~state ~expression:override
| None ->
let taint =
BackwardState.Tree.add_local_breadcrumbs (Features.type_bool_scalar_set ()) taint
in
analyze_expression ~resolution ~taint ~state ~expression:right
|> fun state -> analyze_expression ~resolution ~taint ~state ~expression:left)
| Call { callee; arguments } ->
analyze_call ~resolution ~location ~taint ~state ~callee ~arguments
| Constant _ -> state
| Dictionary { Dictionary.entries; keywords } ->
let state = List.fold ~f:(analyze_dictionary_entry ~resolution taint) entries ~init:state in
let analyze_dictionary_keywords state keywords =
analyze_expression ~resolution ~taint ~state ~expression:keywords
in
List.fold keywords ~f:analyze_dictionary_keywords ~init:state
| DictionaryComprehension
{ Comprehension.element = { Dictionary.Entry.key; value }; generators; _ } ->
let resolution = generator_resolution ~resolution generators in
let state =
analyze_expression
~resolution
~taint:(read_tree [AccessPath.dictionary_keys] taint)
~state
~expression:key
in
let state =
analyze_expression
~resolution
~taint:(read_tree [Abstract.TreeDomain.Label.AnyIndex] taint)
~state
~expression:value
in
analyze_generators ~resolution ~state generators
| Generator comprehension -> analyze_comprehension ~resolution taint comprehension state
| Lambda { parameters = _; body } ->
(* Ignore parameter bindings and pretend body is inlined *)
analyze_expression ~resolution ~taint ~state ~expression:body
| List list ->
let total = List.length list in
List.rev list
|> List.foldi ~f:(analyze_reverse_list_element ~total ~resolution taint) ~init:state
| ListComprehension comprehension -> analyze_comprehension ~resolution taint comprehension state
| Name (Name.Identifier identifier) ->
store_taint ~weak:true ~root:(AccessPath.Root.Variable identifier) ~path:[] taint state
| Name (Name.Attribute { base; attribute = "__dict__"; _ }) ->
analyze_expression ~resolution ~taint ~state ~expression:base
| Name (Name.Attribute { base; attribute; special }) ->
analyze_attribute_access
~resolution
~location
~resolve_properties:true
~base
~attribute
~special
~base_taint:BackwardState.Tree.bottom
~attribute_taint:taint
~state
| Set set ->
let element_taint = read_tree [Abstract.TreeDomain.Label.AnyIndex] taint in
List.fold
set
~f:(fun state expression ->
analyze_expression ~resolution ~taint:element_taint ~state ~expression)
~init:state
| SetComprehension comprehension -> analyze_comprehension ~resolution taint comprehension state
| Starred (Starred.Once expression)
| Starred (Starred.Twice expression) ->
let taint = BackwardState.Tree.prepend [Abstract.TreeDomain.Label.AnyIndex] taint in
analyze_expression ~resolution ~taint ~state ~expression
| FormatString substrings ->
analyze_joined_string
~resolution
~taint
~state
~location:(Location.with_module ~module_reference:FunctionContext.qualifier location)
substrings
| Ternary { target; test; alternative } ->
let state_then = analyze_expression ~resolution ~taint ~state ~expression:target in
let state_else = analyze_expression ~resolution ~taint ~state ~expression:alternative in
join state_then state_else
|> fun state ->
analyze_expression ~resolution ~taint:BackwardState.Tree.empty ~state ~expression:test
| Tuple list ->
let total = List.length list in
List.rev list
|> List.foldi ~f:(analyze_reverse_list_element ~total ~resolution taint) ~init:state
| UnaryOperator { operator = _; operand } ->
analyze_expression ~resolution ~taint ~state ~expression:operand
| WalrusOperator { target; value } ->
analyze_expression ~resolution ~taint ~state ~expression:value
|> fun state -> analyze_expression ~resolution ~taint ~state ~expression:target
| Yield None -> state
| Yield (Some expression)
| YieldFrom expression ->
let access_path = { AccessPath.root = AccessPath.Root.LocalResult; path = [] } in
let return_taint = get_taint (Some access_path) state in
analyze_expression ~resolution ~taint:return_taint ~state ~expression
(* Returns the taint, and whether to collapse one level (due to star expression) *)
and compute_assignment_taint ~resolution target state =
match target.Node.value with
| Expression.Starred (Once target | Twice target) ->
(* This is approximate. Unless we can get the tuple type on the right to tell how many total
elements there will be, we just pick up the entire collection. *)
let taint, _ = compute_assignment_taint ~resolution target state in
taint, true
| List targets
| Tuple targets ->
let compute_tuple_target_taint position taint_accumulator target =
let taint, collapse = compute_assignment_taint ~resolution target state in
let index_taint =
if collapse then
taint
else
let index_name = Abstract.TreeDomain.Label.Index (string_of_int position) in
BackwardState.Tree.prepend [index_name] taint
in
BackwardState.Tree.join index_taint taint_accumulator
in
let taint =
List.foldi targets ~f:compute_tuple_target_taint ~init:BackwardState.Tree.empty
in
taint, false
| Call
{
callee = { Node.value = Name (Name.Attribute { base; attribute = "__getitem__"; _ }); _ };
arguments = [{ Call.Argument.value = index; _ }];
} ->
let taint =
compute_assignment_taint ~resolution base state
|> fst
|> BackwardState.Tree.read [AccessPath.get_index index]
in
taint, false
| _ ->
let taint =
let local_taint =
let access_path = AccessPath.of_expression target in
get_taint access_path state
in
let global_taint =
GlobalModel.from_expression
~resolution
~call_graph:FunctionContext.call_graph_of_define
~qualifier:FunctionContext.qualifier
~expression:target
~interval:FunctionContext.caller_class_interval
|> GlobalModel.get_sinks
|> Issue.SinkTreeWithHandle.join
in
BackwardState.Tree.join local_taint global_taint
in
taint, false
and analyze_assignment ~resolution ?(fields = []) ~target ~value state =
let taint = compute_assignment_taint ~resolution target state |> fst |> read_tree fields in
let state =
let rec clear_taint state target =
match Node.value target with
| Expression.Tuple items -> List.fold items ~f:clear_taint ~init:state
| _ -> (
match AccessPath.of_expression target with
| Some { root; path } ->
{
taint =
BackwardState.assign
~root
~path:(path @ fields)
BackwardState.Tree.empty
state.taint;
}
| None -> state)
in
clear_taint state target
in
analyze_expression ~resolution ~taint ~state ~expression:value
let analyze_statement ~resolution state { Node.value = statement; _ } =
match statement with
| Statement.Statement.Assign
{ value = { Node.value = Expression.Constant Constant.Ellipsis; _ }; _ } ->
state
| Assign { target = { Node.location; value = target_value } as target; value; _ } -> (
let target_global_model =
GlobalModel.from_expression
~resolution
~call_graph:FunctionContext.call_graph_of_define
~qualifier:FunctionContext.qualifier
~expression:target
~interval:FunctionContext.caller_class_interval
in
if GlobalModel.is_sanitized target_global_model then
analyze_expression ~resolution ~taint:BackwardState.Tree.bottom ~state ~expression:value
else
match target_value with
| Expression.Name (Name.Attribute { base; attribute; _ }) ->
let attribute_access_callees = get_attribute_access_callees ~location ~attribute in
let property_call_state =
match attribute_access_callees with
| Some { property_targets = _ :: _ as property_targets; return_type; _ } ->
(* Treat `a.property = x` as `a = a.property(x)` *)
let taint = compute_assignment_taint ~resolution base state |> fst in
apply_callees
~resolution
~is_property:true
~callee:target
~call_location:location
~arguments:[{ name = None; value }]
~state
~call_taint:taint
(CallGraph.CallCallees.create ~call_targets:property_targets ~return_type ())
| _ -> bottom
in
let attribute_state =
match attribute_access_callees with
| Some { is_attribute = true; _ }
| None ->
analyze_assignment ~resolution ~target ~value state
| _ -> bottom
in
join property_call_state attribute_state
| _ -> analyze_assignment ~resolution ~target ~value state)
| Assert _
| Break
| Class _
| Continue ->
state
| Define define -> analyze_definition ~define state
| Delete expressions ->
let process_expression state expression =
match AccessPath.of_expression expression with
| Some { AccessPath.root; path } ->
{ taint = BackwardState.assign ~root ~path BackwardState.Tree.bottom state.taint }
| _ -> state
in
List.fold expressions ~init:state ~f:process_expression
| Expression expression ->
analyze_expression ~resolution ~taint:BackwardState.Tree.empty ~state ~expression
| For _
| Global _
| If _
| Import _
| Match _
| Nonlocal _
| Pass
| Raise _ ->
state
| Return { expression = Some expression; _ } ->
let access_path = { AccessPath.root = AccessPath.Root.LocalResult; path = [] } in
let return_taint = get_taint (Some access_path) state in
analyze_expression ~resolution ~taint:return_taint ~state ~expression
| Return { expression = None; _ }
| Try _
| With _
| While _ ->
state
let backward ~statement_key state ~statement =
TaintProfiler.track_statement_analysis
~profiler
~analysis:TaintProfiler.Backward
~statement
~f:(fun () ->
log
"Backward analysis of statement: `%a`@,With backward state: %a"
Statement.pp
statement
pp
state;
let resolution =
let { Node.value = { Statement.Define.signature = { parent; _ }; _ }; _ } =
FunctionContext.definition
in
TypeCheck.resolution_with_key
~global_resolution
~local_annotations
~parent
~statement_key
(* TODO(T65923817): Eliminate the need of creating a dummy context here *)
(module TypeCheck.DummyContext)
in
analyze_statement ~resolution state statement)
let forward ~statement_key:_ _ ~statement:_ = failwith "Don't call me"
end
(* Split the inferred entry state into externally visible taint_in_taint_out parts and sink_taint. *)
let extract_tito_and_sink_models define ~is_constructor ~resolution ~existing_backward entry_taint =
let { Statement.Define.signature = { parameters; _ }; _ } = define in
let {
TaintConfiguration.analysis_model_constraints =
{
maximum_model_width;
maximum_return_access_path_length;
maximum_trace_length;
maximum_tito_depth;
_;
};
_;
}
=
TaintConfiguration.get ()
in
let normalized_parameters = AccessPath.Root.normalize_parameters parameters in
(* Simplify trees by keeping only essential structure and merging details back into that. *)
let simplify annotation tree =
let annotation = Option.map ~f:(GlobalResolution.parse_annotation resolution) annotation in
let type_breadcrumbs = Features.type_breadcrumbs ~resolution annotation in
let essential =
if is_constructor then
BackwardState.Tree.essential_for_constructor tree
else
BackwardState.Tree.essential tree
in
BackwardState.Tree.shape
~transform:(BackwardTaint.add_local_breadcrumbs (Features.widen_broadening_set ()))
tree
~mold:essential
|> BackwardState.Tree.add_local_breadcrumbs type_breadcrumbs
|> BackwardState.Tree.limit_to
~transform:(BackwardTaint.add_local_breadcrumbs (Features.widen_broadening_set ()))
~width:maximum_model_width
|> BackwardState.Tree.approximate_return_access_paths ~maximum_return_access_path_length
in
let split_and_simplify model (parameter, name, original) =
let annotation = original.Node.value.Parameter.annotation in
let partition =
BackwardState.read ~root:(AccessPath.Root.Variable name) ~path:[] entry_taint
|> BackwardState.Tree.partition BackwardTaint.kind By ~f:Sinks.discard_transforms
in
let taint_in_taint_out =
let breadcrumbs_to_attach, via_features_to_attach =
BackwardState.extract_features_to_attach
~root:parameter
~attach_to_kind:Sinks.Attach
existing_backward.Model.Backward.taint_in_taint_out
in
let candidate_tree =
Map.Poly.find partition Sinks.LocalReturn
|> Option.value ~default:BackwardState.Tree.empty
|> simplify annotation
in
let candidate_tree =
match maximum_tito_depth with
| Some maximum_tito_depth ->
BackwardState.Tree.prune_maximum_length maximum_tito_depth candidate_tree
| _ -> candidate_tree
in
let candidate_tree =
candidate_tree
|> BackwardState.Tree.add_local_breadcrumbs breadcrumbs_to_attach
|> BackwardState.Tree.add_via_features via_features_to_attach
in
let number_of_paths =
BackwardState.Tree.fold
BackwardState.Tree.Path
~init:0
~f:(fun _ count -> count + 1)
candidate_tree
in
if number_of_paths > TaintConfiguration.maximum_tito_leaves then
BackwardState.Tree.collapse_to
~transform:(BackwardTaint.add_local_breadcrumbs (Features.widen_broadening_set ()))
~depth:0
candidate_tree
else
candidate_tree
in
let sink_taint =
let simplify_sink_taint ~key:sink ~data:sink_tree accumulator =
match sink with
| Sinks.LocalReturn
(* For now, we don't propagate partial sinks at all. *)
| Sinks.PartialSink _
| Sinks.Attach ->
accumulator
| _ ->
let sink_tree =
match maximum_trace_length with
| Some maximum_trace_length ->
BackwardState.Tree.prune_maximum_length maximum_trace_length sink_tree
| _ -> sink_tree
in
simplify annotation sink_tree |> BackwardState.Tree.join accumulator
in
Map.Poly.fold ~init:BackwardState.Tree.empty ~f:simplify_sink_taint partition
in
let sink_taint =
let breadcrumbs_to_attach, via_features_to_attach =
BackwardState.extract_features_to_attach
~root:parameter
~attach_to_kind:Sinks.Attach
existing_backward.Model.Backward.sink_taint
in
sink_taint
|> BackwardState.Tree.add_local_breadcrumbs breadcrumbs_to_attach
|> BackwardState.Tree.add_via_features via_features_to_attach
in
Model.Backward.
{
taint_in_taint_out =
BackwardState.assign ~root:parameter ~path:[] taint_in_taint_out model.taint_in_taint_out;
sink_taint = BackwardState.assign ~root:parameter ~path:[] sink_taint model.sink_taint;
}
in
List.fold normalized_parameters ~f:split_and_simplify ~init:Model.Backward.empty
let run
?(profiler = TaintProfiler.none)
~environment
~qualifier
~define
~call_graph_of_define
~existing_model
~triggered_sinks
=
let timer = Timer.start () in
let ({ Node.value = { Statement.Define.signature = { name; _ }; _ }; _ } as define) =
(* Apply decorators to make sure we match parameters up correctly. *)
let resolution = TypeEnvironment.ReadOnly.global_resolution environment in
Annotated.Define.create define
|> Annotated.Define.decorate ~resolution
|> Annotated.Define.define
in
let module FunctionContext = struct
let qualifier = qualifier
let definition = define
let debug = Statement.Define.dump define.value
let profiler = profiler
let environment = environment
let call_graph_of_define = call_graph_of_define
let triggered_sinks = triggered_sinks
let caller_class_interval = ClassInterval.SharedMemory.of_definition definition
end
in
let module State = State (FunctionContext) in
let module Fixpoint = Fixpoint.Make (State) in
let initial = State.{ taint = initial_taint } in
let cfg = Cfg.create define.value in
let () = State.log "Backward analysis of callable: `%a`" Reference.pp name in
let entry_state =
Metrics.with_alarm name (fun () -> Fixpoint.backward ~cfg ~initial |> Fixpoint.entry) ()
in
let () =
match entry_state with
| Some entry_state -> State.log "Entry state:@,%a" State.pp entry_state
| None -> State.log "No entry state found"
in
let resolution = TypeEnvironment.ReadOnly.global_resolution environment in
let extract_model State.{ taint; _ } =
let model =
extract_tito_and_sink_models
~is_constructor:(State.is_constructor ())
define.value
~resolution
~existing_backward:existing_model.Model.backward
taint
in
let () = State.log "Backward Model:@,%a" Model.Backward.pp model in
model
in
Statistics.performance
~randomly_log_every:1000
~always_log_time_threshold:1.0 (* Seconds *)
~name:"Backward analysis"
~normals:["callable", Reference.show name]
~section:`Taint
~timer
();
entry_state >>| extract_model |> Option.value ~default:Model.Backward.empty