source/analysis/astEnvironment.ml (473 lines of code) (raw):
(*
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*)
open Ast
open Core
open Pyre
open PyreParser
(* Handle for the AST environment: a module tracker plus an optional extra preprocessing pass. *)
type t = {
(* Consulted for source-path lookup and for listing tracked modules. *)
module_tracker: ModuleTracker.t;
(* When present, run on each source right after `Preprocessing.preprocess_phase1`. *)
additional_preprocessing: (Source.t -> Source.t) option;
}
(* Record stored in place of a source when a module could not be read or parsed. *)
module ParserError = struct
type t = {
(* The file that failed to parse. *)
source_path: SourcePath.t;
(* Where the failure was reported. *)
location: Location.t;
(* Set by `parse_source` when the file is in `Declare` mode or ignores error code 404. *)
is_suppressed: bool;
(* Description of the failure. *)
message: string;
}
[@@deriving sexp, compare, hash]
end
(* Value stored per module in `RawSources`: either the parsed source or the error that prevented
   parsing. *)
module RawSourceValue = struct
  type t = (Source.t, ParserError.t) Result.t

  let description = "Unprocessed source"

  let prefix = Prefix.make ()

  (* Rebuild a value from its marshalled string form, reading from offset 0. *)
  let unmarshall serialized = Marshal.from_string serialized 0

  let compare left right = Result.compare Source.compare ParserError.compare left right
end
(* Dependency-tracked table keyed by module qualifier. Each entry is a `RawSourceValue.t`: the
source as stored by `parse_raw_sources` (before `Preprocessing.preprocess_phase1`), or a parse
error. *)
module RawSources =
DependencyTrackedMemory.DependencyTrackedTableNoCache
(SharedMemoryKeys.ReferenceKey)
(SharedMemoryKeys.DependencyKey)
(RawSourceValue)
(* Build an environment handle around `module_tracker`, optionally with an extra preprocessing
   pass. *)
let create ?additional_preprocessing module_tracker =
  { additional_preprocessing; module_tracker }
(* Compute the sorted, de-duplicated names that a wildcard import of `source` brings in.

   If the module assigns a list or tuple to `__all__`, the string-literal elements of that
   assignment are the exports. Otherwise every collected global whose name does not start with `_`
   is exported; in stubs, imports carrying an implicit alias are dropped first. *)
let wildcard_exports_of ({ Source.source_path = { SourcePath.is_stub; _ }; _ } as source) =
  let open Expression in
  let open UnannotatedGlobal in
  (* Only string constants contribute to `__all__`; any other element is skipped. *)
  let string_literal = function
    | { Node.value = Expression.Constant (Constant.String { value = name; _ }); _ } -> Some name
    | _ -> None
  in
  let dunder_all_names = function
    | {
        Collector.Result.name = "__all__";
        unannotated_global =
          SimpleAssign { value = { Node.value = Expression.(List names | Tuple names); _ }; _ };
      } ->
        Some (List.filter_map names ~f:string_literal)
    | _ -> None
  in
  (* Stubs have a slightly different rule with re-export *)
  let is_reexported = function
    | {
        Collector.Result.unannotated_global =
          Imported
            (ImportEntry.Module { implicit_alias; _ } | ImportEntry.Name { implicit_alias; _ });
        _;
      } ->
        not implicit_alias
    | _ -> true
  in
  let globals = Collector.from_source source in
  let exported_names =
    match List.find_map globals ~f:dunder_all_names with
    | Some names -> names
    | None ->
        let candidates = if is_stub then List.filter globals ~f:is_reexported else globals in
        List.filter_map candidates ~f:(fun { Collector.Result.name; _ } ->
            if String.is_prefix name ~prefix:"_" then None else Some name)
  in
  List.dedup_and_sort exported_names ~compare:Identifier.compare
(* Thin accessors over the `RawSources` table. The environment argument of each function is
   ignored. *)
module Raw = struct
  (* Store a successfully parsed source under its own qualifier. *)
  let add_parsed_source _ source =
    let { Source.source_path = { SourcePath.qualifier; _ }; _ } = source in
    RawSources.add qualifier (Result.Ok source)


  (* Store a parse failure under the qualifier of the file that failed. *)
  let add_unparsed_source _ error =
    let { ParserError.source_path = { SourcePath.qualifier; _ }; _ } = error in
    RawSources.add qualifier (Result.Error error)


  (* Run `update` inside a dependency transaction covering `qualifiers`. *)
  let update_and_compute_dependencies _ ~update ~scheduler ~configuration qualifiers =
    let keys = RawSources.KeySet.of_list qualifiers in
    let transaction =
      SharedMemoryKeys.DependencyKey.Transaction.empty ~scheduler ~configuration
      |> RawSources.add_to_transaction ~keys
    in
    SharedMemoryKeys.DependencyKey.Transaction.execute ~update transaction


  let get_source _ = RawSources.get

  let remove_sources _ qualifiers =
    RawSources.remove_batch (RawSources.KeySet.of_list qualifiers)
end
(* Outcome of handling a single file in `parse_source`. *)
type parse_result =
(* The file was read and parsed. *)
| Success of Source.t
(* Reading or parsing failed; the fields mirror those of `ParserError.t` minus the source path. *)
| Error of {
location: Location.t;
message: string;
is_suppressed: bool;
}
(* Wrap `statements` into a `Source.t` for `source_path`, always using
   `Visit.collect_format_strings_with_ignores` as the format-string collector. *)
let create_source ~metadata ~source_path statements =
  let collect_format_strings_with_ignores = Visit.collect_format_strings_with_ignores in
  Source.create_from_source_path
    ~source_path
    ~metadata
    ~collect_format_strings_with_ignores
    statements
(* Read and parse the file behind `source_path`.

   Returns `Success` with the parsed source, or `Error` when the parser rejects the content or when
   a `Sys_error` is raised while reading the file. A parser error is marked as suppressed when the
   file is in `Declare` mode or lists error code 404 among its ignore codes. The reported error
   location is sanitized so that every position is at least 1 and `stop` never precedes `start`. *)
let parse_source
    ~configuration:({ Configuration.Analysis.enable_type_comments; _ } as configuration)
    ~context
    ({ SourcePath.qualifier; _ } as source_path)
  =
  let parse content =
    let metadata = Source.Metadata.parse ~qualifier (String.split content ~on:'\n') in
    match PyreNewParser.parse_module ~enable_type_comment:enable_type_comments ~context content with
    | Ok statements -> Success (create_source ~metadata ~source_path statements)
    | Error { PyreNewParser.Error.line; column; end_line; end_column; message } ->
        let is_suppressed =
          let { Source.Metadata.local_mode; ignore_codes; _ } = metadata in
          match Source.mode ~configuration ~local_mode with
          | Source.Declare -> true
          | _ ->
              (* NOTE: The number needs to be updated when the error code changes. *)
              List.exists ignore_codes ~f:(Int.equal 404)
        in
        let location =
          (* CPython set line/column number to -1 in some exceptional cases. *)
          let replace_invalid_position number = if number <= 0 then 1 else number in
          let start_line = replace_invalid_position line in
          let start_column = replace_invalid_position column in
          let stop_line = replace_invalid_position end_line in
          let stop_column = replace_invalid_position end_column in
          let start = { Location.line = start_line; column = start_column } in
          let stop =
            (* Work around CPython bug where the end location sometimes precedes start location. *)
            let reported_end_precedes_start =
              [%compare: int * int] (line, column) (end_line, end_column) > 0
            in
            (* Clamping invalid positions to 1 can itself reorder the two ends (e.g. a start line
               of -1 with a large column against an end on line 1), so the sanitized positions are
               checked as well. *)
            let sanitized_end_precedes_start =
              [%compare: int * int] (start_line, start_column) (stop_line, stop_column) > 0
            in
            if reported_end_precedes_start || sanitized_end_precedes_start then
              start
            else
              { Location.line = stop_line; column = stop_column }
          in
          { Location.start; stop }
        in
        Error { location; message; is_suppressed }
  in
  let path = SourcePath.full_path ~configuration source_path in
  try File.content_exn (File.create path) |> parse with
  | Sys_error error ->
      let message = Format.asprintf "Cannot open file `%a` due to: %s" PyrePath.pp path error in
      Error
        {
          location =
            {
              Location.start = { Location.line = 1; column = 1 };
              stop = { Location.line = 1; column = 1 };
            };
          message;
          is_suppressed = false;
        }
(* Parse every path in `source_paths` through `Scheduler.map_reduce` and record the outcome in the
   raw-source table.

   Successfully parsed sources go through version-specific code replacement and
   `Preprocessing.preprocess_phase0` before being stored; failures are stored as `ParserError.t`.
   Returns the qualifiers of all handled files, whether or not parsing succeeded. *)
let parse_raw_sources ~configuration ~scheduler ~ast_environment source_paths =
  let {
    Configuration.Analysis.python_major_version = major_version;
    python_minor_version = minor_version;
    python_micro_version = micro_version;
    _;
  }
    =
    configuration
  in
  (* Parse one file, store the result, and hand back the qualifier it was stored under. *)
  let parse_and_store ~context source_path =
    match parse_source ~configuration ~context source_path with
    | Success source ->
        let { Source.source_path = { SourcePath.qualifier; _ }; _ } = source in
        source
        |> Preprocessing.replace_version_specific_code ~major_version ~minor_version ~micro_version
        |> Preprocessing.preprocess_phase0
        |> Raw.add_parsed_source ast_environment;
        qualifier
    | Error { location; message; is_suppressed } ->
        Raw.add_unparsed_source
          ast_environment
          { ParserError.source_path; location; message; is_suppressed };
        let { SourcePath.qualifier; _ } = source_path in
        qualifier
  in
  let parse_and_collect handled_qualifiers source_path =
    PyreNewParser.with_context (fun context ->
        parse_and_store ~context source_path :: handled_qualifiers)
  in
  let policy =
    Scheduler.Policy.fixed_chunk_count
      ~minimum_chunks_per_worker:1
      ~minimum_chunk_size:100
      ~preferred_chunks_per_worker:5
      ()
  in
  Scheduler.map_reduce
    scheduler
    ~policy
    ~initial:[]
    ~map:(fun _ chunk -> List.fold chunk ~init:[] ~f:parse_and_collect)
    ~reduce:List.append
    ~inputs:source_paths
    ()
(* Rewrite every `from m import *` in `source` into an explicit import of the names `m` exports.

   The exported names are gathered transitively: starting from `m`, each visited module
   contributes its `wildcard_exports_of` names and the modules it wildcard-imports itself are
   visited in turn. Modules without a successfully parsed raw source contribute nothing. When no
   export is found, the wildcard import statement is removed altogether. *)
let expand_wildcard_imports ?dependency ~ast_environment source =
  let open Statement in
  let module Transform = Transform.MakeStatementTransformer (struct
    include Transform.Identity

    type t = unit

    let is_star_import { Node.value = { Import.name; _ }; _ } =
      String.equal (Reference.show name) "*"


    let get_transitive_exports ?dependency ~ast_environment qualifier =
      (* Collects the `from` targets of the wildcard imports of a source. *)
      let module Visitor = Visit.MakeStatementVisitor (struct
        type t = Reference.t list

        let visit_children _ = false

        let statement _ collected_imports { Node.value; _ } =
          match value with
          | Statement.Import { Import.from = Some from; imports }
            when List.exists imports ~f:is_star_import ->
              from :: collected_imports
          | _ -> collected_imports
      end)
      in
      let visited_modules = Reference.Hash_set.create () in
      let transitive_exports = Identifier.Hash_set.create () in
      let worklist = Queue.of_list [qualifier] in
      let collect_from qualifier =
        match Raw.get_source ast_environment qualifier ?dependency with
        | None
        | Some (Result.Error _) ->
            ()
        | Some (Result.Ok source) ->
            List.iter (wildcard_exports_of source) ~f:(Hash_set.add transitive_exports);
            Queue.enqueue_all worklist (Visitor.visit [] source)
      in
      let rec drain_worklist () =
        match Queue.dequeue worklist with
        | None -> ()
        | Some qualifier ->
            (* `strict_add` fails for modules seen before, which keeps cycles from looping. *)
            (match Hash_set.strict_add visited_modules qualifier with
            | Error _ -> ()
            | Ok () -> collect_from qualifier);
            drain_worklist ()
      in
      drain_worklist ();
      Hash_set.to_list transitive_exports |> List.sort ~compare:Identifier.compare


    let statement state ({ Node.value; _ } as statement) =
      match value with
      | Statement.Import { Import.from = Some from; imports }
        when List.exists imports ~f:is_star_import ->
          let expanded_import =
            match get_transitive_exports from ~ast_environment ?dependency with
            | [] -> []
            | exports ->
                let to_import name =
                  {
                    Node.value = { Import.name = Reference.create name; alias = Some name };
                    location = Location.any;
                  }
                in
                let value =
                  Statement.Import
                    { Import.from = Some from; imports = List.map exports ~f:to_import }
                in
                [{ statement with Node.value }]
          in
          state, expanded_import
      | _ -> state, [statement]
  end)
  in
  Transform.transform () source |> Transform.source
(* Look up the raw source of `qualifier` and run the remaining preprocessing on it.

   Returns `None` when nothing is stored for `qualifier`. A parsed source gets its wildcard imports
   expanded, then `Preprocessing.preprocess_phase1` (followed by the environment's additional
   preprocessing, if any), then decorator inlining. A stored parse error yields a synthetic module
   instead. `dependency`, when given, is registered on the other modules read along the way. *)
let get_and_preprocess_source
    ?dependency
    ({ additional_preprocessing; _ } as ast_environment)
    qualifier
  =
  let preprocessing source =
    let processed = Preprocessing.preprocess_phase1 source in
    match additional_preprocessing with
    | Some additional_preprocessing -> additional_preprocessing processed
    | None -> processed
  in
  let process_raw = function
    | Result.Ok source ->
        let get_source qualifier =
          Raw.get_source ?dependency ast_environment qualifier >>= Result.ok
        in
        expand_wildcard_imports ?dependency ~ast_environment source
        |> preprocessing
        |> InlineDecorator.inline_decorators ~get_source
    | Result.Error
        { ParserError.source_path = { SourcePath.qualifier; relative; _ } as source_path; _ } ->
        (* Files that have parser errors fall back into getattr-any. *)
        let fallback_source = ["import typing"; "def __getattr__(name: str) -> typing.Any: ..."] in
        let statements = Parser.parse_exn ~relative fallback_source in
        let metadata = Source.Metadata.parse ~qualifier fallback_source in
        create_source ~metadata ~source_path statements |> preprocessing
  in
  (* Preprocessing a module depends on the module itself is implicitly assumed in `update`. No need
     to explicitly record the dependency. *)
  Raw.get_source ast_environment qualifier ?dependency:None >>| process_raw
(* Same as `parse_raw_sources`, but the returned qualifiers are sorted with `Reference.compare`. *)
let parse_sources ~configuration ~scheduler ~ast_environment source_paths =
  let handled_qualifiers =
    parse_raw_sources ~configuration ~scheduler ~ast_environment source_paths
  in
  List.sort handled_qualifiers ~compare:Reference.compare
(* What `update` reports back: the dependency keys it triggered and the modules it invalidated. *)
module UpdateResult = struct
  type t = {
    triggered_dependencies: SharedMemoryKeys.DependencyKey.RegisteredSet.t;
    invalidated_modules: Reference.t list;
  }

  let triggered_dependencies result = result.triggered_dependencies

  let invalidated_modules result = result.invalidated_modules

  (* An empty result: no triggered dependencies and no invalidated modules. *)
  let create_for_testing () =
    {
      invalidated_modules = [];
      triggered_dependencies = SharedMemoryKeys.DependencyKey.RegisteredSet.empty;
    }
end
(* What `update` is asked to do. *)
type trigger =
(* Apply the given module-tracker changes incrementally. *)
| Update of ModuleTracker.IncrementalUpdate.t list
(* Parse every source path known to the module tracker. *)
| ColdStart
(* Bring the raw-source table up to date and report the affected modules.

   - `ColdStart` parses every source path of the module tracker; all parsed modules are reported
     as invalidated and no dependency is triggered.
   - `Update` with the `Shallow` style drops the removed and changed modules, reparses the changed
     files, and reports the new implicit modules plus the reparsed ones; no dependency is
     triggered.
   - `Update` with the `FineGrained` style reparses inside a dependency transaction and reports
     the changed modules plus every module whose `WildcardImport` dependency was triggered. *)
let update
    ~configuration:({ Configuration.Analysis.incremental_style; _ } as configuration)
    ~scheduler
    ({ module_tracker; _ } as ast_environment)
    trigger
  =
  let no_dependencies = SharedMemoryKeys.DependencyKey.RegisteredSet.empty in
  match trigger with
  | Update module_updates -> (
      let reparse_source_paths, removed_modules, updated_submodules =
        List.partition3_map module_updates ~f:(function
            | ModuleTracker.IncrementalUpdate.NewExplicit source_path -> `Fst source_path
            | ModuleTracker.IncrementalUpdate.Delete qualifier -> `Snd qualifier
            | ModuleTracker.IncrementalUpdate.NewImplicit qualifier -> `Trd qualifier)
      in
      let reparsed_modules =
        List.map reparse_source_paths ~f:(fun { SourcePath.qualifier; _ } -> qualifier)
      in
      match incremental_style with
      | Configuration.Analysis.Shallow ->
          Raw.remove_sources ast_environment (List.append removed_modules reparsed_modules);
          let parsed =
            parse_sources ~configuration ~scheduler ~ast_environment reparse_source_paths
          in
          {
            UpdateResult.triggered_dependencies = no_dependencies;
            invalidated_modules = List.append updated_submodules parsed;
          }
      | Configuration.Analysis.FineGrained ->
          let changed_modules =
            List.concat [removed_modules; updated_submodules; reparsed_modules]
          in
          let reparse () =
            parse_raw_sources ~configuration ~scheduler ~ast_environment reparse_source_paths
          in
          let _, triggered_dependencies =
            Profiling.track_duration_and_shared_memory
              "Parse Raw Sources"
              ~tags:["phase_name", "Parsing"]
              ~f:(fun _ ->
                Raw.update_and_compute_dependencies
                  ast_environment
                  changed_modules
                  ~update:reparse
                  ~scheduler
                  ~configuration)
          in
          (* Modules that wildcard-import a changed module must be invalidated as well. *)
          let add_wildcard_importer registered sofar =
            match SharedMemoryKeys.DependencyKey.get_key registered with
            | SharedMemoryKeys.WildcardImport qualifier -> RawSources.KeySet.add qualifier sofar
            | _ -> sofar
          in
          let invalidated_modules =
            RawSources.KeySet.of_list changed_modules
            |> SharedMemoryKeys.DependencyKey.RegisteredSet.fold
                 add_wildcard_importer
                 triggered_dependencies
            |> RawSources.KeySet.elements
          in
          { UpdateResult.triggered_dependencies; invalidated_modules })
  | ColdStart ->
      let timer = Timer.start () in
      Log.info
        "Parsing %d stubs and sources..."
        (ModuleTracker.explicit_module_count module_tracker);
      let ast_environment = create module_tracker in
      let parsed =
        parse_sources
          ~configuration
          ~scheduler
          ~ast_environment
          (ModuleTracker.source_paths module_tracker)
      in
      Statistics.performance
        ~name:"sources parsed"
        ~phase_name:"Parsing and preprocessing"
        ~timer
        ();
      { UpdateResult.invalidated_modules = parsed; triggered_dependencies = no_dependencies }
(* Source-path lookup, delegated to the environment's module tracker. *)
let get_source_path environment =
  ModuleTracker.lookup_source_path environment.module_tracker
(* Nothing is serialized by `store`, and `load` merely rebuilds a handle around the module tracker
   (without additional preprocessing). The source tables themselves are in shared memory, so
   `Memory.load_shared_memory`/`Memory.save_shared_memory` take care of their
   (de-)serialization. *)
let store _ = ()

let load module_tracker = create ?additional_preprocessing:None module_tracker
(* Read-only view of the environment, represented as a record of lookup callbacks. *)
module ReadOnly = struct
  type t = {
    get_processed_source: track_dependency:bool -> Reference.t -> Source.t option;
    get_raw_source: Reference.t -> (Source.t, ParserError.t) Result.t option;
    get_source_path: Reference.t -> SourcePath.t option;
    all_explicit_modules: unit -> Reference.t list;
    is_module_tracked: Reference.t -> bool;
  }

  (* Build a view from the given callbacks; any omitted callback defaults to one that finds
     nothing. *)
  let create
      ?(get_processed_source = fun ~track_dependency:_ _ -> None)
      ?(get_raw_source = fun _ -> None)
      ?(get_source_path = fun _ -> None)
      ?(all_explicit_modules = fun _ -> [])
      ?(is_module_tracked = fun _ -> false)
      ()
    =
    {
      is_module_tracked;
      all_explicit_modules;
      get_source_path;
      get_raw_source;
      get_processed_source;
    }


  (* Dependency tracking is off unless explicitly requested. *)
  let get_processed_source read_only ?(track_dependency = false) qualifier =
    read_only.get_processed_source ~track_dependency qualifier


  let get_raw_source read_only = read_only.get_raw_source

  let get_source_path read_only = read_only.get_source_path

  let get_relative read_only qualifier =
    match get_source_path read_only qualifier with
    | Some { SourcePath.relative; _ } -> Some relative
    | None -> None


  let get_real_path ~configuration read_only qualifier =
    Option.map (get_source_path read_only qualifier) ~f:(SourcePath.full_path ~configuration)


  let get_real_path_relative
      ~configuration:({ Configuration.Analysis.local_root; _ } as configuration)
      read_only
      qualifier
    =
    (* SourcePath.relative refers to the renamed path when search paths are provided with a root and
       subdirectory. Instead, find the real filesystem relative path for the qualifier. *)
    Option.bind (get_real_path ~configuration read_only qualifier) ~f:(fun path ->
        PyrePath.get_relative_to_root ~root:local_root ~path)


  let is_module_tracked read_only = read_only.is_module_tracked

  let all_explicit_modules read_only = read_only.all_explicit_modules ()
end
(* Drop the raw sources stored for `qualifiers`. *)
let remove_sources environment qualifiers = Raw.remove_sources environment qualifiers
(* Build the read-only view of `environment`.

   Processed sources are computed on demand through `get_and_preprocess_source`. When dependency
   tracking is requested, a `WildcardImport` key for the queried module is registered and passed
   along; otherwise no key is registered. *)
let read_only ({ module_tracker; _ } as environment) =
  let get_processed_source ~track_dependency qualifier =
    let dependency =
      match track_dependency with
      | true ->
          Some
            (SharedMemoryKeys.DependencyKey.Registry.register
               (SharedMemoryKeys.WildcardImport qualifier))
      | false -> None
    in
    get_and_preprocess_source ?dependency environment qualifier
  in
  let all_explicit_modules () = ModuleTracker.tracked_explicit_modules module_tracker in
  {
    ReadOnly.get_processed_source;
    get_raw_source = RawSources.get;
    get_source_path = get_source_path environment;
    all_explicit_modules;
    is_module_tracked = ModuleTracker.is_module_tracked module_tracker;
  }
(* The module tracker this environment was created with. *)
let module_tracker environment = environment.module_tracker
(* Copy of `environment` whose additional preprocessing is replaced by the given one. *)
let with_additional_preprocessing ~additional_preprocessing environment =
  let { module_tracker; _ } = environment in
  { module_tracker; additional_preprocessing }