understanding_rl_vision/rl_clarity/__init__.py

from .interface import generate
from .loading import load
from .compiling import recompile_js
from .training import train


def run(
    checkpoint_path,
    *,
    output_dir,
    load_kwargs={},
    trajectories_kwargs={},
    observations_kwargs={},
    **generate_kwargs
):
    """Generate an interface from a checkpoint file.

    Arguments:
      checkpoint_path: path to checkpoint file, a joblib file containing a
          dictionary with these keys
          - params: saved model parameters as a dictionary mapping tensor
                names to numpy arrays
          - args: dictionary of metadata with these keys
                - env_name: name of the Procgen environment
                      required if env_kind is 'procgen'
                - env_id: lowercase id of the Atari environment
                      required if env_kind is 'atari'
                - env_kind: either 'procgen' or 'atari'
                      defaults to 'procgen'
                - gamma: GAE hyperparameter gamma used to train the model
                      defaults to None
                - lambda: GAE hyperparameter lambda used to train the model
                      defaults to None
                - cnn: model architecture, one of 'clear', 'impala' or
                      'nature'
                      defaults to 'clear'
                - any other optional arguments used to create the environment
                  or get the architecture
      output_dir: path to directory where interface is to be saved
          required
      load_kwargs: dictionary with keys for any of the following
          - resample: whether to process the checkpoint file from scratch,
                rather than reusing samples previously saved to a
                non-temporary location
                defaults to True
          - model_path: lucid model save location
          - metadata_path: metadata dictionary save location
          - trajectories_path: trajectories save location
          - observations_path: additional observations save location
          - full_resolution: whether to also save observations in human-scale
                resolution (significant performance cost)
                defaults to False
          - temp_files: if any of the above paths is not specified, whether to
                default to a temporary location rather than a subdirectory of
                the checkpoint file's directory
                defaults to False
      trajectories_kwargs: dictionary with keys for any of the following
          only used if resampling
          - num_envs: number of trajectories to collect
                defaults to 8
          - num_steps: length of each trajectory
                defaults to 512
      observations_kwargs: dictionary with keys for any of the following
          only used if resampling
          - num_envs: number of environments to collect additional
                observations from in parallel
                defaults to 32
          - num_obs: number of additional observations to collect from each
                parallel environment
                defaults to 128
          - obs_every: number of steps to wait between each observation
                defaults to 128
      model_bytes: lucid model, represented as a save file's bytes
          defaults to being extracted automatically
      observations: numpy array of additional observations used for feature
          visualization
          defaults to being extracted automatically
      observations_full: numpy array of the additional observations in
          human-scale resolution, or None to only use observations at the
          resolution seen by the model
          defaults to being extracted automatically, or None if human-scale
          resolution observations were not saved
      trajectories: dictionary of trajectories with keys 'observations',
          'actions', 'rewards', either 'firsts' or 'dones', and optionally
          'observations_full', each value being a numpy array with first two
          dimensions batch and timestep
          defaults to being extracted automatically
      policy_logits_name: name of tensor of policy logits
          defaults to being extracted automatically
      value_function_name: name of tensor of value function
          defaults to being extracted automatically
      env_name: Procgen environment name, used to help infer action_combos if
          that is not provided
          defaults to being extracted automatically, or 'unknown' if that
          fails
      numpy_precision: number of significant figures to round numpy arrays in
          the HTML file to
          defaults to 6
      inline_js: whether to include the JavaScript in the HTML file inline,
          rather than referencing a separate file
          defaults to True (to avoid ad-blocker issues)
      inline_large_json: whether to include large amounts of JSON data in the
          HTML file inline, rather than referencing separate files
          defaults to whether output_dir does not contain '://'
      batch_size: size of minibatch of observations to pass through model
          defaults to 512
      action_combos: list of tuples of strings describing the combinations of
          buttons triggered by each action
          defaults to being extracted automatically, or
          [('0',), ..., ('<num_actions - 1>',)] if that fails
      action_group_fns: list of function filters for grouping the action
          combos in different ways
          defaults to [
              lambda combo: 'RIGHT' in combo,
              lambda combo: 'LEFT' in combo,
              lambda combo: 'UP' in combo,
              lambda combo: 'DOWN' in combo,
              lambda combo: 'RIGHT' not in combo and 'LEFT' not in combo
                  and 'UP' not in combo and 'DOWN' not in combo
          ]
      layer_kwargs: dictionary of options for choosing layers, with keys for
          any of the following
          - name_contains_one_of: list of strings each layer name must contain
                one of, or None to not filter by name
                defaults to None
          - op_is_one_of: list of strings each layer op must be one of
                defaults to ['relu']
          - bottleneck_only: whether to only include layers such that every
                path to an earlier convolutional layer passes through a
                bottleneck of the network
                defaults to True
          - discard_first_n: number of first layers to discard
                defaults to 0
      input_layer_include: whether to additionally calculate gradients with
          respect to the input layer
          defaults to False
      input_layer_name: display name of the input layer
          defaults to 'input'
      gae_gamma: gamma for computing advantages using GAE
          defaults to being extracted automatically, or 0.999 if that fails
      gae_lambda: lambda for computing advantages using GAE
          defaults to being extracted automatically, or 0.95 if that fails
      trajectory_bookmarks: number of links to display to highest advantage
          episodes and to lowest advantage episodes
          defaults to 16
      nmf_features: number of dimensions for NMF dimensionality reduction
          defaults to 8
      nmf_attr_opts: dictionary of options for computing attribution for NMF
          dimensionality reduction, the main one being integrate_steps
          (explained below, see attr_integrate_steps)
          defaults to {'integrate_steps': 10}, though if a dictionary is
          provided without an 'integrate_steps' key, then integrate_steps
          defaults to 1
      vis_subdiv_mults: list of values of subdiv_mult, the spatial resolution
          of the grid of dataset examples used for feature visualization, as a
          multiple of the resolution of the layer's activations
          defaults to [0.25, 0.5, 1, 2]
      vis_subdiv_mult_default: default value of subdiv_mult (explained above)
          defaults to 1
      vis_expand_mults: list of values of expand_mult, the height and width of
          each patch used for feature visualization, as a multiple of the
          number of pixels if the layer were overlaid on the observation
          defaults to [1, 2, 4, 8]
      vis_expand_mult_default: default value of expand_mult (explained above)
          defaults to 4
      vis_thumbnail_num_mult: spatial resolution of the grid of dataset
          examples used for feature visualization thumbnails
          defaults to 4
      vis_thumbnail_expand_mult: the height and width of each patch used for
          feature visualization thumbnails, as a multiple of the number of
          pixels if the layer were overlaid on the observation
          defaults to 4
      scrub_range: horizontal interval of observations and attribution used to
          construct scrubs
          defaults to (42 / 64, 44 / 64)
      attr_integrate_steps: number of points on the path used for numerical
          integration for computing attribution
          defaults to 10
      attr_max_paths: maximum number of paths for multi-path attribution, or
          None to use single-path attribution
          defaults to None
      attr_policy: whether to compute attribution for the policy
          defaults to False
      attr_single_channels: whether to allow attribution for single channels
          to be displayed
          defaults to True
      observations_subdir: name of subdirectory containing additional
          observations
          defaults to 'observations/'
      trajectories_subdir: name of subdirectory containing trajectory
          observations
          defaults to 'trajectories/'
      trajectories_scrub_subdir: name of subdirectory containing scrubs of
          trajectory observations
          defaults to 'trajectories_scrub/'
      features_subdir: name of subdirectory containing feature visualizations
          defaults to 'features/'
      thumbnails_subdir: name of subdirectory containing feature thumbnails
          defaults to 'thumbnails/'
      attribution_subdir: name of subdirectory containing attribution
          defaults to 'attribution/'
      attribution_scrub_subdir: name of subdirectory containing scrubs of
          attribution
          defaults to 'attribution_scrub/'
      video_height: css height of each video screen
          defaults to '16em'
      video_width: css width of each video screen
          defaults to '16em'
      video_speed: speed of videos in frames per second
          defaults to 12
      policy_display_height: css height of bar displaying policy
          defaults to '2em'
      policy_display_width: css width of bar displaying policy
          defaults to '40em'
      navigator_width: css width of navigator bar
          defaults to '24em'
      scrubber_height: css height of each scrubber
          defaults to '4em'
      scrubber_width: css width of each scrubber
          defaults to '48em'
      scrubber_visible_duration: number of frames visible in each scrubber
          defaults to 256
      legend_item_height: css height of each legend item
          defaults to '6em'
      legend_item_width: css width of each legend item
          defaults to '6em'
      feature_viewer_height: css height of feature visualizations in the popup
          defaults to '40em'
      feature_viewer_width: css width of feature visualizations in the popup
          defaults to '40em'
      attribution_weight: css opacity of attribution when overlaid on
          observations (taking into account the fact that attribution is
          mostly transparent)
          defaults to 0.9
      graph_colors: dictionary specifying css colors of graphs of each type
          defaults to {
              'v': 'green',
              'action': 'red',
              'action_group': 'orange',
              'advantage': 'blue'
          }
      trajectory_color: css color of text displaying trajectory information
          such as actions and rewards
          defaults to 'blue'
    """
    import tensorflow as tf
    from mpi4py import MPI
    from baselines.common.mpi_util import setup_mpi_gpus

    comm = MPI.COMM_WORLD
    rank = comm.Get_rank()
    size = comm.Get_size()
    setup_mpi_gpus()

    exn = None
    if rank == 0 and load_kwargs.get("resample", True):
        # Rank 0 processes the checkpoint file from scratch, then releases the
        # other ranks.
        kwargs = load(
            checkpoint_path,
            trajectories_kwargs=trajectories_kwargs,
            observations_kwargs=observations_kwargs,
            **load_kwargs
        )
        comm.barrier()
    else:
        # The remaining ranks wait for rank 0, then reuse the samples it
        # saved. Copy load_kwargs rather than setting the key in place, to
        # avoid mutating the shared default dictionary across calls.
        comm.barrier()
        load_kwargs = dict(load_kwargs, resample=False)
        try:
            kwargs = load(
                checkpoint_path,
                trajectories_kwargs=trajectories_kwargs,
                observations_kwargs=observations_kwargs,
                **load_kwargs
            )
        except tf.errors.NotFoundError as e:
            exn = e
            kwargs = None
    # If every rank failed to load the saved samples, give up. If only some
    # failed (e.g. because rank 0 saved to a temporary location), broadcast
    # rank 0's results to the others; allreduce and bcast are collective, so
    # every rank participates.
    errors = comm.allreduce(0 if exn is None else 1, op=MPI.SUM)
    if errors == size:
        raise FileNotFoundError from exn
    elif errors > 0:
        kwargs = comm.bcast(kwargs, root=0)
    kwargs["output_dir"] = output_dir
    kwargs.update(generate_kwargs)
    generate(**kwargs)
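

# A minimal usage sketch (left as a comment, not executed as part of the
# module): the paths below are hypothetical, and every keyword argument
# except output_dir is optional; see the docstring of run above. To
# parallelize across GPUs, the same script can be launched under MPI
# (e.g. with mpiexec); a single process also works.
#
#     from understanding_rl_vision import rl_clarity
#
#     rl_clarity.run(
#         "path/to/checkpoint.joblib",     # hypothetical checkpoint file
#         output_dir="path/to/interface",  # hypothetical output directory
#         trajectories_kwargs={"num_envs": 8, "num_steps": 512},
#         nmf_features=8,
#     )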