in vissl/utils/hydra_config.py [0:0]
def infer_and_assert_hydra_config(cfg, engine_name: str):
"""
Infer the values of a few config parameters from the values of other config parameters:
1. Infer the loss config.
2. Auto-scale the learning rate if the user has enabled auto scaling.
3. Infer meter names (the model layer names being evaluated) since we support list meters
that have multiple outputs and the same target. This is very common in self-supervised
learning where we want to evaluate a metric for several layers of the model. VISSL
supports running evaluation for multiple model layers in a single training run.
4. Support multi-gpu DDP eval models by attaching a dummy parameter. This is particularly
helpful for multi-gpu feature extraction, especially when the dataset for which features
are being extracted is large.
5. Infer what kind of labels are being used. If the user has specified a label source, we set
LABEL_TYPE to "standard" (the VISSL default); otherwise, if no label source is specified, we
set LABEL_TYPE to "sample_index".
"""
cfg = infer_losses_config(cfg)
cfg = infer_learning_rate(cfg)
assert_transforms(cfg)
# pass the seed to cfg["MODEL"] so that model init on different nodes can
# use the same seed.
# TODO (Min): once FSDP supports sync'ing weights from rank 0, we don't need
# this anymore.
cfg["MODEL"]["_MODEL_INIT_SEED"] = cfg.SEED_VALUE
# in case of linear evaluation, we often evaluate several layers at a time. For each
# layer, there's a separate accuracy meter. In such cases, we want to output the layer
# name in the meters output to make the results easy to interpret. This is
# currently only supported for linear evaluation.
if cfg.METERS is not None:
from vissl.models import is_feature_extractor_model
# Ensure backwards compatibility of cfg.METERS.name.
meter_name = cfg.METERS.get("name", "")
if meter_name:
meter_names = set(cfg.METERS.get("names", []))
meter_names.add(meter_name)
cfg.METERS.names = list(meter_names)
meter_names = cfg.METERS.get("names", [])
valid_meters = [
"accuracy_list_meter",
"mean_ap_list_meter",
"precision_at_k_list_meter",
"recall_at_k_list_meter",
]
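# for each supported list meter, configure one sub-meter per evaluated layer listed in
# LINEAR_EVAL_FEAT_POOL_OPS_MAP (each entry of the map starts with the layer name).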
for meter_name in meter_names:
if meter_name in valid_meters:
feat_eval_ops_map = (
cfg.MODEL.FEATURE_EVAL_SETTINGS.LINEAR_EVAL_FEAT_POOL_OPS_MAP
)
all_meter_names = [item[0] for item in feat_eval_ops_map]
if is_feature_extractor_model(cfg.MODEL):
cfg.METERS[meter_name]["num_meters"] = len(feat_eval_ops_map)
cfg.METERS[meter_name]["meter_names"] = all_meter_names
elif engine_name == "extract_label_predictions":
if len(feat_eval_ops_map) > 0:
cfg.METERS[meter_name]["num_meters"] = len(feat_eval_ops_map)
cfg.METERS[meter_name]["meter_names"] = all_meter_names
else:
# if the user is not extracting from multiple layers, we assume
# the model head is being used.
cfg.METERS[meter_name]["num_meters"] = 1
# in SSL pre-training we don't want to use annotated labels, and during feature
# extraction we may not have annotated labels for some datasets. In such cases, we set
# the label type to the image index in the dataset, unless the
# user has specifically provided "zero" as the label type, which is
# necessary when the CutMixUp collator is used for self-supervised
# training.
if len(cfg.DATA.TRAIN.LABEL_SOURCES) == 0 and cfg.DATA.TRAIN.LABEL_TYPE != "zero":
cfg.DATA.TRAIN.LABEL_TYPE = "sample_index"
if len(cfg.DATA.TEST.LABEL_SOURCES) == 0 and cfg.DATA.TEST.LABEL_TYPE != "zero":
cfg.DATA.TEST.LABEL_TYPE = "sample_index"
# if the user has specified model initialization from a params_file, we check whether
# the params_file is a URL. If it is, we download the file to a local cache directory
# and use that instead.
from vissl.utils.checkpoint import get_checkpoint_folder
from vissl.utils.io import cache_url, is_url
if is_url(cfg.MODEL.WEIGHTS_INIT.PARAMS_FILE):
checkpoint_dir = get_checkpoint_folder(cfg)
cache_dir = f"{checkpoint_dir}/params_file_cache/"
cached_url_path = cache_url(
url=cfg.MODEL.WEIGHTS_INIT.PARAMS_FILE, cache_dir=cache_dir
)
cfg.MODEL.WEIGHTS_INIT.PARAMS_FILE = cached_url_path
# ZeRO2: Infer the settings for ShardedDDP, which shards the optimizer state
# and the model weights. For ShardedDDP, we must use the OSS optimizer,
# set the right task name, and use PyTorch AMP if AMP is enabled.
if cfg.MODEL.SHARDED_DDP_SETUP.USE_SDP:
cfg.OPTIMIZER.use_zero = True
cfg.TRAINER.TASK_NAME = "self_supervision_sdp_task"
if cfg.MODEL.AMP_PARAMS.USE_AMP:
cfg.MODEL.AMP_PARAMS.AMP_TYPE = "pytorch"
# if we use a ZeRO optimizer, we nest the optimizer-related settings under
# base_optimizer.
if cfg.OPTIMIZER.use_zero:
cfg.OPTIMIZER["base_optimizer"] = cfg.OPTIMIZER.copy()
cfg.OPTIMIZER.name = "zero"
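# these settings remain at the top level of cfg.OPTIMIZER and are not consumed by the
# wrapped optimizer, so remove them from the nested base_optimizer copy.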
del cfg.OPTIMIZER.base_optimizer["param_schedulers"]
del cfg.OPTIMIZER.base_optimizer["regularize_bn"]
del cfg.OPTIMIZER.base_optimizer["regularize_bias"]
del cfg.OPTIMIZER.base_optimizer["num_epochs"]
del cfg.OPTIMIZER.base_optimizer["use_zero"]
del cfg.OPTIMIZER.base_optimizer["head_optimizer_params"]
# Infer FSDP settings
cfg = infer_fsdp(cfg)
if cfg.DATA.TRAIN.BASE_DATASET == "generic_ssl":
assert (
cfg.DATA.TRAIN.get("TRAIN_PHASES_PER_EPOCH", 1) == 1
), "When using the generic_ssl, we must set TRAIN_PHASES_PER_EPOCH = 1."
if cfg.METERS.model_output_mask:
assert (
len(cfg.DATA.TEST.DATA_SOURCES) > 0
), "Model output mask is only applicable when there is a test dataset."
assert (
cfg.DATA.TEST.BASE_DATASET == "generic_ssl"
), "Model output mask is only supported with ssl dataset."
# Remove CHECK_NAN hooks, as model output masking casts the logits
# to -inf, which will throw an error from the CHECK_NAN hooks.
cfg.HOOKS.CHECK_NAN = False