in smdebug/profiler/profiler_config_parser.py [0:0]
def load_config(self):
    """Load the profiler config file and rebuild ``self.config`` from it.

    The file path is read from ``$SMPROFILER_CONFIG_PATH`` and falls back to
    ``CONFIG_PATH_DEFAULT`` when the variable is unset.

    Outcomes, in the order they are checked:

    * File not found: a message is logged and ``profiling_enabled`` is set
      to False. ``self.config`` is left untouched.
    * Parsed JSON equals ``self.last_json_config``: return immediately
      without changing any state.
    * The file cannot be read/parsed, or the top-level JSON value has no
      ``.get``: an error is logged, ``self.config`` is set to None and
      ``profiling_enabled`` to False.
    * The disable-profiler flag is truthy, or the profiling parameters are
      missing/empty: ``self.config`` is None and ``profiling_enabled`` is
      False.
    * Otherwise ``profiling_enabled`` is set to True and ``self.config``
      becomes a new ``ProfilerConfig``. Values given in the file are used;
      if any numeric parameter cannot be converted, the local path and all
      numeric parameters fall back to their defaults.

    Returns:
        None. Results are stored on ``self``.
    """
    config_path = os.environ.get("SMPROFILER_CONFIG_PATH", CONFIG_PATH_DEFAULT)

    if not os.path.isfile(config_path):
        self._log_new_message(
            LastProfilingStatus.CONFIG_NOT_FOUND,
            self.logger.info,
            f"Unable to find config at {config_path}. Profiler is disabled.",
        )
        self._reset_statuses()
        self.profiling_enabled = False
        return

    with open(config_path) as json_data:
        try:
            # The raw text is lower-cased before parsing, so every key AND every
            # string value in the parsed config is lower case (presumably so keys
            # match the field names regardless of the case used in the file).
            full_config = json.loads(json_data.read().lower())

            # Nothing changed since the previous load: keep the current state.
            if full_config == self.last_json_config:
                return

            self.last_json_config = full_config
            self.config = None

            if full_config.get(ProfilingParametersField.DISABLE_PROFILER.value, False):
                self._log_new_message(
                    LastProfilingStatus.PROFILER_DISABLED,
                    self.logger.info,
                    "User has disabled profiler.",
                )
                self._reset_statuses()
                self.profiling_enabled = False
                return
        except Exception as e:
            # Deliberately broad: covers read errors, malformed JSON and a
            # top-level JSON value that is not an object (no ``.get``).
            self._log_new_message(
                LastProfilingStatus.INVALID_CONFIG,
                self.logger.error,
                f"Error parsing config at {config_path}: {str(e)}",
            )
            self._reset_statuses()
            self.config = None
            self.profiling_enabled = False
            return

    config = full_config.get(ProfilingParametersField.PROFILING_PARAMETERS.value)

    if config is None or config == {}:
        self._log_new_message(
            LastProfilingStatus.PROFILER_DISABLED,
            self.logger.info,
            "User has disabled profiler.",
        )
        self._reset_statuses()
        self.profiling_enabled = False
        return

    self._log_new_message(
        LastProfilingStatus.PROFILER_ENABLED,
        self.logger.info,
        f"Using config at {config_path}.",
    )
    self.profiling_enabled = True

    try:
        local_path = config.get(ProfilingParametersField.LOCAL_PATH.value, BASE_FOLDER_DEFAULT)
        # int(float(...)) accepts values written as "1e6" or "1048576.0".
        file_max_size = int(
            float(config.get(ProfilingParametersField.FILE_MAX_SIZE.value, MAX_FILE_SIZE_DEFAULT))
        )
        file_close_interval = float(
            config.get(
                ProfilingParametersField.FILE_CLOSE_INTERVAL.value, CLOSE_FILE_INTERVAL_DEFAULT
            )
        )
        file_open_fail_threshold = int(
            config.get(
                ProfilingParametersField.FILE_OPEN_FAIL_THRESHOLD.value,
                FILE_OPEN_FAIL_THRESHOLD_DEFAULT,
            )
        )
    except (ValueError, TypeError) as e:
        # ValueError: an unparsable string such as "abc".
        # TypeError: a JSON null, array or object (None / list / dict), which
        # float() and int() reject with TypeError rather than ValueError.
        # Either way, fall back to the defaults instead of propagating.
        self._log_new_message(
            LastProfilingStatus.DEFAULT_VALUES,
            self.logger.info,
            f"{e} in {ProfilingParametersField.PROFILING_PARAMETERS}. Enabling profiling with default "
            f"parameter values.",
        )
        local_path = BASE_FOLDER_DEFAULT
        file_max_size = MAX_FILE_SIZE_DEFAULT
        file_close_interval = CLOSE_FILE_INTERVAL_DEFAULT
        file_open_fail_threshold = FILE_OPEN_FAIL_THRESHOLD_DEFAULT

    detailed_profiling_config = self._parse_metrics_config(
        config,
        ProfilingParametersField.DETAILED_PROFILING_CONFIG,
        LastProfilingStatus.INVALID_DETAILED_PROFILING_CONFIG,
    )
    dataloader_profiling_config = self._parse_metrics_config(
        config,
        ProfilingParametersField.DATALOADER_PROFILING_CONFIG,
        LastProfilingStatus.INVALID_DATALOADER_PROFILING_CONFIG,
    )
    python_profiling_config = self._parse_metrics_config(
        config,
        ProfilingParametersField.PYTHON_PROFILING_CONFIG,
        LastProfilingStatus.INVALID_PYTHON_PROFILING_CONFIG,
    )
    smdataparallel_profiling_config = self._parse_metrics_config(
        config,
        ProfilingParametersField.SMDATAPARALLEL_PROFILING_CONFIG,
        LastProfilingStatus.INVALID_SMDATAPARALLEL_PROFILING_CONFIG,
    )

    self.config = ProfilerConfig(
        local_path,
        file_max_size,
        file_close_interval,
        file_open_fail_threshold,
        detailed_profiling_config,
        dataloader_profiling_config,
        python_profiling_config,
        smdataparallel_profiling_config,
    )

    # Validate the profiler config based on current training job configuration. Currently, we are disabling the
    # autograd based detailed profiling for model parallel training job.
    # Importing ConfigValidator here to avoid circular dependency.
    from smdebug.core.config_validator import ConfigValidator

    ConfigValidator.validate_profiler_config(self)

    # Report any error message recorded on the individual metrics configs.
    if self.config.detailed_profiling_config.error_message is not None:
        self._log_new_message(
            LastProfilingStatus.INVALID_DETAILED_CONFIG_FIELDS,
            self.logger.error,
            self.config.detailed_profiling_config.error_message,
        )

    if self.config.dataloader_profiling_config.error_message is not None:
        self._log_new_message(
            LastProfilingStatus.INVALID_DATALOADER_CONFIG_FIELDS,
            self.logger.error,
            self.config.dataloader_profiling_config.error_message,
        )

    if self.config.python_profiling_config.error_message is not None:
        self._log_new_message(
            LastProfilingStatus.INVALID_PYTHON_CONFIG_FIELDS,
            self.logger.error,
            self.config.python_profiling_config.error_message,
        )

    if self.config.smdataparallel_profiling_config.error_message is not None:
        # NOTE(review): this reuses INVALID_SMDATAPARALLEL_PROFILING_CONFIG rather
        # than a *_CONFIG_FIELDS status like the three checks above - confirm
        # whether a dedicated status exists.
        self._log_new_message(
            LastProfilingStatus.INVALID_SMDATAPARALLEL_PROFILING_CONFIG,
            self.logger.error,
            self.config.smdataparallel_profiling_config.error_message,
        )

    self._reset_statuses()