in tabular/src/autogluon/tabular/models/fastainn/tabular_nn_fastai.py [0:0]
def _fit(self,
         X,
         y,
         X_val=None,
         y_val=None,
         time_limit=None,
         num_cpus=None,
         num_gpus=0,
         sample_weight=None,
         **kwargs):
    try_import_fastai()
    from fastai.tabular.model import tabular_config
    from fastai.tabular.learner import tabular_learner
    from fastai import torch_core
    from .callbacks import AgSaveModelCallback, EarlyStoppingCallbackWithTimeLimit
    from .quantile_helpers import HuberPinballLoss
    start_time = time.time()
    if sample_weight is not None:  # TODO: support
        logger.log(15, "sample_weight not yet supported for NNFastAiTabularModel, this model will ignore them in training.")

    params = self._get_model_params()
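    # Choose a target scaler: standardize regression targets, min-max scale quantile targets;
    # a user-supplied scaler is deep-copied before fitting so the original object is untouched.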
    self.y_scaler = params.get('y_scaler', None)
    if self.y_scaler is None:
        if self.problem_type == REGRESSION:
            self.y_scaler = sklearn.preprocessing.StandardScaler()
        elif self.problem_type == QUANTILE:
            self.y_scaler = sklearn.preprocessing.MinMaxScaler()
    else:
        self.y_scaler = copy.deepcopy(self.y_scaler)
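    # Set the default torch device for fastai: CPU when num_gpus == 0, otherwise CUDA.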
    if num_gpus is not None:
        # TODO: Control CPU vs GPU usage during inference
        if num_gpus == 0:
            torch_core.default_device(use_cuda=False)
        else:
            # TODO: respect CUDA_VISIBLE_DEVICES to select proper GPU
            torch_core.default_device(use_cuda=True)

    logger.log(15, f'Fitting Neural Network with parameters {params}...')
    data = self._preprocess_train(X, y, X_val, y_val)
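    # Map the stopping metric to a fastai metric and decide whether the monitored
    # objective should be minimized (losses/errors) or maximized (scores).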
    nn_metric, objective_func_name = self.__get_objective_func_name(self.stopping_metric)
    objective_func_name_to_monitor = self.__get_objective_func_to_monitor(objective_func_name)
    objective_optim_mode = np.less if objective_func_name in [
        'log_loss',
        'root_mean_squared_error', 'mean_squared_error', 'mean_absolute_error', 'median_absolute_error',  # Regression objectives
        'pinball_loss',  # Quantile objective
    ] else np.greater
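    # Default hidden layer sizes when 'layers' is not specified: fixed sizes for
    # regression/binary, otherwise scaled by the number of quantile levels or classes.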
    # TODO: calculate max emb concat layer size and use 1st layer as that value and 2nd in between number of classes and the value
    if params.get('layers', None) is not None:
        layers = params['layers']
    elif self.problem_type in [REGRESSION, BINARY]:
        layers = [200, 100]
    elif self.problem_type == QUANTILE:
        base_size = max(len(self.quantile_levels) * 4, 128)
        layers = [base_size, base_size, base_size]
    else:
        base_size = max(data.c * 2, 100)
        layers = [base_size * 2, base_size]
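    # Quantile regression uses a Huber-smoothed pinball loss; other problem types
    # keep fastai's default loss for the learner.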
    loss_func = None
    if self.problem_type == QUANTILE:
        loss_func = HuberPinballLoss(self.quantile_levels, alpha=self.params['alpha'])

    best_epoch_stop = params.get("best_epoch", None)  # Use best epoch for refit_full.
    batch_size = self._get_batch_size(X)
    dls = data.dataloaders(bs=batch_size)

    # Make deterministic
    from fastai.torch_core import set_seed
    set_seed(0, True)
    dls.rng.seed(0)
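    # For quantile problems, override the inferred output size so the network
    # produces one output per requested quantile level.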
    if self.problem_type == QUANTILE:
        dls.c = len(self.quantile_levels)

    self.model = tabular_learner(
        dls, layers=layers, metrics=nn_metric,
        config=tabular_config(ps=params['ps'], embed_p=params['emb_drop']),
        loss_func=loss_func,
    )
    logger.log(15, self.model.model)
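    # Checkpoint callback: saves the best model (per the monitored objective) under `fname`;
    # `best_epoch_stop` is used to reproduce the best epoch during refit_full.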
    fname = 'model'
    save_callback = AgSaveModelCallback(
        monitor=objective_func_name_to_monitor, comp=objective_optim_mode, fname=fname,
        best_epoch_stop=best_epoch_stop, with_opt=True
    )

    if time_limit is not None:
        time_elapsed = time.time() - start_time
        time_left = time_limit - time_elapsed
        if time_left <= time_limit * 0.7:  # if 30% of time was spent preprocessing, likely not enough time to train model
            raise TimeLimitExceeded
    else:
        time_left = None
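    # Early stopping on the monitored objective, which also aborts training once the
    # remaining time budget is exhausted.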
    early_stopping = EarlyStoppingCallbackWithTimeLimit(
        monitor=objective_func_name_to_monitor,
        comp=objective_optim_mode,
        min_delta=params['early.stopping.min_delta'],
        patience=params['early.stopping.patience'],
        time_limit=time_left, best_epoch_stop=best_epoch_stop
    )

    callbacks = [save_callback, early_stopping]
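    # Train inside a temporary directory so intermediate checkpoints do not pollute the
    # learner's model path; progress bars and per-epoch logging are suppressed.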
    with make_temp_directory() as temp_dir:
        with self.model.no_bar():
            with self.model.no_logging():
                original_path = self.model.path
                self.model.path = Path(temp_dir)

                len_val = len(X_val) if X_val is not None else 0
                epochs = self._get_epochs_number(samples_num=len(X) + len_val, epochs=params['epochs'], batch_size=batch_size, time_left=time_left)
                if epochs == 0:
                    # Stop early if there is not enough time to train a full epoch
                    raise TimeLimitExceeded

                self.model.fit_one_cycle(epochs, params['lr'], cbs=callbacks)

                # Load the best one and export it
                self.model = self.model.load(fname)
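                # learner.validate() returns [valid_loss, *metrics]; read log_loss from the
                # loss itself and any other objective from the first metric.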
                if objective_func_name == 'log_loss':
                    eval_result = self.model.validate(dl=dls.valid)[0]
                else:
                    eval_result = self.model.validate(dl=dls.valid)[1]

                logger.log(15, f'Model validation metrics: {eval_result}')
                self.model.path = original_path
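    # Record the epochs actually run and the best epoch found so refit_full can reuse them.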
    self.params_trained['epochs'] = epochs
    self.params_trained['best_epoch'] = save_callback.best_epoch