crop_yield_prediction/models/deep_gaussian_process/base.py
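
# Module-level imports assumed by this excerpt (they live at the top of base.py
# and are not shown here); everything below relies on them.
from pathlib import Path
from datetime import datetime

import numpy as np
import pandas as pd
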
def run(self, times, train_years, path_to_histogram=Path('data/deep_gaussian/data.npz'),
        pred_years=None, num_runs=2, train_steps=25000, batch_size=64,
        starter_learning_rate=1e-3, weight_decay=0, l1_weight=0, patience=10):
"""
Train the models. Note that multiple models are trained: as per the paper, a model
is trained for each year, with all preceding years used as training values. In addition,
for each year, 2 models are trained to account for random initialization.
Parameters
----------
path_to_histogram: pathlib Path, default=Path('data/img_output/histogram_all_full.npz')
The location of the training data
times: {'all', 'realtime'}
Which time indices to train the model on. If 'all', a full run (32 timesteps) is used.
If 'realtime', range(10, 31, 4) is used.
pred_years: int, list or None, default=None
Which years to build models for. If None, the default values from the paper (range(2009, 2016))
are used.
num_runs: int, default=2
The number of runs to do per year. Default taken from the paper
train_steps: int, default=25000
The number of steps for which to train the model. Default taken from the paper.
batch_size: int, default=32
Batch size when training. Default taken from the paper
starter_learning_rate: float, default=1e-3
Starter learning rate. Note that the learning rate is divided by 10 after 2000 and 4000 training
steps. Default taken from the paper
weight_decay: float, default=1
Weight decay (L2 regularization) on the model weights
l1_weight: float, default=0
In addition to MSE, L1 loss is also used (sometimes). This is the weight to assign to this L1 loss.
patience: int or None, default=10
The number of epochs to wait without improvement in the validation loss before terminating training.
Note that the original repository doesn't use early stopping.
"""
    with np.load(path_to_histogram) as hist:
        images = hist['output_image']
        locations = hist['output_locations']
        yields = hist['output_yield']
        years = hist['output_year']
        indices = hist['output_index']

    # to collect results
    years_list, run_numbers, corr_list, r2_list, rmse_list, me_list, times_list = [], [], [], [], [], [], []
    if self.gp is not None:
        corr_gp_list, r2_gp_list, rmse_gp_list, me_gp_list = [], [], [], []

    if pred_years is None:
        pred_years = range(2014, 2019)
    elif type(pred_years) is int:
        pred_years = [pred_years]
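
    # Map the 'all' / 'realtime' flags described in the docstring onto concrete time
    # indices. This conversion is an assumption added here because the loop below
    # expects numeric indices; any other iterable passed in is left untouched.
    if times == 'all':
        times = [32]
    elif times == 'realtime':
        times = range(10, 31, 4)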

    for pred_year in pred_years:
        for run_number in range(1, num_runs + 1):
            for time in times:
                print(f'Training to predict on {pred_year}, Run number {run_number}')

                results = self._run_1_year(train_years, images, yields,
                                           years, locations,
                                           indices, pred_year,
                                           time, run_number,
                                           train_steps, batch_size,
                                           starter_learning_rate,
                                           weight_decay, l1_weight,
                                           patience)

                years_list.append(pred_year)
                run_numbers.append(run_number)
                times_list.append(time)
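
                # with a GP attached to the model, _run_1_year also returns the
                # GP-adjusted versions of the four metrics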
                if self.gp is not None:
                    corr, r2, rmse, me, corr_gp, r2_gp, rmse_gp, me_gp = results
                    corr_gp_list.append(corr_gp)
                    r2_gp_list.append(r2_gp)
                    rmse_gp_list.append(rmse_gp)
                    me_gp_list.append(me_gp)
                else:
                    corr, r2, rmse, me = results

                corr_list.append(corr)
                r2_list.append(r2)
                rmse_list.append(rmse)
                me_list.append(me)
                print('-----------')

    # save results to a csv file
    data = {'year': years_list, 'run_number': run_numbers, 'time_idx': times_list,
            'Corr': corr_list, 'R2': r2_list, 'RMSE': rmse_list, 'ME': me_list}
    if self.gp is not None:
        data['Corr_GP'] = corr_gp_list
        data['R2_GP'] = r2_gp_list
        data['RMSE_GP'] = rmse_gp_list
        data['ME_GP'] = me_gp_list

    results_df = pd.DataFrame(data=data)
    results_df.to_csv(self.savedir / f'{str(datetime.now())}.csv', index=False)
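
# Example usage (a minimal sketch; the subclass name, savedir and train_years value
# below are illustrative assumptions, not names confirmed by this file):
#
#     model = DeepGaussianProcessModel(savedir=Path('results'))
#     model.run(times='all', train_years=7,
#               path_to_histogram=Path('data/deep_gaussian/data.npz'))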