# crop_yield_train_c3d.py
import os
from pathlib import Path

import numpy as np
import pandas as pd
import torch.optim as optim

# C3D, train_c3d, eval_test, plot_predict, plot_predict_error and
# output_to_csv_simple are project-local helpers; their exact import paths are
# repository-specific and assumed to be available in this module's namespace.
def crop_yield_train_c3d(args, data_dir, model_out_dir, result_out_dir, log_out_dir, start_year, end_year,
                         n_tsteps, train_years=None):
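    """Walk-forward training and evaluation of a C3D crop-yield model.

    For each target year in [start_year, end_year] the model is trained on
    samples from earlier years, validated on the year preceding the target
    year and tested on the target year itself; `n_experiment` runs are
    averaged. Predictions, error plots and summary metrics are written under
    `result_out_dir`, checkpoints under `model_out_dir`.
    """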
    # fixed training hyperparameters
    batch_size = 30
    test_batch_size = 128
    n_triplets_per_file = 1
    epochs = 50
    n_experiment = 2
    # args.patience == 9999 is treated as a sentinel for "no early stopping"
    patience = args.patience if args.patience != 9999 else None
    feature = args.feature
    feature_len = args.feature_len

    # one sub-directory per parameter combination for models, results and logs
    params = '{}_nt{}_es{}_{}_tyear{}'.format(start_year, n_tsteps, patience, feature, train_years)
    os.makedirs(log_out_dir, exist_ok=True)
    param_model_out_dir = '{}/{}'.format(model_out_dir, params)
    os.makedirs(param_model_out_dir, exist_ok=True)
    param_result_out_dir = '{}/{}'.format(result_out_dir, params)
    os.makedirs(param_result_out_dir, exist_ok=True)
    if feature == 'all':
        X_dir = '{}/nr_25_dr100'.format(data_dir)
    else:
        X_dir = '{}/nr_25_dr100_{}'.format(data_dir, feature)
    # dim_y.csv maps each sample index to (state, county, year, yield value, lat, lon)
    dim_y = pd.read_csv('{}/dim_y.csv'.format(data_dir))
    dim_y = dim_y.astype({'state': int, 'county': int, 'year': int, 'value': float, 'lat': float, 'lon': float})
    max_index = len(dim_y) - 1

    results = dict()
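    # Walk-forward evaluation: for each target year, train on all earlier years
    # (optionally capped to the most recent `train_years` years), validate on
    # the immediately preceding year and test on the target year itself.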
    for year in range(start_year, end_year + 1):
        print('Predict year {}......'.format(year))
        # boolean masks over dim_y rows for the three splits
        test_idx = (dim_y['year'] == year)
        valid_idx = (dim_y['year'] == (year - 1))
        if train_years is None:
            train_idx = (dim_y['year'] < (year - 1))
        else:
            train_idx = (dim_y['year'] < (year - 1)) & (dim_y['year'] >= (year - 1 - train_years))
        y_valid, y_train = np.array(dim_y.loc[valid_idx]['value']), np.array(dim_y.loc[train_idx]['value'])
        y_test, dim_test = np.array(dim_y.loc[test_idx]['value']), np.array(dim_y.loc[test_idx][['state', 'county']])

        # positional row indices of each split
        test_indices = [i for i, x in enumerate(test_idx) if x]
        valid_indices = [i for i, x in enumerate(valid_idx) if x]
        train_indices = [i for i, x in enumerate(train_idx) if x]
        # Each split must occupy a contiguous block of rows, because only the
        # (first, last) index of each split is passed to train_c3d/eval_test.
        assert all(elem == 1 for elem in [y - x for x, y in zip(test_indices[:-1], test_indices[1:])])
        assert all(elem == 1 for elem in [y - x for x, y in zip(valid_indices[:-1], valid_indices[1:])])
        assert all(elem == 1 for elem in [y - x for x, y in zip(train_indices[:-1], train_indices[1:])])
        print('Train size {}, valid size {}, test size {}'.format(y_train.shape[0], y_valid.shape[0], y_test.shape[0]))
        test_corr_lis, test_r2_lis, test_rmse_lis = [], [], []
        test_prediction_lis = []
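        # Run n_experiment independent trainings (a fresh model and optimizer
        # each time) and average their test predictions into a simple ensemble.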
        for i in range(n_experiment):
            print('Experiment {}'.format(i))
            c3d = C3D(in_channels=feature_len, n_tsteps=n_tsteps)
            optimizer = optim.Adam(c3d.parameters(), lr=0.001)
            trained_epochs = train_c3d(model=c3d,
                                       X_dir=X_dir,
                                       X_train_indices=(train_indices[0], train_indices[-1]),
                                       y_train=y_train,
                                       X_valid_indices=(valid_indices[0], valid_indices[-1]),
                                       y_valid=y_valid,
                                       X_test_indices=(test_indices[0], test_indices[-1]),
                                       y_test=y_test,
                                       n_tsteps=n_tsteps,
                                       max_index=max_index,
                                       n_triplets_per_file=n_triplets_per_file,
                                       patience=patience,
                                       optimizer=optimizer,
                                       batch_size=batch_size,
                                       test_batch_size=test_batch_size,
                                       n_epochs=epochs,
                                       out_dir=param_model_out_dir,
                                       year=year,
                                       exp_idx=i,
                                       log_file='{}/{}.txt'.format(log_out_dir, params))
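            # eval_test presumably reloads the checkpoint(s) written by train_c3d
            # (it receives the model directory and the number of trained epochs)
            # and reports test RMSE, R^2 and correlation for this experiment.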
            test_prediction, rmse, r2, corr = eval_test(X_dir,
                                                        X_test_indices=(test_indices[0], test_indices[-1]),
                                                        y_test=y_test,
                                                        n_tsteps=n_tsteps,
                                                        max_index=max_index,
                                                        n_triplets_per_file=n_triplets_per_file,
                                                        batch_size=test_batch_size,
                                                        model_dir=param_model_out_dir,
                                                        model=c3d,
                                                        epochs=trained_epochs,
                                                        year=year,
                                                        exp_idx=i,
                                                        log_file='{}/{}.txt'.format(log_out_dir, params))
            test_corr_lis.append(corr)
            test_r2_lis.append(r2)
            test_rmse_lis.append(rmse)
            test_prediction_lis.append(test_prediction)

        # average predictions over the experiments, save them, and plot the
        # per-county predictions and prediction errors for the test year
        test_prediction = np.mean(np.asarray(test_prediction_lis), axis=0)
        np.save('{}/{}.npy'.format(param_result_out_dir, year), test_prediction)
        plot_predict(test_prediction, dim_test, Path('{}/pred_{}.html'.format(param_result_out_dir, year)))
        plot_predict_error(test_prediction, y_test, dim_test, Path('{}/err_{}.html'.format(param_result_out_dir, year)))
        # mean metrics across experiments for this test year
        results[year] = {'test_rmse': np.around(np.mean(test_rmse_lis), 3),
                         'test_r2': np.around(np.mean(test_r2_lis), 3),
                         'test_corr': np.around(np.mean(test_corr_lis), 3)}

    output_to_csv_simple(results, param_result_out_dir)
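
# Example invocation (illustrative sketch only: the argument values, the
# argparse wiring and the directory names below are assumptions, not part of
# this module):
#
#   import argparse
#   parser = argparse.ArgumentParser()
#   parser.add_argument('--patience', type=int, default=9999)   # 9999 => no early stopping
#   parser.add_argument('--feature', type=str, default='all')
#   parser.add_argument('--feature_len', type=int, default=9)
#   args = parser.parse_args()
#   crop_yield_train_c3d(args,
#                        data_dir='data', model_out_dir='models',
#                        result_out_dir='results', log_out_dir='logs',
#                        start_year=2014, end_year=2018, n_tsteps=7)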