in src/data_utils.py [0:0]
def save_regional_data(data_dict, meta_dict, bucket):
    """Upload the labeled training data for a region to S3.

    Concatenates the mangrove and non-mangrove frames, shuffles the rows,
    and writes the result as a CSV to
    ``s3://{bucket}/{src_dataset}/Year{year}/{poi}.csv``.

    Args:
        data_dict: dictionary containing base image, mangrove ("df_mangrove")
            and non-mangrove ("df_other") data frames; both frames carry a
            ``label`` column (1 = mangrove, 0 = other, per the summary print).
        meta_dict: dictionary containing metadata ("src_dataset", "year",
            "poi") used to build the S3 object key.
        bucket: s3 bucket name
    """
    df_training = pd.concat([data_dict["df_mangrove"], data_dict["df_other"]], axis=0)
    df_training = shuffle(df_training)
    fname = f"{meta_dict['src_dataset']}/Year{meta_dict['year']}/{meta_dict['poi']}.csv"
    df_training.to_csv(f"s3://{bucket}/{fname}", index=False)
    # value_counts() is indexed by label value; use .get(..., 0) so a region
    # where one class is absent reports 0 instead of raising KeyError.
    num_rows = df_training.label.value_counts()
    print(
        f"rows: {len(df_training)}, rows_mangrove = {num_rows.get(1, 0)}, rows_other = {num_rows.get(0, 0)}"
    )