in src/data_utils.py
import pandas as pd

# get_data_by_zone_year, save_regional_data, and split_dataset are assumed to be
# defined (or imported) elsewhere in data_utils.py.


def main():
    """
    Dataset preparation for mangrove classifier training.
    """
    # Select the S3 bucket where the prepared dataset is stored
    s3_bucket = "sagemaker-gis"
    # Select the satellite collection, year, and bands
    # (Landsat 8 surface reflectance, bands B1 through B7)
    base_sat_data = "LANDSAT/LC08/C01/T1_SR"
    year = 2015
    bands = "B[1-7]"
    meta_dict = {"src_dataset": base_sat_data.replace("/", "_"), "year": year}
    date_range = [f"{year}-01-01", f"{year}-12-31"]
    # Read a representative coordinate for each region
    df_zones = pd.read_csv("zones.csv").set_index("region")
    # Build and save the dataset region by region
    for area in df_zones.index:
        print(f"processing data for {area}...")
        point_of_int = df_zones.loc[area, ["lon", "lat"]].tolist()
        data_dict = get_data_by_zone_year(
            point_of_int, date_range, base_sat_data, bands
        )
        meta_dict["poi"] = area.replace(" ", "_")
        save_regional_data(data_dict, meta_dict, s3_bucket)
    # Split the dataset into training and test sets by holding out whole regions
    areas_for_test = ["Vietnam2", "Myanmar3", "Cuba2", "India"]
    folder = f"{meta_dict['src_dataset']}/Year{meta_dict['year']}"
    split_dataset(areas_for_test, s3_bucket, folder)
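
For context, zones.csv is expected to contain a region column plus lon and lat columns, one row per representative point. The helper get_data_by_zone_year is not shown here; the sketch below is one plausible implementation using the Google Earth Engine Python API, which the LANDSAT/LC08/C01/T1_SR collection ID and the B[1-7] band regex suggest. The 50 km buffer, the pixel sample size, and the returned dictionary layout are illustrative assumptions, not taken from the original code.

import ee


def get_data_by_zone_year(point_of_int, date_range, base_sat_data, bands):
    """Hypothetical sketch: pull one year of Landsat band values around a point."""
    ee.Initialize()
    lon, lat = point_of_int
    # Rectangular area of interest around the representative point
    # (the 50 km buffer is an assumed value)
    aoi = ee.Geometry.Point([lon, lat]).buffer(50_000).bounds()
    composite = (
        ee.ImageCollection(base_sat_data)          # e.g. "LANDSAT/LC08/C01/T1_SR"
        .filterDate(date_range[0], date_range[1])
        .filterBounds(aoi)
        .select(bands)                             # "B[1-7]" acts as a band-name regex
        .median()                                  # yearly median composite
        .clip(aoi)
    )
    # Sample a limited number of pixels so the result fits in a single getInfo() call
    samples = composite.sample(region=aoi, scale=30, numPixels=1000, geometries=True)
    return {"features": samples.getInfo()["features"]}

Holding out entire regions (Vietnam2, Myanmar3, Cuba2, India) for the test set, rather than random pixels, helps check that the classifier generalizes to geographies it never saw during training.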