in parquet_flask/cdms_lambda_func/index_to_es/s3_stat_extractor.py [0:0]
def start(self):
    """Parse ``self.__s3_url`` into bucket, object name, and partition attributes.

    Expects a URL of the form
    ``s3://bucket/key1=val1/key2=val2/.../object_name`` where the path
    segments between the bucket and the final object name are Hive-style
    ``key=value`` partition directories.  Recognized partition columns
    (provider, project, platform_code, geo-spatial interval, year, month,
    job_id) are copied onto the corresponding instance attributes; segments
    without ``=`` are ignored.

    :return: self — fluent interface so callers can chain off the parse
    :raise ValueError: if the URL has no single ``://`` separator, or the
        path after the bucket has fewer than two segments
    """
    split_s3_url = self.__s3_url.split('://')
    if len(split_s3_url) != 2:
        raise ValueError(f'invalid S3 URL: {self.__s3_url}')
    split_s3_path = split_s3_url[1].strip().split('/')
    if len(split_s3_path) < 2:
        raise ValueError(f'invalid s3 path: {split_s3_url[1]}')
    self.bucket = split_s3_path[0]
    self.name = split_s3_path[-1]
    # split('=', 1): only the first '=' separates key from value, so values
    # that themselves contain '=' are preserved intact (plain split() would
    # silently truncate them).
    partition_dict = {
        key: value
        for key, value in (seg.split('=', 1) for seg in split_s3_path[1:-1] if '=' in seg)
    }
    # Map each known partition-column constant to the attribute it populates;
    # attributes are only set when the column is present, matching the
    # original conditional assignments.
    column_to_attribute = (
        (CDMSConstants.provider_col, 'provider'),
        (CDMSConstants.project_col, 'project'),
        (CDMSConstants.platform_code_col, 'platform_code'),
        (CDMSConstants.geo_spatial_interval_col, 'geo_interval'),
        (CDMSConstants.year_col, 'year'),
        (CDMSConstants.month_col, 'month'),
        (CDMSConstants.job_id_col, 'job_id'),
    )
    for column, attribute in column_to_attribute:
        if column in partition_dict:
            setattr(self, attribute, partition_dict[column])
    return self