def start()

in parquet_flask/cdms_lambda_func/index_to_es/s3_stat_extractor.py [0:0]
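
Parses the extractor's S3 URL into its bucket, the object (file) name, and any Hive-style partition values found in the key (provider, project, platform code, geo-spatial interval, year, month, and job ID), then returns the extractor instance for chaining.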


    def start(self):
        # Split the URL into scheme (e.g. "s3") and the bucket/key path.
        split_s3_url = self.__s3_url.split('://')
        if len(split_s3_url) != 2:
            raise ValueError(f'invalid S3 URL: {self.__s3_url}')
        # The path must contain at least a bucket and an object name.
        split_s3_path = split_s3_url[1].strip().split('/')
        if len(split_s3_path) < 2:
            raise ValueError(f'invalid s3 path: {split_s3_url[1]}')
        self.bucket = split_s3_path[0]
        self.name = split_s3_path[-1]

        # Collect Hive-style partition directories (e.g. "year=2020") between the
        # bucket and the object name into a key/value dictionary.
        partition_pairs = [k.split('=') for k in split_s3_path[1: -1] if '=' in k]
        partition_dict = {k[0]: k[1] for k in partition_pairs}

        # Copy each recognized partition column into its corresponding attribute.
        if CDMSConstants.provider_col in partition_dict:
            self.provider = partition_dict[CDMSConstants.provider_col]

        if CDMSConstants.project_col in partition_dict:
            self.project = partition_dict[CDMSConstants.project_col]

        if CDMSConstants.platform_code_col in partition_dict:
            self.platform_code = partition_dict[CDMSConstants.platform_code_col]

        if CDMSConstants.geo_spatial_interval_col in partition_dict:
            self.geo_interval = partition_dict[CDMSConstants.geo_spatial_interval_col]

        if CDMSConstants.year_col in partition_dict:
            self.year = partition_dict[CDMSConstants.year_col]

        if CDMSConstants.month_col in partition_dict:
            self.month = partition_dict[CDMSConstants.month_col]

        if CDMSConstants.job_id_col in partition_dict:
            self.job_id = partition_dict[CDMSConstants.job_id_col]
        return self
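
A minimal usage sketch follows. It assumes the enclosing class is named S3StatExtractor, that the S3 URL is supplied through its constructor, and that the partition directory names in the sample URL match the actual CDMSConstants column values; none of these details are confirmed by this excerpt.

# Illustrative sketch only. Assumptions (not confirmed by the source): the class is
# named S3StatExtractor, it takes the S3 URL in its constructor, and the partition
# directory names below match the CDMSConstants column values.
from parquet_flask.cdms_lambda_func.index_to_es.s3_stat_extractor import S3StatExtractor

sample_url = ('s3://cdms-bucket/parquet/provider=SAMPLE_PROVIDER/project=SAMPLE_PROJECT/'
              'platform_code=30/year=2020/month=6/job_id=abc-123/part-0000.parquet')

stats = S3StatExtractor(sample_url).start()
print(stats.bucket)    # cdms-bucket
print(stats.name)      # part-0000.parquet
print(stats.provider)  # SAMPLE_PROVIDER (set only if 'provider' equals CDMSConstants.provider_col)
print(stats.year)      # 2020 (set only if 'year' equals CDMSConstants.year_col)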