in notebooks/common/util/fcst_utils.py [0:0]
import os
import shutil

import boto3
import pandas as pd


def read_explainability_export(BUCKET_NAME, s3_path):
    """Read explainability export files from S3.

    Inputs:
        BUCKET_NAME = S3 bucket name
        s3_path = S3 key prefix to the export files, i.e. everything after
            "s3://BUCKET_NAME/" in the S3 URI of your files

    Return: pandas DataFrame with all part files concatenated row-wise
    """
    # set up the S3 resource and a handle to the export bucket
    s3 = boto3.resource('s3')
    s3_bucket = s3.Bucket(BUCKET_NAME)
    # set local path, starting from a clean download directory
    local_write_path = "explainability_exports"
    if os.path.exists(local_write_path) and os.path.isdir(local_write_path):
        shutil.rmtree(local_write_path)
    os.makedirs(local_write_path)
    # download every non-empty CSV part file found under the prefix
    part_files = list(s3_bucket.objects.filter(Prefix=s3_path))
    print(f"Number of part files found: {len(part_files)}")
    for file in part_files:
        # the export is written as a collection of CSV part files; fetch each one
        if "csv" in file.key:
            part_filename = file.key.split('/')[-1]
            window_object = s3.Object(BUCKET_NAME, file.key)
            file_size = window_object.content_length
            # skip empty objects (e.g. folder placeholder keys)
            if file_size > 0:
                s3_bucket.download_file(file.key, os.path.join(local_write_path, part_filename))
    # read each downloaded part file from the local directory and combine them
    temp_dfs = []
    for entry in os.listdir(local_write_path):
        if os.path.isfile(os.path.join(local_write_path, entry)):
            df = pd.read_csv(os.path.join(local_write_path, entry), index_col=None, header=0)
            temp_dfs.append(df)
    # return the assembled part files as a single pandas DataFrame
    fcst_df = pd.concat(temp_dfs, axis=0, ignore_index=True, sort=False)
    return fcst_df
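
A minimal usage sketch follows; the bucket name and export prefix below are hypothetical placeholders, and the explainability export job is assumed to have completed before the files are read:

# Hypothetical values -- replace with your own bucket and export prefix.
bucket_name = "my-forecast-bucket"
export_prefix = "explainability-exports/my-explainability-export/"

# Downloads every CSV part file under the prefix and returns one combined DataFrame.
explainability_df = read_explainability_export(bucket_name, export_prefix)
print(explainability_df.shape)
print(explainability_df.head())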