in migration/emr/emr_migration.py [0:0]
def _collect_notebook_files(local_folder, destination_prefix):
    """Build the CodeCommit ``putFiles`` entries for every file under a folder.

    Walks ``local_folder`` recursively, reads each file as bytes and maps its
    path relative to ``local_folder`` onto ``destination_prefix`` using
    forward slashes.

    Args:
        local_folder: Directory whose contents should be uploaded.
        destination_prefix: Repository path prefix (no trailing slash) that
            replaces ``local_folder`` in the destination path.

    Returns:
        A list of dicts with ``filePath`` and ``fileContent`` keys.
    """
    put_files = []
    for root, folders, files in os.walk(local_folder):
        # Prune in place so os.walk never descends into git metadata; the
        # original comment notes that uploading it causes git pull to fail.
        if ".git" in folders:
            print("Ignoring directory: " + os.path.join(root, ".git"))
            folders.remove(".git")
        for file_name in files:
            file_path = os.path.join(root, file_name)
            print("Local file: " + file_path)
            # A plain file named ".git" (as used by submodules/worktrees) is
            # git metadata as well, so it is skipped too.
            if file_name == ".git":
                print("Ignoring file: " + file_path)
                continue
            # Derive the destination from the path relative to local_folder
            # instead of a string replace: replace() rewrites every occurrence
            # of the folder string (e.g. each "." when local_folder is ".")
            # and drops the separator when local_folder ends with a slash.
            relative_path = os.path.relpath(file_path, local_folder)
            target_path = destination_prefix + "/" + relative_path.replace(os.sep, "/")
            print("Uploading to: " + target_path)
            # Read-only binary mode: the file is never written, so write
            # permission must not be required.
            with open(file_path, mode='rb') as file_obj:
                file_content = file_obj.read()
            put_files.append({
                'filePath': target_path,
                'fileContent': file_content,
            })
    return put_files


def upload_notebooks(local_folder, domain_id, project_id, emr_studio_id, emr_workspace_id, region):
    """Commit the files of a local folder to the project's CodeCommit repo.

    Every file under ``local_folder`` (except git metadata named ``.git``) is
    added in a single commit on the ``main`` branch under
    ``emr_notebooks/<emr_studio_id>/<emr_workspace_id>/``.

    Args:
        local_folder: Folder containing the notebooks. When falsy, the upload
            is skipped and the function returns without doing anything.
        domain_id: Passed to ``get_project_repo`` to resolve the repository.
        project_id: Passed to ``get_project_repo`` to resolve the repository.
        emr_studio_id: Used as the first component of the destination path.
        emr_workspace_id: Used as the second component of the destination path.
        region: AWS region for the repository lookup and the CodeCommit client.

    Returns:
        None.

    Raises:
        ValueError: If ``emr_studio_id`` or ``emr_workspace_id`` is missing,
            or if ``local_folder`` does not exist.
    """
    if not local_folder:
        print("No local folder provided. Skipping notebook upload.")
        return
    if not emr_studio_id or not emr_workspace_id:
        raise ValueError("EMR Studio ID and Workspace ID are required when uploading notebooks")
    if not os.path.exists(local_folder):
        raise ValueError(f"Local folder {local_folder} does not exist")

    repo = get_project_repo(domain_id, project_id, region)
    print(f"Uploading notebook from local folder {local_folder} to CodeCommit repo {repo}...")

    put_files = _collect_notebook_files(
        local_folder,
        f'emr_notebooks/{emr_studio_id}/{emr_workspace_id}',
    )
    if not put_files:
        # NOTE(review): a commit without any file change is expected to be
        # rejected by CodeCommit, so skip instead of failing - confirm.
        print(f"No files found in local folder {local_folder}. Skipping notebook upload.")
        return

    code_commit = boto3.client('codecommit', region_name=region)
    branch = "main"
    # The new commit is created on top of the current tip of the branch.
    parent_commit_id = code_commit.get_branch(repositoryName=repo, branchName=branch).get("branch").get("commitId")
    code_commit.create_commit(
        repositoryName=repo,
        branchName=branch,
        parentCommitId=parent_commit_id,
        putFiles=put_files
    )
    print(f"Uploaded notebook from local folder {local_folder} to CodeCommit repo {repo}.")