in projects/hive-bigquery-connector-demo/scripts/deconstruct-jupyter-notebook.py [0:0]
def run():
    """Main entry point for the script.

    Loads a Jupyter notebook (either from the staging GCS bucket when the
    positional argument is the literal "JUPYTER", or from a local file path),
    then writes each notebook cell out as a numbered paragraph file under
    ``args.paragraphs_path``. Markdown cells get a ``.mdx`` suffix; other
    cell types keep their ``cell_type`` as the suffix. Lines ending in a
    ``<comment> variable: <KEY>`` marker have the concrete terraform-state
    value substituted back to its ``<KEY>`` placeholder and the marker
    stripped, so the emitted paragraphs are environment-independent.

    Exits with status 1 when a local notebook path does not point to a file.
    """
    parser = get_parser()
    args = parser.parse_args(sys.argv[1:])
    tf_state = ScriptState.tf_state()
    notebook_path = args.NOTEBOOK_PATH
    paragraphs_path = args.paragraphs_path
    if notebook_path == "JUPYTER":
        # Sentinel value: fetch the live notebook from the staging bucket
        # instead of reading a local file.
        bucket = ScriptState.gcs_client().get_bucket(
            ScriptState.tf_state().staging_bucket
        )
        blob = bucket.get_blob("notebooks/jupyter/notebook.ipynb")
        # NOTE(review): download_as_string() is deprecated in newer
        # google-cloud-storage releases in favor of download_as_bytes();
        # kept as-is — confirm the pinned library version before switching.
        content = blob.download_as_string()
        f = json.loads(content)
    else:
        # isfile() already returns False for nonexistent paths, so the
        # former extra exists() check was redundant.
        if not os.path.isfile(notebook_path):
            print(f"{notebook_path} is not a path to a file")
            sys.exit(1)
        with open(notebook_path, "r", encoding="utf-8") as fp:
            f = json.load(fp)
    cells = f["cells"]
    # ignore_errors=True: without it the first run (no output dir yet)
    # crashed with FileNotFoundError.
    shutil.rmtree(paragraphs_path, ignore_errors=True)
    os.mkdir(paragraphs_path)
    for i, cell in enumerate(cells):
        num = str(i).zfill(4)  # zero-pad so files sort in cell order
        suff = cell["cell_type"].replace("markdown", "mdx")
        file_path = os.path.join(paragraphs_path, f"{num}.{suff}")
        content = cell["source"]
        new_content = []
        for line in content:
            line = line.rstrip()
            # Replace any terraform-state value tagged with a
            # "<comment> variable: <KEY>" trailer by its placeholder,
            # then drop the trailer itself.
            for state_key, state in tf_state.__dict__.items():
                state_key_up = f"<{state_key.upper()}>"
                for comment_char in comments_chars:
                    if line.endswith(
                        f" {comment_char} variable: {state_key_up}"
                    ):
                        # Reuse state_key_up instead of rebuilding the
                        # identical f-string.
                        line = line.replace(state, state_key_up)
                        line = line.split(f"{comment_char} variable: ")[
                            0
                        ].rstrip()
            new_content.append(line.rstrip() + "\n")
        if new_content:
            # No trailing newline on the last line of each paragraph file.
            new_content[-1] = new_content[-1].rstrip()
        with open(file_path, "w", encoding="utf-8") as fp:
            fp.writelines(new_content)