in images/jupyter-user/python-utils/notebook_runner.py [0:0]
def prepareNotebook(default_output_directory, notebook, key):
    """Stage one notebook for execution and build its runtime parameters.

    The source notebook is read as JSON, its ``metadata.kernelspec.language``
    is set ("scala" when the kernelspec name is "sparkkernel", otherwise
    "python"), and the patched copy is written to ``/tmp/<outputName>``.
    Unless the target path starts with ``s3:``, the output directory and a
    per-notebook sub-directory are created when they do not exist yet.

    Args:
        default_output_directory: Output location used when ``notebook`` has
            no "targetPath" entry.
        notebook: Mapping describing the run. "notebookName" and "sourcePath"
            are required; "targetPath", "targetPrefix", "paramPath" and
            "params" are optional. "paramPath" takes precedence over "params".
        key: Prefix for the output file name when ``notebook`` has no
            "targetPrefix" entry.

    Returns:
        The notebook parameters (loaded from "paramPath", copied from
        "params", or empty) extended with the ``PAPERMILL_INPUT_PATH``,
        ``PAPERMILL_OUTPUT_PATH``, ``PAPERMILL_OUTPUT_DIR_PATH``,
        ``PAPERMILL_WORKBOOK_NAME`` and ``PAPERMILL_WORK_DIR`` entries.

    Raises:
        KeyError: If a required key is missing from ``notebook`` or the
            notebook JSON has no ``metadata.kernelspec.name``.
        Exception: Any error raised while reading the notebook file, loading
            the parameter file, or copying "params" is logged and re-raised.
    """
    logger.info(f"prepareNotebook={notebook}")
    notebookName = notebook["notebookName"]
    sourcePath = notebook["sourcePath"]
    targetPath = notebook.get("targetPath", default_output_directory)
    targetPrefix = notebook.get("targetPrefix", key)

    # Minute-resolution timestamp: runs with the same prefix inside the same
    # minute produce the same output name.
    timestamp = time.strftime("%Y%m%d-%H:%M")
    outputName = targetPrefix + "@" + timestamp + ".ipynb"

    logger.debug(f"Source Path: {sourcePath}")
    sourcePath = sourcePath.replace(
        "$ORBIT_TRANSFORMATION_NOTEBOOKS_ROOT", "/opt/transformations/"
    )
    # Check for codecommit and replace with temp path
    sourcePath = sourcePath.replace("codecommit::", "/tmp/")
    workdir = os.path.abspath(sourcePath)
    pathToNotebook = os.path.join(sourcePath, notebookName)
    pathToNotebookFixed = os.path.join("/tmp", outputName)
    logger.debug("Source Notebook: %s", pathToNotebook)

    try:
        # Context manager so the handle is closed even when parsing fails.
        with open(pathToNotebook, encoding="utf-8") as infile:
            sm_notebook = json.load(infile)
    except Exception as e:
        logger.error("error opening notebook file at: %s", pathToNotebook)
        logger.error(e)
        raise

    kernelspec = sm_notebook["metadata"]["kernelspec"]
    if kernelspec["name"] == "sparkkernel":
        kernelspec["language"] = "scala"
    else:
        kernelspec["language"] = "python"

    with open(pathToNotebookFixed, "w", encoding="utf-8") as outfile:
        json.dump(sm_notebook, outfile)
    logger.debug("fixed language in notebook: %s", pathToNotebook)

    # Targets starting with "s3:" are never created on the local filesystem.
    isS3Target = targetPath.startswith("s3:")

    pathToOutputDir = targetPath
    logger.info(f"pathToOutputDir={pathToOutputDir}")
    # The path is only made absolute when it has to be created; an existing
    # directory keeps the spelling that was passed in.
    if not isS3Target and not os.path.exists(pathToOutputDir):
        pathToOutputDir = os.path.abspath(pathToOutputDir)
        logger.info(f"creating dirs pathToOutputDir={pathToOutputDir}")
        os.makedirs(pathToOutputDir, exist_ok=True)

    # Everything after the first "." in the notebook name is dropped.
    notebookNameWithoutSufix = notebookName.split(".")[0]
    pathToOutputNotebookDir = os.path.join(pathToOutputDir, notebookNameWithoutSufix)
    if not isS3Target and not os.path.exists(pathToOutputNotebookDir):
        pathToOutputNotebookDir = os.path.abspath(pathToOutputNotebookDir)
        logger.info(
            f"creating dirs pathToOutputNotebookDir={pathToOutputNotebookDir}"
        )
        os.makedirs(pathToOutputNotebookDir, exist_ok=True)

    pathToOutputNotebook = os.path.join(pathToOutputNotebookDir, outputName)
    logger.debug("Target Notebook path: %s", pathToOutputNotebook)
    logger.debug("FOUND notebook: %s", notebook)

    if "paramPath" in notebook:
        pathToParamPath = os.path.abspath(notebook["paramPath"])
        try:
            parameters = read_yaml_file(pathToParamPath)
        except Exception:
            logger.error("cannot find parameter file at: %s", pathToParamPath)
            raise
    elif "params" in notebook:
        try:
            # Shallow copy so the PAPERMILL_* entries added below do not leak
            # into the caller's notebook definition.
            parameters = dict(notebook["params"])
        except Exception:
            logger.error("fail to parse parameters: %s", notebook["params"])
            raise
    else:
        parameters = dict()

    parameters["PAPERMILL_INPUT_PATH"] = os.path.abspath(pathToNotebookFixed)
    parameters["PAPERMILL_OUTPUT_PATH"] = pathToOutputNotebook
    parameters["PAPERMILL_OUTPUT_DIR_PATH"] = pathToOutputNotebookDir
    parameters["PAPERMILL_WORKBOOK_NAME"] = outputName
    parameters["PAPERMILL_WORK_DIR"] = workdir  # already absolute
    logger.debug("runtime parameters: %s", parameters)
    return parameters