In [None]:
# Notebook parameters. All default to the empty string; the cells below treat
# an empty emails / comp_appid / baseline_appid as "skip this step".
appid = ""          # run to analyse: passed to Application_Run, used in the Spark app name
disk = ""           # comma-separated device names, turned into disk_prefix
nic = ""            # comma-separated device names, turned into nic_prefix
tz = ""             # value used for spark.sql.session.timeZone
base_dir = ""       # basedir argument of Application_Run for appid
name = ""           # forwarded to generate_email_body_title
notebook = ""       # forwarded to generate_email_body_title
notebook_html = ""  # forwarded to generate_email_body_title
proxy = ""          # exported as http_proxy / https_proxy
emails = ""         # comma-separated recipients of the report mail
pr = ""             # forwarded to generate_email_body_title

# Optional second run to compare against (also shown in the table of contents).
comp_appid = ""
comp_base_dir = ""
comp_name = ""

# Optional baseline run to compare against.
baseline_appid = ""
baseline_base_dir = ""

In [None]:
%%html
<style>
/* Hide every element matching div.output_stderr (display: none). */
div.output_stderr {
background: #ffdd;
display: none;
}
</style>

In [None]:
import warnings
warnings.filterwarnings('ignore')

In [None]:
# findspark.init() runs before the pyspark imports that follow; it presumably
# makes the pyspark package of the local Spark installation importable.
import findspark
findspark.init()

import os
import time
import sys
from pyspark import SparkConf, SparkContext
from pyspark.sql import SQLContext

def get_py4jzip():
    """Return the path of the py4j zip found under ``$SPARK_HOME/python/lib``.

    The lookup uses ``glob`` instead of shelling out to ``ls``, so it works
    as plain Python, tolerates spaces in ``SPARK_HOME`` and never returns an
    ``ls`` error message as if it were a path.

    Returns:
        str: the first ``py4j*.zip`` match in name-sorted order.

    Raises:
        KeyError: if ``SPARK_HOME`` is not set in the environment.
        FileNotFoundError: if no ``py4j*.zip`` exists in that directory.
    """
    import glob  # local import keeps this helper self-contained

    spark_home = os.environ['SPARK_HOME']
    pattern = os.path.join(spark_home, 'python', 'lib', 'py4j*.zip')
    # glob returns matches in arbitrary order; sort to get a stable first hit.
    matches = sorted(glob.glob(pattern))
    if not matches:
        raise FileNotFoundError(f'no py4j zip found matching {pattern}')
    return matches[0]

# Spark configuration for the analysis session. The application name embeds
# the analysed appid, and the executors' PYTHONPATH is built from SPARK_HOME,
# the py4j zip and the driver's own sys.path.
conf = (SparkConf()
    .set('spark.app.name', f'perf_analysis_{appid}')
    .set('spark.serializer','org.apache.spark.serializer.KryoSerializer')
    .set('spark.executor.instances', '4')
    .set('spark.executor.cores','4')
    .set('spark.executor.memory', '8g')
    .set('spark.driver.memory','20g')
    .set('spark.memory.offHeap.enabled','True')
    .set('spark.memory.offHeap.size','20g')
    .set('spark.executor.memoryOverhead','1g')
    .set('spark.executor.extraJavaOptions',
          '-XX:+UseParallelGC -XX:+UseParallelOldGC -verbose:gc -XX:+PrintGCDetails -XX:+PrintGCTimeStamps')
    .set('spark.executorEnv.PYTHONPATH',f"{os.environ['SPARK_HOME']}/python:{get_py4jzip()}:{':'.join(sys.path)}")
    .set('spark.sql.inMemoryColumnarStorage.compressed','False')
    .set('spark.sql.inMemoryColumnarStorage.batchSize','100000')
    .set('spark.sql.execution.arrow.pyspark.fallback.enabled','True')
    .set('spark.sql.execution.arrow.pyspark.enabled','True')
    .set('spark.sql.execution.arrow.maxRecordsPerBatch','100000')
    .set("spark.sql.repl.eagerEval.enabled", True)
    .set("spark.sql.legacy.timeParserPolicy","LEGACY")
       )

# Only override the session time zone when one was actually supplied: tz
# defaults to '', which is not a valid zone id, so in that case leave the
# setting alone and let Spark use its own default.
if tz:
    conf.set("spark.sql.session.timeZone", tz)

# The context is created against YARN; `spark` is a SQLContext wrapping it.
sc = SparkContext(conf=conf,master='yarn')
sc.setLogLevel("ERROR")
spark = SQLContext(sc)
# NOTE(review): fixed 10 s pause after start-up — presumably to let the
# executors come up; confirm it is still required.
time.sleep(10)

In [None]:
# Run the sparklog helper notebook from the user's home directory.
# NOTE(review): names used below but not defined in this notebook
# (Application_Run, generate_email_body_title, comp_spark_conf, display, HTML)
# presumably come from it — confirm.
%run ~/PAUS/sparklog.ipynb

In [None]:
# Export the configured proxy under both lower-case proxy variables.
for proxy_var in ("https_proxy", "http_proxy"):
    os.environ[proxy_var] = proxy

In [None]:
# Metric names passed as show_metric to the summary, the trace view and the
# run comparisons.
emonmetric = [
    'emon_cpuutil',
    'emon_cpufreq',
    'emon_instr_retired',
    'emon_ipc',
]

In [None]:
def _quote_devices(csv):
    """Split a comma-separated string and wrap every piece in single quotes."""
    return [f"'{dev}'" for dev in csv.split(',')]


# Quoted device lists handed to the summary, trace view and comparisons.
disk_prefix = _quote_devices(disk)
nic_prefix = _quote_devices(nic)

In [None]:
# Table of contents: one HTML link per section, displayed in order.
toc_links = [
    '<a href=#App-info> 1 App info</a>',
    f"<a href=#Compare-to{'-' + comp_name if comp_name else ''}> 2 Compare to {comp_name}</a>",
    '<a href=#Config-compare> 3 Config compare</a>',
    '<a href=#Compare-to-baseline> 4 Compare to baseline</a>',
]
for link in toc_links:
    display(HTML(link))

# App info

In [None]:
# Load the run identified by appid from base_dir, and keep a shortcut to the
# object stored under analysis['app']['als'], which most cells below call.
app=Application_Run(appid, basedir=base_dir)
appals=app.analysis['app']['als']

In [None]:
# Keep the result of get_basic_state(); it is passed to
# generate_email_body_title when a report mail is sent.
stats=appals.get_basic_state()

In [None]:
# Build the run summary for the selected metrics and devices, then render it
# through its .style accessor.
summary = app.get_summary(
    show_metric=emonmetric,
    disk_prefix=disk_prefix,
    nic_prefix=nic_prefix,
)
display(summary.style)

In [None]:
# Generate the trace view (with emon data enabled); the result is also passed
# to generate_email_body_title when a report mail is sent.
traceview = app.generate_trace_view(
    showemon=True,
    show_metric=emonmetric,
    disk_prefix=disk_prefix,
    nic_prefix=nic_prefix,
)

In [None]:
# Cell output: whatever get_app_name() returns for the analysed run.
appals.get_app_name()

In [None]:
# Shuffle statistics are only computed when the run loaded the Gluten plugin.
# spark.plugins is a comma-separated list of plugin class names, so test for
# membership: a plain equality check misses Gluten whenever any other plugin
# is configured alongside it.
allconfs=appals.get_spark_config().to_dict()[0]
loaded_plugins=[]
if 'spark.plugins' in allconfs:
    loaded_plugins=[plugin.strip() for plugin in str(allconfs['spark.plugins']).split(',')]
if 'org.apache.gluten.GlutenPlugin' in loaded_plugins:
    shuffle_df, dfx=appals.get_shuffle_stat()

In [None]:
# Cell output: get_app_info() called with the configured disk and NIC
# prefix lists.
appals.get_app_info(disk_prefix=disk_prefix,nic_prefix=nic_prefix)

In [None]:
# Cell output: the result of show_critical_path_time_breakdown(), transposed.
appals.show_critical_path_time_breakdown().T

In [None]:
# Mail the HTML report to the comma-separated recipients in `emails`
# (skipped entirely when `emails` is empty).
if emails:
    import subprocess  # local import: only needed when a report is mailed

    mail_list=' '.join(emails.split(','))
    # generate_email_body_title returns the path of the body file and the subject.
    body,title=generate_email_body_title(appid, base_dir, name, comp_appid, comp_base_dir, comp_name, baseline_appid, baseline_base_dir, notebook, notebook_html, traceview, stats, summary, pr)
    # Build an argument list and run `mail` without a shell, so quotes or
    # shell metacharacters in the subject or addresses can neither break the
    # command nor be executed. Splitting mail_list on whitespace mirrors the
    # word splitting the shell applied to the unquoted recipient list.
    mail_cmd=['mail','-a','Content-type: text/html; charset=utf-8','-s',str(title)]+mail_list.split()
    try:
        # The body file is fed on stdin, as `< body` did; expanduser keeps
        # a leading ~ working now that no shell expands it.
        with open(os.path.expanduser(str(body)),'rb') as body_file:
            sent=subprocess.run(mail_cmd,stdin=body_file,stdout=subprocess.PIPE,
                                stderr=subprocess.STDOUT,universal_newlines=True)
        print(sent.stdout,end='')
    except OSError as err:
        # As with the shell command, a missing body file or `mail` binary is
        # reported in the cell output instead of aborting the notebook.
        print(f'mail not sent: {err}')

# Compare to

In [None]:
# Compare against the optional second run; skipped when comp_appid is empty.
if comp_appid:
    comp_app = Application_Run(comp_appid, basedir=comp_base_dir)
    output = app.compare_app(
        rapp=comp_app,
        show_metric=emonmetric,
        show_queryplan_diff=False,
        disk_prefix=disk_prefix,
        nic_prefix=nic_prefix,
    )
    display(HTML(output))

# Config compare

In [None]:
# Show the Spark configuration comparison between this run and the comparison
# run; comp_app is created by the comparison cell under the same condition.
if comp_appid:
    comp_appals = comp_app.analysis['app']['als']
    conf_diff = comp_spark_conf(appals, comp_appals)
    display(conf_diff)

# Compare to baseline

In [None]:
# Compare against the optional baseline run; skipped when baseline_appid is empty.
if baseline_appid:
    baseline_app = Application_Run(baseline_appid, basedir=baseline_base_dir)
    output = app.compare_app(
        rapp=baseline_app,
        show_metric=emonmetric,
        show_queryplan_diff=False,
        disk_prefix=disk_prefix,
        nic_prefix=nic_prefix,
    )
    display(HTML(output))

# Convert to HTML

In [None]:
%%javascript
// Ask the kernel to set the Python variable nb_name to IPython.notebook.notebook_name.
// NOTE(review): relies on the front end exposing the IPython.notebook object — confirm for the front end in use.
IPython.notebook.kernel.execute('nb_name = "' + IPython.notebook.notebook_name + '"')

In [None]:
# htmlname=nb_name.replace("ipynb","html")

In [None]:
# !jupyter nbconvert --to html ./{nb_name} --no-input --output html/{htmlname} --template classic