tools/workload/benchmark_velox/analysis/perf_analysis_template.ipynb (437 lines of code) (raw):
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"tags": [
"parameters"
]
},
"outputs": [],
"source": [
"appid=''\n",
"disk=''\n",
"nic=''\n",
"tz=''\n",
"base_dir=''\n",
"name=''\n",
"notebook=''\n",
"notebook_html=''\n",
"proxy=''\n",
"emails=''\n",
"pr=''\n",
"\n",
"comp_appid=''\n",
"comp_base_dir=''\n",
"comp_name=''\n",
"\n",
"baseline_appid=''\n",
"baseline_base_dir=''"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"%%html\n",
"<style>\n",
"div.output_stderr {\n",
"background: #ffdd;\n",
"display: none;\n",
"}\n",
"</style>"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import warnings\n",
"warnings.filterwarnings('ignore')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import findspark\n",
"findspark.init()\n",
"\n",
"import os\n",
"import time\n",
"import sys\n",
"from pyspark import SparkConf, SparkContext\n",
"from pyspark.sql import SQLContext\n",
"\n",
"def get_py4jzip():\n",
" spark_home=os.environ['SPARK_HOME']\n",
" py4jzip = !ls {spark_home}/python/lib/py4j*.zip\n",
" return py4jzip[0]\n",
"\n",
"conf = (SparkConf()\n",
" .set('spark.app.name', f'perf_analysis_{appid}')\n",
" .set('spark.serializer','org.apache.spark.serializer.KryoSerializer')\n",
" .set('spark.executor.instances', '4')\n",
" .set('spark.executor.cores','4')\n",
" .set('spark.executor.memory', '8g')\n",
" .set('spark.driver.memory','20g')\n",
" .set('spark.memory.offHeap.enabled','True')\n",
" .set('spark.memory.offHeap.size','20g')\n",
" .set('spark.executor.memoryOverhead','1g')\n",
" .set('spark.executor.extraJavaOptions',\n",
" '-XX:+UseParallelGC -XX:+UseParallelOldGC -verbose:gc -XX:+PrintGCDetails -XX:+PrintGCTimeStamps')\n",
" .set('spark.executorEnv.PYTHONPATH',f\"{os.environ['SPARK_HOME']}/python:{get_py4jzip()}:{':'.join(sys.path)}\")\n",
" .set('spark.sql.inMemoryColumnarStorage.compressed','False')\n",
" .set('spark.sql.inMemoryColumnarStorage.batchSize','100000')\n",
" .set('spark.sql.execution.arrow.pyspark.fallback.enabled','True')\n",
" .set('spark.sql.execution.arrow.pyspark.enabled','True')\n",
" .set('spark.sql.execution.arrow.maxRecordsPerBatch','100000')\n",
" .set(\"spark.sql.repl.eagerEval.enabled\", True)\n",
" .set(\"spark.sql.legacy.timeParserPolicy\",\"LEGACY\") \n",
" .set(\"spark.sql.session.timeZone\", tz)\n",
" )\n",
"\n",
"sc = SparkContext(conf=conf,master='yarn')\n",
"sc.setLogLevel(\"ERROR\")\n",
"spark = SQLContext(sc)\n",
"time.sleep(10)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"scrolled": false
},
"outputs": [],
"source": [
"%run ~/PAUS/sparklog.ipynb"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"os.environ[\"https_proxy\"] = proxy\n",
"os.environ[\"http_proxy\"] = proxy"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"emonmetric=['emon_cpuutil',\n",
" 'emon_cpufreq',\n",
" 'emon_instr_retired',\n",
" 'emon_ipc']"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"disk_prefix=[f\"'{dev}'\" for dev in disk.split(',')]\n",
"nic_prefix=[f\"'{dev}'\" for dev in nic.split(',')]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"display(HTML('<a href=#App-info> 1 App info</a>'))\n",
"display(HTML(f\"<a href=#Compare-to{'-' + comp_name if comp_name else ''}> 2 Compare to {comp_name}</a>\"))\n",
"display(HTML('<a href=#Config-compare> 3 Config compare</a>'))\n",
"display(HTML('<a href=#Compare-to-baseline> 4 Compare to baseline</a>'))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# App info"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"app=Application_Run(appid, basedir=base_dir)\n",
"appals=app.analysis['app']['als']"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"stats=appals.get_basic_state()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"summary=app.get_summary(show_metric=emonmetric,disk_prefix=disk_prefix,nic_prefix=nic_prefix)\n",
"display(summary.style)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"traceview=app.generate_trace_view(showemon=True,show_metric=emonmetric,disk_prefix=disk_prefix,nic_prefix=nic_prefix)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"appals.get_app_name()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"allconfs=appals.get_spark_config().to_dict()[0]\n",
"if 'spark.plugins' in allconfs and allconfs['spark.plugins'] == 'org.apache.gluten.GlutenPlugin':\n",
" shuffle_df, dfx=appals.get_shuffle_stat()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"appals.get_app_info(disk_prefix=disk_prefix,nic_prefix=nic_prefix)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"appals.show_critical_path_time_breakdown().T"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"if emails:\n",
" mail_list=' '.join(emails.split(','))\n",
" body,title=generate_email_body_title(appid, base_dir, name, comp_appid, comp_base_dir, comp_name, baseline_appid, baseline_base_dir, notebook, notebook_html, traceview, stats, summary, pr)\n",
" !mail -a \"Content-type: text/html; charset=utf-8\" -s \"$title\" $mail_list < $body"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Compare to"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"if comp_appid:\n",
" comp_app=Application_Run(comp_appid,basedir=comp_base_dir)\n",
" output=app.compare_app(rapp=comp_app,show_metric=emonmetric,show_queryplan_diff=False,disk_prefix=disk_prefix,nic_prefix=nic_prefix)\n",
" display(HTML(output))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Config compare"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"if comp_appid:\n",
" comp_appals=comp_app.analysis['app']['als']\n",
" display(comp_spark_conf(appals, comp_appals))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Compare to baseline"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"if baseline_appid:\n",
" baseline_app=Application_Run(baseline_appid,basedir=baseline_base_dir)\n",
" output=app.compare_app(rapp=baseline_app,show_metric=emonmetric,show_queryplan_diff=False,disk_prefix=disk_prefix,nic_prefix=nic_prefix)\n",
" display(HTML(output))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Convert to HTML"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"%%javascript\n",
"IPython.notebook.kernel.execute('nb_name = \"' + IPython.notebook.notebook_name + '\"')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# htmlname=nb_name.replace(\"ipynb\",\"html\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# !jupyter nbconvert --to html ./{nb_name} --no-input --output html/{htmlname} --template classic"
]
}
],
"metadata": {
"celltoolbar": "Tags",
"hide_input": false,
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.12"
},
"nbTranslate": {
"displayLangs": [
"*"
],
"hotkey": "alt-t",
"langInMainMenu": true,
"sourceLang": "en",
"targetLang": "fr",
"useGoogleTranslate": true
},
"toc": {
"base_numbering": 1,
"nav_menu": {},
"number_sections": true,
"sideBar": false,
"skip_h1_title": false,
"title_cell": "Table of Contents",
"title_sidebar": "Contents",
"toc_cell": false,
"toc_position": {
"height": "197px",
"left": "2188px",
"top": "111px",
"width": "269px"
},
"toc_section_display": true,
"toc_window_display": true
},
"varInspector": {
"cols": {
"lenName": 16,
"lenType": 16,
"lenVar": 40
},
"kernels_config": {
"python": {
"delete_cmd_postfix": "",
"delete_cmd_prefix": "del ",
"library": "var_list.py",
"varRefreshCmd": "print(var_dic_list())"
},
"r": {
"delete_cmd_postfix": ") ",
"delete_cmd_prefix": "rm(",
"library": "var_list.r",
"varRefreshCmd": "cat(var_dic_list()) "
}
},
"types_to_exclude": [
"module",
"function",
"builtin_function_or_method",
"instance",
"_Feature"
],
"window_display": false
}
},
"nbformat": 4,
"nbformat_minor": 2
}