tools/workload/benchmark_velox/analysis/perf_analysis_template.ipynb (437 lines of code) (raw):

{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "tags": [
     "parameters"
    ]
   },
   "outputs": [],
   "source": [
    "appid=''\n",
    "disk=''\n",
    "nic=''\n",
    "tz=''\n",
    "base_dir=''\n",
    "name=''\n",
    "notebook=''\n",
    "notebook_html=''\n",
    "proxy=''\n",
    "emails=''\n",
    "pr=''\n",
    "\n",
    "comp_appid=''\n",
    "comp_base_dir=''\n",
    "comp_name=''\n",
    "\n",
    "baseline_appid=''\n",
    "baseline_base_dir=''"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%%html\n",
    "<style>\n",
    "div.output_stderr {\n",
    "background: #ffdd;\n",
    "display: none;\n",
    "}\n",
    "</style>"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import warnings\n",
    "warnings.filterwarnings('ignore')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import findspark\n",
    "findspark.init()\n",
    "\n",
    "import glob\n",
    "import os\n",
    "import time\n",
    "import sys\n",
    "from pyspark import SparkConf, SparkContext\n",
    "from pyspark.sql import SQLContext\n",
    "\n",
    "def get_py4jzip():\n",
    "    \"\"\"Return the first py4j*.zip found under $SPARK_HOME/python/lib.\n",
    "\n",
    "    Raises:\n",
    "        KeyError: if SPARK_HOME is not set in the environment.\n",
    "        FileNotFoundError: if no py4j*.zip exists in that directory.\n",
    "    \"\"\"\n",
    "    spark_home=os.environ['SPARK_HOME']\n",
    "    # Pure-Python lookup instead of a shell `ls`: a missing file now fails loudly\n",
    "    # here rather than putting a bogus entry on the executors' PYTHONPATH.\n",
    "    # sorted() keeps the alphabetical first-match choice.\n",
    "    py4jzips=sorted(glob.glob(f'{spark_home}/python/lib/py4j*.zip'))\n",
    "    if not py4jzips:\n",
    "        raise FileNotFoundError(f'no py4j*.zip found in {spark_home}/python/lib')\n",
    "    return py4jzips[0]\n",
    "\n",
    "# Configuration of the Spark application that runs this analysis notebook.\n",
    "conf = (SparkConf()\n",
    "        .set('spark.app.name', f'perf_analysis_{appid}')\n",
    "        .set('spark.serializer','org.apache.spark.serializer.KryoSerializer')\n",
    "        .set('spark.executor.instances', '4')\n",
    "        .set('spark.executor.cores','4')\n",
    "        .set('spark.executor.memory', '8g')\n",
    "        .set('spark.driver.memory','20g')\n",
    "        .set('spark.memory.offHeap.enabled','True')\n",
    "        .set('spark.memory.offHeap.size','20g')\n",
    "        .set('spark.executor.memoryOverhead','1g')\n",
    "        .set('spark.executor.extraJavaOptions',\n",
    "             '-XX:+UseParallelGC -XX:+UseParallelOldGC -verbose:gc -XX:+PrintGCDetails -XX:+PrintGCTimeStamps')\n",
    "        # Executors get Spark's python dir, the py4j zip and the driver's sys.path.\n",
    "        .set('spark.executorEnv.PYTHONPATH',f\"{os.environ['SPARK_HOME']}/python:{get_py4jzip()}:{':'.join(sys.path)}\")\n",
    "        .set('spark.sql.inMemoryColumnarStorage.compressed','False')\n",
    "        .set('spark.sql.inMemoryColumnarStorage.batchSize','100000')\n",
    "        .set('spark.sql.execution.arrow.pyspark.fallback.enabled','True')\n",
    "        .set('spark.sql.execution.arrow.pyspark.enabled','True')\n",
    "        .set('spark.sql.execution.arrow.maxRecordsPerBatch','100000')\n",
    "        .set(\"spark.sql.repl.eagerEval.enabled\", True)\n",
    "        .set(\"spark.sql.legacy.timeParserPolicy\",\"LEGACY\")\n",
    "        .set(\"spark.sql.session.timeZone\", tz)\n",
    "       )\n",
    "\n",
    "sc = SparkContext(conf=conf,master='yarn')\n",
    "sc.setLogLevel(\"ERROR\")\n",
    "spark = SQLContext(sc)\n",
    "# NOTE(review): fixed 10 s pause after creating the context; presumably gives the\n",
    "# executors time to come up - confirm whether it is still needed.\n",
    "time.sleep(10)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "scrolled": false
   },
   "outputs": [],
   "source": [
    "# Runs the sparklog notebook in this kernel. Names used below that are not defined\n",
    "# in this notebook (Application_Run, HTML, comp_spark_conf, generate_email_body_title)\n",
    "# presumably come from it - verify against sparklog.ipynb.\n",
    "%run ~/PAUS/sparklog.ipynb"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Export the proxy parameter through the standard proxy environment variables.\n",
    "os.environ[\"https_proxy\"] = proxy\n",
    "os.environ[\"http_proxy\"] = proxy"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Metric names passed as show_metric to the summary, trace view and comparisons.\n",
    "emonmetric=['emon_cpuutil',\n",
    "            'emon_cpufreq',\n",
    "            'emon_instr_retired',\n",
    "            'emon_ipc']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Split the comma-separated disk/nic parameters and wrap each name in single quotes.\n",
    "disk_prefix=[f\"'{dev}'\" for dev in disk.split(',')]\n",
    "nic_prefix=[f\"'{dev}'\" for dev in nic.split(',')]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "# Links to the report sections.\n",
    "display(HTML('<a href=#App-info> 1 App info</a>'))\n",
    "display(HTML(f\"<a href=#Compare-to{'-' + comp_name if comp_name else ''}> 2 Compare to {comp_name}</a>\"))\n",
    "display(HTML('<a href=#Config-compare> 3 Config compare</a>'))\n",
    "display(HTML('<a href=#Compare-to-baseline> 4 Compare to baseline</a>'))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# App info"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "app=Application_Run(appid, basedir=base_dir)\n",
    "appals=app.analysis['app']['als']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "stats=appals.get_basic_state()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "summary=app.get_summary(show_metric=emonmetric,disk_prefix=disk_prefix,nic_prefix=nic_prefix)\n",
    "display(summary.style)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "traceview=app.generate_trace_view(showemon=True,show_metric=emonmetric,disk_prefix=disk_prefix,nic_prefix=nic_prefix)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "appals.get_app_name()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "allconfs=appals.get_spark_config().to_dict()[0]\n",
    "# Shuffle statistics are only collected when the run used the Gluten plugin.\n",
    "if 'spark.plugins' in allconfs and allconfs['spark.plugins'] == 'org.apache.gluten.GlutenPlugin':\n",
    "    shuffle_df, dfx=appals.get_shuffle_stat()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "appals.get_app_info(disk_prefix=disk_prefix,nic_prefix=nic_prefix)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "appals.show_critical_path_time_breakdown().T"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Mail the report when recipients were given (comma-separated emails parameter).\n",
    "if emails:\n",
    "    import subprocess\n",
    "\n",
    "    mail_list=' '.join(emails.split(','))\n",
    "    body,title=generate_email_body_title(appid, base_dir, name, comp_appid, comp_base_dir, comp_name, baseline_appid, baseline_base_dir, notebook, notebook_html, traceview, stats, summary, pr)\n",
    "    # Run mail without a shell: the title and addresses are passed as separate\n",
    "    # arguments, so quotes, `$` or backticks in them cannot alter the command.\n",
    "    mail_cmd=['mail', '-a', 'Content-type: text/html; charset=utf-8', '-s', str(title), *mail_list.split()]\n",
    "    try:\n",
    "        # body is the path of the file that is fed to mail on stdin.\n",
    "        with open(os.path.expanduser(str(body)), 'rb') as body_file:\n",
    "            mail_run=subprocess.run(mail_cmd, stdin=body_file, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)\n",
    "        print(mail_run.stdout.decode('utf-8', errors='replace'), end='')\n",
    "    except OSError as mail_err:\n",
    "        # Best effort: a missing mail binary or body file must not stop the report.\n",
    "        print(f'Could not send the report email: {mail_err}')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Compare to"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Optional comparison run; skipped when comp_appid is empty.\n",
    "if comp_appid:\n",
    "    comp_app=Application_Run(comp_appid,basedir=comp_base_dir)\n",
    "    output=app.compare_app(rapp=comp_app,show_metric=emonmetric,show_queryplan_diff=False,disk_prefix=disk_prefix,nic_prefix=nic_prefix)\n",
    "    display(HTML(output))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Config compare"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Uses comp_app created by the comparison cell above (same comp_appid guard).\n",
    "if comp_appid:\n",
    "    comp_appals=comp_app.analysis['app']['als']\n",
    "    display(comp_spark_conf(appals, comp_appals))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Compare to baseline"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Optional baseline comparison; skipped when baseline_appid is empty.\n",
    "if baseline_appid:\n",
    "    baseline_app=Application_Run(baseline_appid,basedir=baseline_base_dir)\n",
    "    output=app.compare_app(rapp=baseline_app,show_metric=emonmetric,show_queryplan_diff=False,disk_prefix=disk_prefix,nic_prefix=nic_prefix)\n",
    "    display(HTML(output))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Convert to HTML"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%%javascript\n",
    "IPython.notebook.kernel.execute('nb_name = \"' + IPython.notebook.notebook_name + '\"')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# htmlname=nb_name.replace(\"ipynb\",\"html\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# !jupyter nbconvert --to html ./{nb_name} --no-input --output html/{htmlname} --template classic"
   ]
  }
 ],
 "metadata": {
  "celltoolbar": "Tags",
  "hide_input": false,
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer":
"ipython3", "version": "3.10.12" }, "nbTranslate": { "displayLangs": [ "*" ], "hotkey": "alt-t", "langInMainMenu": true, "sourceLang": "en", "targetLang": "fr", "useGoogleTranslate": true }, "toc": { "base_numbering": 1, "nav_menu": {}, "number_sections": true, "sideBar": false, "skip_h1_title": false, "title_cell": "Table of Contents", "title_sidebar": "Contents", "toc_cell": false, "toc_position": { "height": "197px", "left": "2188px", "top": "111px", "width": "269px" }, "toc_section_display": true, "toc_window_display": true }, "varInspector": { "cols": { "lenName": 16, "lenType": 16, "lenVar": 40 }, "kernels_config": { "python": { "delete_cmd_postfix": "", "delete_cmd_prefix": "del ", "library": "var_list.py", "varRefreshCmd": "print(var_dic_list())" }, "r": { "delete_cmd_postfix": ") ", "delete_cmd_prefix": "rm(", "library": "var_list.r", "varRefreshCmd": "cat(var_dic_list()) " } }, "types_to_exclude": [ "module", "function", "builtin_function_or_method", "instance", "_Feature" ], "window_display": false } }, "nbformat": 4, "nbformat_minor": 2 }