scenario-notebooks/Automated-Notebooks/AutomationGallery-CredentialScanOnAzureDataExplorer.ipynb (368 lines of code) (raw):

{ "cells": [ { "cell_type": "markdown", "source": [ "# Automation Gallery - Credential Scan on Azure Data Explorer\n", "\n", "__Notebook Version:__ 1.0<br>\n", "__Python Version:__ Python 3.8<br>\n", "__Apache Spark Version:__ 3.1<br>\n", "__Required Packages:__ azure-kusto-data, azure-mgmt-kusto<br>\n", "__Platforms Supported:__ Azure Synapse Analytics\n", " \n", "__Data Source Required:__ Data Explorer data tables \n", " \n", "### Description\n", "This notebook provides step-by-step instructions and sample code to detect credential leak into Azure Data Explorer using Azure SDK for Python and KQL.<br>\n", "*** Need to download and install Python modules for Azure Data Explorer. ***<br>\n", "*** Please run the cells sequentially to avoid errors. Please do not use \"run all cells\". *** <br>\n", "Need to know more about KQL? [Getting started with Kusto Query Language](https://docs.microsoft.com/azure/data-explorer/kusto/concepts/).\n", "\n", "## Table of Contents\n", "1. Warm-up\n", "2. Azure Authentication\n", "3. Azure data Explorer Queries" ], "metadata": { "nteract": { "transient": { "deleting": false } } } }, { "cell_type": "markdown", "source": [ "## 1. Warm-up" ], "metadata": { "nteract": { "transient": { "deleting": false } } } }, { "cell_type": "code", "source": [ "# If you need to know what Python modules are available, you may run this:\n", "# help(\"modules\")\n", "# During the installation, there maybe some incompatible errors, you can safely ignore them.\n", "#!pip install azure-kusto-data\n", "#!pip install azure-mgmt-kusto --upgrade" ], "outputs": [], "execution_count": null, "metadata": { "gather": { "logged": 1632434315203 }, "jupyter": { "outputs_hidden": false, "source_hidden": false }, "nteract": { "transient": { "deleting": false } }, "scrolled": true } }, { "cell_type": "code", "source": [ "# Load Python libraries that will be used in this notebook\n", "from azure.mgmt.resource import ResourceManagementClient\n", "from azure.mgmt.kusto import KustoManagementClient\n", "from azure.kusto.data.exceptions import KustoServiceError\n", "from azure.kusto.data.helpers import dataframe_from_result_table\n", "from azure.kusto.data import KustoClient, KustoConnectionStringBuilder, ClientRequestProperties\n", "from azure.identity import AzureCliCredential, DefaultAzureCredential, ClientSecretCredential\n", "\n", "import time\n", "import pandas as pd\n", "import json\n", "import ipywidgets\n", "from IPython.display import display, HTML, Markdown\n", "import warnings" ], "outputs": [], "execution_count": null, "metadata": { "gather": { "logged": 1632434338878 }, "jupyter": { "outputs_hidden": false, "source_hidden": false }, "nteract": { "transient": { "deleting": false } }, "collapsed": true } }, { "cell_type": "code", "source": [ "# Functions will be used in this notebook\n", "def get_credscan_kql_where_clause(column_name):\n", " \"This function return the KQL where clause for credscan\"\n", " where_clause = \" | where {0} \"\n", " regex_string = \"\"\n", " regex_list = [\n", " r\"(?i)(ida:password|IssuerSecret|(api|client|app(lication)?)[_\\\\- ]?(key|secret)[^,a-z]|\\\\.azuredatabricks\\\\.net).{0,10}(dapi)?[a-z0-9/+]{22}\",\n", " r\"(?i)(x-api-(key|token).{0,10}[a-z0-9/+]{40}|v1\\\\.[a-z0-9/+]{40}[^a-z0-9/+])\",\n", " r\"(?-i)\\\\WAIza(?i)[a-z0-9_\\\\\\\\\\\\-]{35}\\\\W\",\n", " r\"(?i)(\\\\Wsig\\\\W|Secret(Value)?|IssuerSecret|(\\\\Wsas|primary|secondary|management|Shared(Access(Policy)?)?).?Key|\\\\.azure\\\\-devices\\\\.net|\\\\.(core|servicebus|redis\\\\.cache|accesscontrol|mediaservices)\\\\.(windows\\\\.net|chinacloudapi\\\\.cn|cloudapi\\\\.de|usgovcloudapi\\\\.net)|New\\\\-AzureRedisCache).{0,100}([a-z0-9/+]{43}=)\",\n", " r\"(?i)visualstudio\\\\.com.{1,100}\\\\W(?-i)[a-z2-7]{52}\\\\W\",\n", " r\"(?i)se=2021.+sig=[a-z0-9%]{43,63}%3d\",\n", " r\"(?i)(x-functions-key|ApiKey|Code=|\\\\.azurewebsites\\\\.net/api/).{0,100}[a-z0-9/\\\\+]{54}={2}\",\n", " r\"(?i)code=[a-z0-9%]{54,74}(%3d){2}\",\n", " r\"(?i)(userpwd|publishingpassword).{0,100}[a-z0-9/\\\\+]{60}\\\\W\",\n", " r\"(?i)[^a-z0-9/\\\\+][a-z0-9/\\\\+]{86}==\",\n", " r\"(?-i)\\\\-{5}BEGIN( ([DR]SA|EC|OPENSSH|PGP))? PRIVATE KEY( BLOCK)?\\\\-{5}\",\n", " r\"(?i)(app(lication)?|client)[_\\\\- ]?(key(url)?|secret)([\\\\s=:>]{1,10}|[\\\\s\\\"':=|>\\\\]]{3,15}|[\\\"'=:\\\\(]{2})[^\\\\-]\",\n", " r\"(?i)refresh[_\\\\-]?token([\\\\s=:>]{1,10}|[\\\\s\\\"':=|>\\\\]]{3,15}|[\\\"'=:\\\\(]{2})(\\\"data:text/plain,.+\\\"|[a-z0-9/+=_.-]{20,200})\",\n", " r\"(?i)AccessToken(Secret)?([\\\\s\\\"':=|>\\\\]]{3,15}|[\\\"'=:\\\\(]{2}|[\\\\s=:>]{1,10})[a-z0-9/+=_.-]{20,200}\",\n", " r\"(?i)[a-z0-9]{3,5}://[^%:\\\\s\\\"'/][^:\\\\s\\\"'/\\\\$]+[^:\\\\s\\\"'/\\\\$%]:([^%\\\\s\\\"'/][^@\\\\s\\\"'/]{0,100}[^%\\\\s\\\"'/])@[\\\\$a-z0-9:\\\\.\\\\-_%\\\\?=/]+\",\n", " r\"(?i)snmp(\\\\-server)?\\\\.exe.{0,100}(priv|community)\",\n", " r\"(?i)(ConvertTo\\\\-?SecureString\\\\s*((\\\\(|\\\\Wstring)\\\\s*)?['\\\"]+)\",\n", " r\"(?i)(Consumer|api)[_\\\\- ]?(Secret|Key)([\\\\s=:>]{1,10}|[\\\\s\\\"':=|>,\\\\]]{3,15}|[\\\"'=:\\\\(]{2})[^\\\\s]{5,}\",\n", " r\"(?i)authorization[,\\\\[:= \\\"']+([dbaohmnsv])\",\n", " r\"(?i)-u\\\\s+.{2,100}-p\\\\s+[^\\\\-/]\",\n", " r\"(?i)(amqp|ssh|(ht|f)tps?)://[^%:\\\\s\\\"'/][^:\\\\s\\\"'/\\\\$]+[^:\\\\s\\\"'/\\\\$%]:([^%\\\\s\\\"'/][^@\\\\s\\\"'/]{0,100}[^%\\\\s\\\"'/])@[\\\\$a-z0-9:\\\\.\\\\-_%\\\\?=/]+\",\n", " r\"(?i)(\\\\Waws|amazon)?.{0,5}(secret|access.?key).{0,10}\\\\W[a-z0-9/\\\\+]{40}\",\n", " r\"(?-i)(eyJ0eXAiOiJKV1Qi|eyJhbGci)\",\n", " r\"(?i)@(\\\\.(on)?)?microsoft\\\\.com[ -~\\\\s]{1,100}?(\\\\w?pass\\\\w?)\",\n", " r\"(?i)net(\\\\.exe)?.{1,5}(user\\\\s+|share\\\\s+/user:|user-?secrets? set)\\\\s+[a-z0-9]\",\n", " r\"(?i)xox[pbar]\\\\-[a-z0-9]\",\n", " r\"(?i)[\\\":\\\\s=]((x?corp|extranet(test)?|ntdev)(\\\\.microsoft\\\\.com)?|corp|redmond|europe|middleeast|northamerica|southpacific|southamerica|fareast|africa|exchange|extranet(test)?|partners|parttest|ntdev|ntwksta)\\\\W.{0,100}(password|\\\\Wpwd|\\\\Wpass|\\\\Wpw\\\\W|userpass)\",\n", " r\"(?i)(sign_in|SharePointOnlineAuthenticatedContext|(User|Exchange)Credentials?|password)[ -~\\\\s]{0,100}?@([a-z0-9.]+\\\\.(on)?)?microsoft\\\\.com['\\\"]?\",\n", " r\"(?i)(\\\\.database\\\\.azure\\\\.com|\\\\.database(\\\\.secure)?\\\\.windows\\\\.net|\\\\.cloudapp\\\\.net|\\\\.database\\\\.usgovcloudapi\\\\.net|\\\\.database\\\\.chinacloudapi\\\\.cn|\\\\.database.cloudapi.de).{0,100}(DB_PASS|(sql|service)?password|\\\\Wpwd\\\\W)\",\n", " r\"(?i)(secret(.?key)?|password)[\\\"']?\\\\s*[:=]\\\\s*[\\\"'][^\\\\s]+?[\\\"']\",\n", " r\"(?i)[^a-z\\\\$](DB_USER|user id|uid|(sql)?user(name)?|service\\\\s?account)\\\\s*[^\\\\w\\\\s,]([ -~\\\\s]{2,120}?|[ -~]{2,30}?)([^a-z\\\\s\\\\$]|\\\\s)\\\\s*(DB_PASS|(sql|service)?password|pwd)\",\n", " r\"(?i)(password|secret(key)?)[ \\\\t]*[=:]+[ \\\\t]*([^:\\\\s\\\"';,<]{2,200})\",\n", " ]\n", "\n", " for (i, re_str) in enumerate(regex_list):\n", " if i != 0:\n", " if i == 27:\n", " regex_string += \" and \"\n", " else:\n", " regex_string += \" or \" \n", "\n", " regex_string += \" \" + column_name + \" matches regex \\\"\" + re_str + \"\\\"\"\n", "\n", " return where_clause.format(regex_string)\n" ], "outputs": [], "execution_count": null, "metadata": { "gather": { "logged": 1632434343365 }, "jupyter": { "outputs_hidden": false, "source_hidden": false }, "nteract": { "transient": { "deleting": false } }, "collapsed": true } }, { "cell_type": "markdown", "source": [ "## 2. Azure Authentication" ], "metadata": { "nteract": { "transient": { "deleting": false } } } }, { "cell_type": "code", "source": [ "tenant_id = ''\r\n", "subscription_id = ''\r\n", "akv_name = ''\r\n", "client_id_name = ''\r\n", "client_secret_name = ''\r\n", "akv_link_name = ''\r\n", "group_name = ''\r\n", "adx_cluster_name = ''" ], "outputs": [], "execution_count": null, "metadata": { "jupyter": { "source_hidden": false, "outputs_hidden": false }, "nteract": { "transient": { "deleting": false } }, "tags": [ "parameters" ] } }, { "cell_type": "code", "source": [ "client_id = mssparkutils.credentials.getSecret(akv_name, client_id_name, akv_link_name)\r\n", "client_secret = mssparkutils.credentials.getSecret(akv_name, client_secret_name, akv_link_name)" ], "outputs": [], "execution_count": null, "metadata": { "jupyter": { "source_hidden": false, "outputs_hidden": false }, "nteract": { "transient": { "deleting": false } } } }, { "cell_type": "code", "source": [ "credential = ClientSecretCredential(\r\n", " tenant_id=tenant_id, \r\n", " client_id=client_id, \r\n", " client_secret=client_secret)" ], "outputs": [], "execution_count": null, "metadata": { "jupyter": { "source_hidden": false, "outputs_hidden": false }, "nteract": { "transient": { "deleting": false } } } }, { "cell_type": "code", "source": [ "kusto_client = KustoManagementClient(credential, subscription_id = subscription_id)\r\n", "if kusto_client != None:\r\n", " cluster_list = list(kusto_client.clusters.list_by_resource_group(group_name))\r\n", " if cluster_list != None:\r\n", " cluster_uris = [c.uri for c in cluster_list if c.name == adx_cluster_name]\r\n", " database_list = kusto_client.databases.list_by_cluster(group_name, cluster_name=adx_cluster_name)\r\n", " kcsb = KustoConnectionStringBuilder.with_aad_application_key_authentication(cluster_uris[0], client_id, client_secret, tenant_id)\r\n", " kusto_data_client = KustoClient(kcsb)" ], "outputs": [], "execution_count": null, "metadata": { "jupyter": { "source_hidden": false, "outputs_hidden": false }, "nteract": { "transient": { "deleting": false } } } }, { "cell_type": "markdown", "source": [ "## 3. Azure Data Explorer Queries" ], "metadata": { "nteract": { "transient": { "deleting": false } } } }, { "cell_type": "code", "source": [ "if kusto_data_client != None and database_list != None:\r\n", " database_name_list = sorted([c.name for c in database_list])\r\n", " for database_name in database_name_list:\r\n", " db_name = database_name.split(\"/\")[1]\r\n", " tables = kusto_data_client.execute_mgmt(db_name, \".show tables details\")\r\n", " table_list = dataframe_from_result_table(tables.primary_results[0]).TableName.tolist()\r\n", " if table_list != None:\r\n", " #table_dropdown = ipywidgets.Dropdown(options=sorted(table_list), description='Tables:')\r\n", " column_name = \"*\"\r\n", " for table in table_list:\r\n", " kql_where_clause = get_credscan_kql_where_clause(column_name)\r\n", " query = \"{0} {1}\".format(table, kql_where_clause)\r\n", " print(table)\r\n", "\r\n", " try:\r\n", " # Run query\r\n", " result = kusto_data_client.execute_query(database=db_name, query=query)\r\n", "\r\n", " # Display Result\r\n", " final_result = dataframe_from_result_table(result.primary_results[0])\r\n", " if final_result.size != 0:\r\n", " display(final_result)\r\n", " except:\r\n", " print(\"==============================\");\r\n", " print(\" This table got http error:\")\r\n", " print(\"==============================\");\r\n" ], "outputs": [], "execution_count": null, "metadata": { "jupyter": { "source_hidden": false, "outputs_hidden": false }, "nteract": { "transient": { "deleting": false } } } } ], "metadata": { "kernelspec": { "name": "synapse_pyspark", "display_name": "Synapse PySpark" }, "language_info": { "name": "python" }, "description": null, "save_output": true, "synapse_widget": { "version": "0.1", "state": {} } }, "nbformat": 4, "nbformat_minor": 2 }