scenario-notebooks/Automated-Notebooks/AutomationGallery-CredentialScanOnAzureBlobStorage.ipynb (655 lines of code) (raw):
{
"cells": [
{
"cell_type": "markdown",
"source": [
"# Automation Gallery - Credential Scan on Azure Blob Storage\n",
"\n",
"__Notebook Version:__ 1.0<br>\n",
"__Python Version:__ Python 3.8<br>\n",
"__Apache Spark Version:__ 3.1<br>\n",
"__Required Packages:__ No<br>\n",
"__Platforms Supported:__ Azure Synapse Analytics\n",
" \n",
"__Data Source Required:__ No \n",
" \n",
"### Description\n",
"This notebook provides step-by-step instructions and sample code to detect credentials leaked into Azure Blob Storage, using the Azure SDK for Python.<br>\n",
"*** No need to download and install any other Python modules. ***<br>\n",
"*** Please run the cells sequentially to avoid errors. Please do not use \"run all cells\". *** <br>\n",
"\n",
"## Table of Contents\n",
"1. Warm-up\n",
"2. Authentication to Azure Storage\n",
"3. Scan Azure Blob for Leaking Credentials\n",
"4. Save result to Microsoft Sentinel Dynamic Summaries"
],
"metadata": {
"nteract": {
"transient": {
"deleting": false
}
}
}
},
{
"cell_type": "markdown",
"source": [
"## 1. Warm-up"
],
"metadata": {
"nteract": {
"transient": {
"deleting": false
}
}
}
},
{
"cell_type": "code",
"source": [
"# Load Python libraries that will be used in this notebook\n",
"from azure.mgmt.storage import StorageManagementClient\n",
"from azure.identity import DefaultAzureCredential\n",
"from azure.storage.blob import BlobServiceClient, BlobClient, ContainerClient, __version__\n",
"from azure.identity import ClientSecretCredential\n",
"from datetime import datetime, timedelta, timezone\n",
"\n",
"import json\n",
"from json import JSONEncoder\n",
"from IPython.display import display, HTML, Markdown\n",
"import re\n",
"import pandas as pd"
],
"outputs": [],
"execution_count": null,
"metadata": {
"gather": {
"logged": 1642180667371
},
"jupyter": {
"outputs_hidden": false,
"source_hidden": false
},
"nteract": {
"transient": {
"deleting": false
}
},
"collapsed": true
}
},
{
"cell_type": "code",
"source": [
"# Functions will be used in this notebook\n",
"def get_file_content(blob):\n",
" \"Decoding file content\"\n",
" try:\n",
" content = blob.content_as_text(max_concurrency=1, encoding='UTF-8')\n",
" except UnicodeDecodeError:\n",
" content = blob.content_as_text(max_concurrency=1, encoding='UTF-16')\n",
" except Exception as ex:\n",
" print(ex)\n",
" content= \"\"\n",
" return content\n",
"\n",
"def get_regex_list():\n",
" \"This function return RegEx list for credscan\"\n",
" regex_list = [\n",
" \"(?i)(ida:password|IssuerSecret|(api|client|app(lication)?)[_\\\\- ]?(key|secret)[^,a-z]|\\\\.azuredatabricks\\\\.net).{0,10}(dapi)?[a-z0-9/+]{22}\",\n",
" \"(?i)(x-api-(key|token).{0,10}[a-z0-9/+]{40}|v1\\\\.[a-z0-9/+]{40}[^a-z0-9/+])\",\n",
" \"(?-i:)\\\\WAIza(?i)[a-z0-9_\\\\\\\\\\\\-]{35}\\\\W\",\n",
" \"(?i)(\\\\Wsig\\\\W|Secret(Value)?|IssuerSecret|(\\\\Wsas|primary|secondary|management|Shared(Access(Policy)?)?).?Key|\\\\.azure\\\\-devices\\\\.net|\\\\.(core|servicebus|redis\\\\.cache|accesscontrol|mediaservices)\\\\.(windows\\\\.net|chinacloudapi\\\\.cn|cloudapi\\\\.de|usgovcloudapi\\\\.net)|New\\\\-AzureRedisCache).{0,100}([a-z0-9/+]{43}=)\",\n",
" \"(?i)visualstudio\\\\.com.{1,100}\\\\W(?-i:)[a-z2-7]{52}\\\\W\",\n",
" \"(?i)se=2021.+sig=[a-z0-9%]{43,63}%3d\",\n",
" \"(?i)(x-functions-key|ApiKey|Code=|\\\\.azurewebsites\\\\.net/api/).{0,100}[a-z0-9/\\\\+]{54}={2}\",\n",
" \"(?i)code=[a-z0-9%]{54,74}(%3d){2}\",\n",
" \"(?i)(userpwd|publishingpassword).{0,100}[a-z0-9/\\\\+]{60}\\\\W\",\n",
" \"(?i)[^a-z0-9/\\\\+][a-z0-9/\\\\+]{86}==\",\n",
" \"(?-i:)\\\\-{5}BEGIN( ([DR]SA|EC|OPENSSH|PGP))? PRIVATE KEY( BLOCK)?\\\\-{5}\",\n",
" \"(?i)(app(lication)?|client)[_\\\\- ]?(key(url)?|secret)([\\\\s=:>]{1,10}|[\\\\s\\\"':=|>\\\\]]{3,15}|[\\\"'=:\\\\(]{2})[^\\\\-]\",\n",
" \"(?i)refresh[_\\\\-]?token([\\\\s=:>]{1,10}|[\\\\s\\\"':=|>\\\\]]{3,15}|[\\\"'=:\\\\(]{2})(\\\"data:text/plain,.+\\\"|[a-z0-9/+=_.-]{20,200})\",\n",
" \"(?i)AccessToken(Secret)?([\\\\s\\\"':=|>\\\\]]{3,15}|[\\\"'=:\\\\(]{2}|[\\\\s=:>]{1,10})[a-z0-9/+=_.-]{20,200}\",\n",
" \"(?i)[a-z0-9]{3,5}://[^%:\\\\s\\\"'/][^:\\\\s\\\"'/\\\\$]+[^:\\\\s\\\"'/\\\\$%]:([^%\\\\s\\\"'/][^@\\\\s\\\"'/]{0,100}[^%\\\\s\\\"'/])@[\\\\$a-z0-9:\\\\.\\\\-_%\\\\?=/]+\",\n",
" \"(?i)snmp(\\\\-server)?\\\\.exe.{0,100}(priv|community)\",\n",
" \"(?i)(ConvertTo\\\\-?SecureString\\\\s*((\\\\(|\\\\Wstring)\\\\s*)?['\\\"]+)\",\n",
" \"(?i)(Consumer|api)[_\\\\- ]?(Secret|Key)([\\\\s=:>]{1,10}|[\\\\s\\\"':=|>,\\\\]]{3,15}|[\\\"'=:\\\\(]{2})[^\\\\s]{5,}\",\n",
" \"(?i)authorization[,\\\\[:= \\\"']+([dbaohmnsv])\",\n",
" \"(?i)-u\\\\s+.{2,100}-p\\\\s+[^\\\\-/]\",\n",
" \"(?i)(amqp|ssh|(ht|f)tps?)://[^%:\\\\s\\\"'/][^:\\\\s\\\"'/\\\\$]+[^:\\\\s\\\"'/\\\\$%]:([^%\\\\s\\\"'/][^@\\\\s\\\"'/]{0,100}[^%\\\\s\\\"'/])@[\\\\$a-z0-9:\\\\.\\\\-_%\\\\?=/]+\",\n",
" \"(?i)(\\\\Waws|amazon)?.{0,5}(secret|access.?key).{0,10}\\\\W[a-z0-9/\\\\+]{40}\",\n",
" \"(?-i:)(eyJ0eXAiOiJKV1Qi|eyJhbGci)\",\n",
" \"(?i)@(\\\\.(on)?)?microsoft\\\\.com[ -~\\\\s]{1,100}?(\\\\w?pass\\\\w?)\",\n",
" \"(?i)net(\\\\.exe)?.{1,5}(user\\\\s+|share\\\\s+/user:|user-?secrets? set)\\\\s+[a-z0-9]\",\n",
" \"(?i)xox[pbar]\\\\-[a-z0-9]\",\n",
" \"(?i)[\\\":\\\\s=]((x?corp|extranet(test)?|ntdev)(\\\\.microsoft\\\\.com)?|corp|redmond|europe|middleeast|northamerica|southpacific|southamerica|fareast|africa|exchange|extranet(test)?|partners|parttest|ntdev|ntwksta)\\\\W.{0,100}(password|\\\\Wpwd|\\\\Wpass|\\\\Wpw\\\\W|userpass)\",\n",
" \"(?i)(sign_in|SharePointOnlineAuthenticatedContext|(User|Exchange)Credentials?|password)[ -~\\\\s]{0,100}?@([a-z0-9.]+\\\\.(on)?)?microsoft\\\\.com['\\\"]?\",\n",
" \"(?i)(\\\\.database\\\\.azure\\\\.com|\\\\.database(\\\\.secure)?\\\\.windows\\\\.net|\\\\.cloudapp\\\\.net|\\\\.database\\\\.usgovcloudapi\\\\.net|\\\\.database\\\\.chinacloudapi\\\\.cn|\\\\.database.cloudapi.de).{0,100}(DB_PASS|(sql|service)?password|\\\\Wpwd\\\\W)\",\n",
" \"(?i)(secret(.?key)?|password)[\\\"']?\\\\s*[:=]\\\\s*[\\\"'][^\\\\s]+?[\\\"']\",\n",
" \"(?i)[^a-z\\\\$](DB_USER|user id|uid|(sql)?user(name)?|service\\\\s?account)\\\\s*[^\\\\w\\\\s,]([ -~\\\\s]{2,120}?|[ -~]{2,30}?)([^a-z\\\\s\\\\$]|\\\\s)\\\\s*(DB_PASS|(sql|service)?password|pwd)\",\n",
" \"(?i)(password|secret(key)?)[ \\\\t]*[=:]+[ \\\\t]*([^:\\\\s\\\"';,<]{2,200})\",\n",
" ]\n",
"\n",
" return regex_list\n",
"\n",
"def convert_result_to_string(result_row):\n",
" if (type(result_row)) == str:\n",
" return result_row\n",
" elif (type(result_row)) == tuple:\n",
" return ','.join([m for m in result_row if len(m) > 0])\n",
"\n",
"def file_modified_date_check(days_back, modified_date):\n",
" aware_local_now = datetime.now(timezone.utc).astimezone()\n",
" time_between_modified = aware_local_now - modified_date\n",
" return time_between_modified.days < days_back\n",
"\n",
"class file_scan_result:\n",
" \"\"\"\n",
" This class is for handling scan result for each file.\n",
" \"\"\"\n",
" def __init__(self, file_name, file_last_modified):\n",
" self.file_name = file_name\n",
" self.results = {}\n",
"\n",
" def add_result(self, key, value):\n",
" \"\"\" Add result to the dictionary, key is regex string, value will be list \"\"\"\n",
" self.results[key] = value\n",
"\n",
"class result_encoder(JSONEncoder):\n",
" def default(self, o):\n",
" return o.__dict__\n",
"\n",
"from msrest.authentication import BasicTokenAuthentication\n",
"from azure.core.pipeline.policies import BearerTokenCredentialPolicy\n",
"from azure.core.pipeline import PipelineRequest, PipelineContext\n",
"from azure.core.pipeline.transport import HttpRequest\n",
"from azure.identity import DefaultAzureCredential\n",
"\n",
"class AzureIdentityCredentialAdapter(BasicTokenAuthentication):\n",
"    \"\"\"Presents an azure-identity credential through the BasicTokenAuthentication interface.\"\"\"\n",
"\n",
"    def __init__(self, credential=None, resource_id=\"https://management.azure.com/.default\", **kwargs):\n",
"        \"\"\"Build the adapter around an azure-identity credential.\n",
"\n",
"        :param credential: any azure-identity credential; a DefaultAzureCredential is created when omitted\n",
"        :param str resource_id: scope requested for the token (defaults to the ARM scope)\n",
"        :param kwargs: forwarded unchanged to BearerTokenCredentialPolicy\n",
"        \"\"\"\n",
"        super().__init__(None)\n",
"        chosen_credential = DefaultAzureCredential() if credential is None else credential\n",
"        self._policy = BearerTokenCredentialPolicy(chosen_credential, resource_id, **kwargs)\n",
"\n",
"    def _make_request(self):\n",
"        \"\"\"Create a placeholder pipeline request for the policy to write its Authorization header onto.\"\"\"\n",
"        placeholder_http_request = HttpRequest(\"AzureIdentityCredentialAdapter\", \"https://fakeurl\")\n",
"        return PipelineRequest(placeholder_http_request, PipelineContext(None))\n",
"\n",
"    def set_token(self):\n",
"        \"\"\"Obtain a token by running the bearer-token policy against a placeholder request.\n",
"\n",
"        The policy sets an Authorization header on the request; everything after the first\n",
"        space of that header is stored under the 'access_token' key of self.token.\n",
"        \"\"\"\n",
"        placeholder = self._make_request()\n",
"        self._policy.on_request(placeholder)\n",
"        authorization_header = placeholder.http_request.headers[\"Authorization\"]\n",
"        bearer_value = authorization_header.split(\" \", 1)[1]\n",
"        self.token = {\"access_token\": bearer_value}\n",
"\n",
"    def get_token(self):\n",
"        \"\"\"Return the token dictionary most recently stored by set_token.\"\"\"\n",
"        return self.token\n",
"\n",
"    def signed_session(self, session=None):\n",
"        \"\"\"Refresh the token, then delegate session signing to the base class.\"\"\"\n",
"        self.set_token()\n",
"        return super().signed_session(session)"
],
"outputs": [],
"execution_count": null,
"metadata": {
"gather": {
"logged": 1642180670633
},
"jupyter": {
"outputs_hidden": false,
"source_hidden": false
},
"nteract": {
"transient": {
"deleting": false
}
},
"collapsed": true
}
},
{
"cell_type": "code",
"source": [
"import uuid\r\n",
"import requests\r\n",
"\r\n",
"class DynamicSummary():\r\n",
" \"\"\" Dynamic Summary object model \"\"\"\r\n",
" \r\n",
" @staticmethod\r\n",
" def get_new_guid():\r\n",
" \"\"\" generate new GUID \"\"\"\r\n",
" return uuid.uuid4()\r\n",
"\r\n",
" def __init__(self, summary_id):\r\n",
" self.summary_id = summary_id\r\n",
"\r\n",
" def serialize(self):\r\n",
" serialized_str = '\"summaryId\": \"' + self.summary_id + '\", \"summaryName\": \"' + self.summary_name + '\", \"azureTenantId\": \"' + self.azure_tenant_id + '\", \"summaryDescription\": \"' + self.summary_description + '\"'\r\n",
" if hasattr(self, 'relation_name') and self.relation_name != None:\r\n",
" serialized_str += ', \"relationName\": \"' + self.relation_name + '\"'\r\n",
" if hasattr(self, 'relation_id') and self.relation_id != None:\r\n",
" serialized_str += ', \"relationId\": \"' + self.relation_id + '\"'\r\n",
" if hasattr(self, 'search_key') and self.search_key != None:\r\n",
" serialized_str += ', \"searchKey\": \"' + self.search_key + '\"'\r\n",
" if hasattr(self, 'tactics') and self.tactics != None:\r\n",
" serialized_str += ', \"tactics\": \"' + self.tactics + '\"'\r\n",
" if hasattr(self, 'techniques') and self.techniques != None:\r\n",
" serialized_str += ', \"techniques\": \"' + self.techniques + '\"'\r\n",
" if hasattr(self, 'source_info') and self.source_info != None:\r\n",
" serialized_str += ', \"sourceInfo\": \"' + self.source_info + '\"'\r\n",
" if hasattr(self, 'summary_items') and self.summary_items != None:\r\n",
" serialized_str += ', \"rawContent\": \"[' + DynamicSummary.serializeItems(self.summary_items) + ']\"'\r\n",
"\r\n",
" return serialized_str\r\n",
"\r\n",
" def serializeItems(items):\r\n",
" raw_content = ''\r\n",
" isFirst = True\r\n",
" for item in items:\r\n",
" if isFirst == True:\r\n",
" isFirst = False\r\n",
" else:\r\n",
" raw_content += ','\r\n",
" \r\n",
" raw_content += json.dumps(DynamicSummary.serializeItem(item)).strip('\"')\r\n",
" return raw_content\r\n",
"\r\n",
" def serializeItem(item):\r\n",
" serialized_item_tsr = '{'\r\n",
" serialized_item_tsr += '\"summaryItemId\": \"' + item.summary_item_id.urn[9:] + '\"'\r\n",
"\r\n",
" if hasattr(item, 'relation_name') and item.relation_name != None:\r\n",
" serialized_item_tsr += ', \"relationName\": \"' + item.relation_name + '\"'\r\n",
" if hasattr(item, 'relation_id') and item.relation_id != None:\r\n",
" seriserialized_item_tsralized_str += ', \"relationId\" :\"' + item.relation_id + '\"'\r\n",
" if hasattr(item, 'search_key') and item.search_key != None:\r\n",
" serialized_item_tsr += ', \"searchKey\": \"' + item.search_key + '\"'\r\n",
" if hasattr(item, 'tactics') and item.tactics != None:\r\n",
" serialized_item_tsr += ', \"tactics\": \"' + item.tactics + '\"'\r\n",
" if hasattr(item, 'techniques') and item.techniques != None:\r\n",
" serialized_item_tsr += ', \"techniques\": \"' + item.techniques + '\"'\r\n",
" if hasattr(item, 'event_time_utc') and item.event_time_utc != None:\r\n",
" serialized_item_tsr += ', \"eventTimeUTC\" :\"' + item.event_time_utc.isoformat() + 'Z\"'\r\n",
" if hasattr(item, 'observable_type') and item.observable_type != None:\r\n",
" serialized_item_tsr += ', \"observableType\": \"' + item.observable_type + '\"'\r\n",
" if hasattr(item, 'observable_value') and item.observable_value != None:\r\n",
" serialized_item_tsr += ', \"observableValue\": \"' + item.observable_value + '\"'\r\n",
" if hasattr(item, 'packed_content') and item.packed_content != None:\r\n",
" serialized_item_tsr += ', \"packedContent\": ' + item.packed_content\r\n",
" serialized_item_tsr += '}'\r\n",
" \r\n",
" return serialized_item_tsr\r\n",
"\r\n",
" def construct_summary(self, tenant_id, summary_name, summary_description, items, \\\r\n",
" relation_name=None, relation_id=None, search_key=None, tactics=None, techniques=None, source_info=None, **kwargs):\r\n",
" \"\"\" Building summary level data object \"\"\"\r\n",
" self.summary_name = summary_name\r\n",
" self.azure_tenant_id = tenant_id\r\n",
" self.summary_description = summary_description\r\n",
" if relation_name != None:\r\n",
" self.relation_name = relation_name\r\n",
" if relation_id != None:\r\n",
" self.relation_id = relation_id\r\n",
" if search_key != None:\r\n",
" self.search_key = search_key\r\n",
" if tactics != None:\r\n",
" self.tactics = tactics\r\n",
" if techniques != None:\r\n",
" self.techniques = techniques\r\n",
" if source_info != None:\r\n",
" self.source_info = source_info\r\n",
" if summary_items != None:\r\n",
" self.summary_items = items\r\n",
"\r\n",
" def construct_summary_item(self, summary_item_id, \\\r\n",
" relation_name=None, relation_id=None, search_key=None, tactics=None, techniques=None, event_time_utc=None, observable_type=None, observable_value=None, packed_content=None, **kwargs):\r\n",
" \"\"\" Building summary item level data object \"\"\"\r\n",
" \r\n",
" item = DynamicSummary(self.summary_id)\r\n",
" item.summary_item_id = summary_item_id\r\n",
" if relation_name != None:\r\n",
" item.relation_name = relation_name\r\n",
" if relation_id != None:\r\n",
" item.relation_id = relation_id\r\n",
" if search_key != None:\r\n",
" item.search_key = search_key\r\n",
" if tactics != None:\r\n",
" item.tactics = tactics\r\n",
" if techniques != None:\r\n",
" item.techniques = techniques\r\n",
" if event_time_utc != None:\r\n",
" item.event_time_utc = event_time_utc\r\n",
" if observable_type != None:\r\n",
" item.observable_type = observable_type\r\n",
" if observable_value != None:\r\n",
" item.observable_value = observable_value\r\n",
" if packed_content != None:\r\n",
" item.packed_content = packed_content\r\n",
"\r\n",
" return item\r\n",
" \r\n",
" def construct_arm_rest_url(subscription_id, resource_group, workspace_name, summary_guid):\r\n",
" \"Build URL for Sentinel Dynamic Summaries REST API\"\r\n",
" api_version = \"2023-03-01-preview\"\r\n",
" provider_name = \"Microsoft.OperationalInsights\"\r\n",
" workspace_provider_name = \"Microsoft.SecurityInsights/dynamicSummaries\"\r\n",
" root_url = \"https://management.azure.com\"\r\n",
" arm_rest_url_template = \"{0}/subscriptions/{1}/resourceGroups/{2}/providers/{3}/workspaces/{4}/providers/{5}/{6}?api-version={7}\"\r\n",
" return arm_rest_url_template.format(root_url, subscription_id, resource_group, provider_name, workspace_name, workspace_provider_name, summary_guid, api_version)\r\n",
"\r\n",
"\r\n",
" def call_azure_rest_api_for_creating_dynamic_summary(token, arm_rest_url, summary):\r\n",
" \"Calling Microsoft Sentinel REST API\"\r\n",
" bearer_token = \"Bearer \" + token\r\n",
" headers = {\"Authorization\": bearer_token, \"content-type\":\"application/json\" }\r\n",
" response = requests.put(arm_rest_url, headers=headers, data=summary, verify=True)\r\n",
" return response\r\n",
"\r\n",
" def display_result(response):\r\n",
" \"Display the result set as pandas.DataFrame\"\r\n",
" if response != None:\r\n",
" df = pd.DataFrame(response.json()[\"value\"])\r\n",
" display(df)\r\n",
" "
],
"outputs": [],
"execution_count": null,
"metadata": {
"jupyter": {
"source_hidden": false,
"outputs_hidden": false
},
"nteract": {
"transient": {
"deleting": false
}
}
}
},
{
"cell_type": "markdown",
"source": [
"## 2. Authentication to Azure Storage"
],
"metadata": {
"nteract": {
"transient": {
"deleting": false
}
}
}
},
{
"cell_type": "code",
"source": [
"tenant_id = ''\r\n",
"subscription_id = ''\r\n",
"akv_name = ''\r\n",
"akv_link_name = ''\r\n",
"resource_group_name = ''\r\n",
"storage_account_name = ''\r\n",
"container_name = 'azureml'\r\n",
"client_id_name = ''\r\n",
"client_secret_name = ''\r\n",
"resource_group_name_for_dynamic_summaries = ''\r\n",
"sentinel_workspace_name_for_dynamic_summaries = ''\r\n",
"dynamic_summary_name = ''\r\n",
"dynamic_summary_guid = ''"
],
"outputs": [],
"execution_count": null,
"metadata": {
"jupyter": {
"source_hidden": false,
"outputs_hidden": false
},
"nteract": {
"transient": {
"deleting": false
}
},
"tags": [
"parameters"
]
}
},
{
"cell_type": "code",
"source": [
"# Read the service principal's client id and secret through mssparkutils.credentials.getSecret.\r\n",
"client_id, client_secret = (\r\n",
"    mssparkutils.credentials.getSecret(akv_name, secret_name, akv_link_name)\r\n",
"    for secret_name in (client_id_name, client_secret_name)\r\n",
")\r\n",
"\r\n",
"credential = ClientSecretCredential(tenant_id=tenant_id, client_id=client_id, client_secret=client_secret)\r\n",
"# Adapter around the same credential; the scan cell passes it to StorageManagementClient.\r\n",
"cred = AzureIdentityCredentialAdapter(credential)\r\n",
"\r\n",
"# Token for the ARM scope; its token string is later sent as the bearer token of the REST call.\r\n",
"access_token = credential.get_token(\"https://management.azure.com/.default\")\r\n",
"token = access_token.token"
],
"outputs": [],
"execution_count": null,
"metadata": {
"jupyter": {
"source_hidden": false,
"outputs_hidden": false
},
"nteract": {
"transient": {
"deleting": false
}
}
}
},
{
"cell_type": "markdown",
"source": [
"## 3. Scan Azure Blob for Leaking Credentials"
],
"metadata": {
"nteract": {
"transient": {
"deleting": false
}
}
}
},
{
"cell_type": "code",
"source": [
"import warnings\r\n",
"\r\n",
"# Blobs whose last-modified time is fewer than this many days old are scanned.\r\n",
"days_back = 500\r\n",
"# Defined before any Azure call so the cells below never hit an undefined name when nothing was scanned.\r\n",
"result_objects = []\r\n",
"\r\n",
"# Compile each pattern once up front instead of once per blob. A pattern the running Python version\r\n",
"# rejects is reported and skipped so the remaining patterns still run. The original list position\r\n",
"# is kept because it is part of the result key.\r\n",
"compiled_regexes = []\r\n",
"with warnings.catch_warnings():\r\n",
"    # Warnings are silenced only while compiling, not for the rest of the notebook.\r\n",
"    warnings.simplefilter('ignore')\r\n",
"    for indexregex, regex in enumerate(get_regex_list()):\r\n",
"        try:\r\n",
"            compiled_regexes.append((indexregex, regex, re.compile(regex)))\r\n",
"        except re.error as regex_error:\r\n",
"            print(\"Skipping invalid regex \" + str(indexregex) + \": \" + str(regex_error))\r\n",
"\r\n",
"storage_client = StorageManagementClient(cred, subscription_id=subscription_id)\r\n",
"\r\n",
"try:\r\n",
"    storage_keys = storage_client.storage_accounts.list_keys(resource_group_name, storage_account_name)\r\n",
"    if storage_keys is not None:\r\n",
"        storage_key = {v.key_name: v.value for v in storage_keys.keys}['key1']\r\n",
"\r\n",
"        blob_service_client = BlobServiceClient(\r\n",
"            account_url=\"https://{0}.blob.core.windows.net\".format(storage_account_name),\r\n",
"            credential=storage_key\r\n",
"        )\r\n",
"\r\n",
"        if blob_service_client is not None:\r\n",
"            container_client = blob_service_client.get_container_client(container_name)\r\n",
"            if container_client is not None:\r\n",
"                blob_list = container_client.list_blobs()\r\n",
"                for indexblob, b in enumerate(blob_list):\r\n",
"                    print(\"Blob name: \" + b.name)\r\n",
"\r\n",
"                    try:\r\n",
"                        if not file_modified_date_check(days_back, b.last_modified):\r\n",
"                            continue\r\n",
"\r\n",
"                        blob = container_client.download_blob(b)\r\n",
"                        content = get_file_content(blob)\r\n",
"                        if content is None:\r\n",
"                            continue\r\n",
"\r\n",
"                        # Run Regex strings on the file content\r\n",
"                        has_leaking = False\r\n",
"                        for indexregex, regex, compiled_regex in compiled_regexes:\r\n",
"                            results = compiled_regex.findall(content)\r\n",
"                            if not results:\r\n",
"                                continue\r\n",
"\r\n",
"                            # NOTE(review): the matched text printed below is the suspected secret itself and\r\n",
"                            # ends up in the cell output - confirm that this is acceptable where outputs are saved.\r\n",
"                            print(\"================================================\")\r\n",
"                            print(\"MATCHED REGEX:\\n\" + regex)\r\n",
"                            print(\"---------------MATCHED CONTENT -----------------\")\r\n",
"                            matched_contents = []\r\n",
"                            for result in results:\r\n",
"                                print(str(result))\r\n",
"                                matched_contents.append(convert_result_to_string(result))\r\n",
"                            print(\"================================================\")\r\n",
"                            has_leaking = True\r\n",
"                            # Only patterns that matched produce a result entry.\r\n",
"                            result_object = file_scan_result(b.name, b.last_modified)\r\n",
"                            result_object.add_result(\"blob\" + str(indexblob) + \"-regex\" + str(indexregex), matched_contents)\r\n",
"                            result_objects.append(result_object.results)\r\n",
"                        if not has_leaking:\r\n",
"                            print('No leaking data found')\r\n",
"                    except Exception as e:\r\n",
"                        # Best effort: a failure on one blob is reported and the scan moves on to the next.\r\n",
"                        print(e)\r\n",
"                print(\"Printing to check how it will look like\")\r\n",
"                print(result_encoder().encode(result_objects))\r\n",
"                scan_data = json.dumps(result_objects, indent=4, cls=result_encoder)\r\n",
"                print(scan_data)\r\n",
"        else:\r\n",
"            print(\"failed on blob service client\")\r\n",
"except Exception as ex:\r\n",
"    if \"AuthorizationFailed\" in str(ex):\r\n",
"        print(\"========================================================================\")\r\n",
"        print(\"Error: Service principal has no sufficient permission to perform tasks.\")\r\n",
"        print(\"========================================================================\")\r\n",
"\r\n",
"    raise\r\n",
" \r\n"
],
"outputs": [],
"execution_count": null,
"metadata": {
"jupyter": {
"source_hidden": false,
"outputs_hidden": false
},
"nteract": {
"transient": {
"deleting": false
}
},
"gather": {
"logged": 1642183953308
}
}
},
{
"cell_type": "markdown",
"source": [
"## 4. Save result to Microsoft Sentinel Dynamic Summaries"
],
"metadata": {
"nteract": {
"transient": {
"deleting": false
}
}
}
},
{
"cell_type": "code",
"source": [
"# Build the Dynamic Summary payload only when the scan produced results AND a summary name is configured.\r\n",
"# Checking both together avoids using summary / summary_items when they were never created.\r\n",
"summary_json = None\r\n",
"if result_objects and dynamic_summary_name is not None and dynamic_summary_name != '':\r\n",
"    summary = DynamicSummary(dynamic_summary_guid)\r\n",
"    summary_description = \"This summary is generated from notebook - AutomationGallery-CredentialScanOnAzureBlobStorage.\"\r\n",
"    summary_items = []\r\n",
"\r\n",
"    for res_obj in result_objects:\r\n",
"        res_df = pd.DataFrame.from_dict(res_obj)\r\n",
"\r\n",
"        # Every row of the frame becomes one summary item that carries the row as JSON.\r\n",
"        # The row yielded by iterrows is used directly instead of a positional lookup by index label.\r\n",
"        for _, row in res_df.iterrows():\r\n",
"            summary_items.append(summary.construct_summary_item(\r\n",
"                DynamicSummary.get_new_guid(),\r\n",
"                # Naive UTC timestamp: the serializer appends 'Z' to its ISO form.\r\n",
"                event_time_utc=datetime.now(timezone.utc).replace(tzinfo=None),\r\n",
"                packed_content=row.to_json()))\r\n",
"\r\n",
"    summary.construct_summary(tenant_id, dynamic_summary_name, summary_description, summary_items)\r\n",
"    summary_json = \"{ \\\"properties\\\": {\" + summary.serialize() + \"}}\""
],
"outputs": [],
"execution_count": null,
"metadata": {
"jupyter": {
"source_hidden": false,
"outputs_hidden": false
},
"nteract": {
"transient": {
"deleting": false
}
}
}
},
{
"cell_type": "code",
"source": [
"# Upload only when the scan produced results and a summary name has been configured.\r\n",
"if result_objects and dynamic_summary_name not in (None, ''):\r\n",
"    dyn_sum_api_url = DynamicSummary.construct_arm_rest_url(\r\n",
"        subscription_id,\r\n",
"        resource_group_name_for_dynamic_summaries,\r\n",
"        sentinel_workspace_name_for_dynamic_summaries,\r\n",
"        dynamic_summary_guid,\r\n",
"    )\r\n",
"    response = DynamicSummary.call_azure_rest_api_for_creating_dynamic_summary(token, dyn_sum_api_url, summary_json)\r\n",
"\r\n",
"    # HTTP status code returned by the PUT request.\r\n",
"    print(response.status_code)"
],
"outputs": [],
"execution_count": null,
"metadata": {
"jupyter": {
"source_hidden": false,
"outputs_hidden": false
},
"nteract": {
"transient": {
"deleting": false
}
}
}
}
],
"metadata": {
"kernelspec": {
"name": "synapse_pyspark",
"display_name": "Synapse PySpark"
},
"language_info": {
"name": "python"
},
"description": null,
"save_output": true,
"synapse_widget": {
"version": "0.1",
"state": {}
}
},
"nbformat": 4,
"nbformat_minor": 2
}