# Log Analytics - Custom Table Setup

__Notebook Version:__ 1.0<br>
__Python Version:__ Python 3.8<br>
__Apache Spark Version:__ 3.1<br>
__Required Packages:__ azure-identity, azure-mgmt-resource, azure-monitor-query, azure-mgmt-loganalytics<br>
__Platforms Supported:__  Azure Synapse Analytics
     
__Data Source Required:__ No 
    
### Description
This notebook provides step-by-step instructions and sample code to create a data collection endpoint, a custom table, and a data collection rule for Azure Log Analytics.<br>
***Please run the cells sequentially to avoid errors. Please do not use "run all cells".***<br>

## Table of Contents
1. Warm-up
2. Azure Authentication
3. Create Data Collection Endpoint (DCE)
4. Create Custom Table
5. Create Data Collection Rule (DCR)

## 1. Warm-up

In [None]:
# Load Python libraries that will be used in this notebook
from azure.identity import AzureCliCredential, DefaultAzureCredential, ClientSecretCredential
from azure.core.exceptions import  HttpResponseError 

import json
from IPython.display import display, HTML, Markdown

In [None]:
# User Inputs section 1
# Fill in every value below before running the later cells; they are all
# empty placeholders as shipped.
tenant_id = ""  # passed to ClientSecretCredential as tenant_id
subscription_id = ""  # used to build resource IDs and to scope the ResourceManagementClient
workspace_id = ""  # not referenced by the other cells shown in this notebook

# Azure KV for accessing service principal info
# The three names below are handed to mssparkutils.credentials.getSecret().
akv_name = ""  # first getSecret argument (the Key Vault to read from)
client_id_name = ""  # secret whose value is used as the service principal client_id
client_secret_name = ""  # secret whose value is used as the service principal client_secret
akv_link_name = ""  # third getSecret argument -- presumably the Synapse linked service name; confirm

In [None]:
# User Inputs section 2
# Names and location of the resources this notebook provisions.
resource_group_name = ""
location = ""
workspace_name = ""
data_collection_endpoint_name = ""
data_collection_rule_name = ""
custom_table_name = ""

# Derived values: built from the inputs above (and subscription_id from
# section 1), so there is nothing to fill in here.
workspace_resource_id = (
    f"/subscriptions/{subscription_id}"
    f"/resourceGroups/{resource_group_name}"
    f"/providers/Microsoft.OperationalInsights/workspaces/{workspace_name}"
)
# The custom table name with a "Custom-" prefix; the DCR cell passes this as
# the customTableFullName template parameter.
custom_table_full_name = "Custom-" + custom_table_name

## 2. Azure Authentication

In [None]:
# You may need to change resource_uri for various cloud environments.
resource_uri = "https://api.loganalytics.io"

# Read the service principal's client ID and secret from Key Vault so that
# neither value has to be typed into the notebook.
client_id = mssparkutils.credentials.getSecret(akv_name, client_id_name, akv_link_name)
client_secret = mssparkutils.credentials.getSecret(akv_name, client_secret_name, akv_link_name)

# Service principal credential; the deployment cells pass this object to
# ResourceManagementClient.
credential = ClientSecretCredential(tenant_id=tenant_id, client_id=client_id, client_secret=client_secret)

# Request a token for the "<resource_uri>/.default" scope and keep the raw
# token string (the first field of the returned AccessToken).
access_token = credential.get_token(f"{resource_uri}/.default")
token = access_token.token

## 3. Create Data Collection Endpoint (DCE)

In [None]:
# ARM deployment template (kept as a JSON string and parsed with json.loads in
# the next cell) that creates one Microsoft.Insights/dataCollectionEndpoints
# resource with public network access enabled.
#   parameters: dataCollectionEndpointName (required), location (defaults to "eastus")
#   outputs:    dataCollectionEndpointId -- resource ID of the endpoint
#               endpoint                 -- the endpoint's logsIngestion object
dce_json_string = """
{
    "$schema": "https://schema.management.azure.com/schemas/2019-04-01/deploymentTemplate.json#",
    "contentVersion": "1.0.0.0",
    "parameters": {
        "dataCollectionEndpointName": {
            "type": "string",
            "metadata": {
                "description": "Specifies the name of the Data Collection Endpoint to create."
            }
        },
        "location": {
            "type": "string",
            "defaultValue": "eastus",
            "metadata": {
                "description": "Specifies the location for the Data Collection Endpoint."
            }
        }
    },
    "resources": [
        {
            "type": "Microsoft.Insights/dataCollectionEndpoints",
            "name": "[parameters('dataCollectionEndpointName')]",
            "location": "[parameters('location')]",
            "apiVersion": "2021-04-01",
            "properties": {
                "networkAcls": {
                "publicNetworkAccess": "Enabled"
                }
            }
        }
    ],
    "outputs": {
        "dataCollectionEndpointId": {
            "type": "string",
            "value": "[resourceId('Microsoft.Insights/dataCollectionEndpoints', parameters('dataCollectionEndpointName'))]"
        },
        "endpoint": {
            "type": "object",
            "value": "[reference(resourceId('Microsoft.Insights/dataCollectionEndpoints', parameters('dataCollectionEndpointName'))).logsIngestion]"
        }
    }
}
"""

In [None]:
from azure.mgmt.resource import ResourceManagementClient
from azure.mgmt.resource.resources.models import DeploymentMode

# Resource Manager client for the target subscription, authenticated with the
# service principal credential created in the authentication cell.
resource_client = ResourceManagementClient(credential, subscription_id)
template_body = json.loads(dce_json_string)

# Values for the template's two parameters, each wrapped as {"value": ...}.
dce_template_parameters = {
    "location": {"value": location},
    "dataCollectionEndpointName": {"value": data_collection_endpoint_name},
}
dce_deployment = {
    "properties": {
        "template": template_body,
        "parameters": dce_template_parameters,
        "mode": DeploymentMode.incremental,
    }
}

# Start the resource-group deployment named "exampleDeployment".  The returned
# object is polled with .status() / .result() in the following cell.
rg_deployment_result = resource_client.deployments.begin_create_or_update(
    resource_group_name, "exampleDeployment", dce_deployment
)

In [None]:
# Check on the DCE deployment started in the previous cell.  Re-run this cell
# until the status is Succeeded (or Failed); on success it captures the
# endpoint's resource ID and ingestion URL from the template outputs.
#
# The placeholder is named dce_resource_id to match the variable assigned
# below (it was previously initialised under a different name, dce_res_id,
# which nothing else read).
dce_resource_id = ''
dce_endpoint = ''

# Read the status once so the value that is tested and the value that is
# printed cannot differ if the deployment finishes in between.
deployment_status = rg_deployment_result.status()
if deployment_status != "Succeeded":
    print(deployment_status)
    print('Run the cell until status=Succeeded or when you see Failed.')
else:
    dce_outputs = rg_deployment_result.result().properties.outputs
    dce_resource_id = dce_outputs["dataCollectionEndpointId"].get("value")
    # The "endpoint" output is an object; its 'endpoint' key holds the URL.
    dce_endpoint = dce_outputs["endpoint"].get("value")['endpoint']
    print('You will need DCE Endpoint for future data ingestion!')
    print('DCE Endpoint: ' + dce_endpoint)

## 4. Create Custom Table

In [None]:
# Please replace columns info with your own columns
cus_table_json_string = """
{
    "$schema": "https://schema.management.azure.com/schemas/2019-04-01/deploymentTemplate.json#",
    "contentVersion": "1.0.0.0",
    "parameters": {
        "workspaceName": {
            "type": "string",
            "metadata": {
                "description": "LA workspace name."
            }
        },
        "customTableName": {
            "type": "string",
            "metadata": {
                "description": "table name."
            }
        }
    },
    "resources": [
        {
            "type": "Microsoft.OperationalInsights/workspaces/tables",
            "apiVersion": "2021-12-01-preview",
            "name": "[concat(parameters('workspaceName'), '/', parameters('customTableName'))]",
            "kind": "CustomLog",
            "properties": {
                "totalRetentionInDays": 90,
                "plan": "Analytics",
                "schema": {
                    "name": "[parameters('customTableName')]",
                    "columns": [
                        {
                            "name": "TimeGenerated",
                            "type": "datetime"
                        },
                        {
                            "name": "Url",
                            "type": "string"
                        },
                        {
                            "name": "Fact",
                            "type": "string"
                        }
                    ]
                },
                "retentionInDays": 90
            }
        }
    ],
    "outputs": {
        "streamName": {
            "type": "string",
            "value": "[concat('Custom-', parameters('customTableName'))]"
        }
    }
}
"""

In [None]:
from azure.mgmt.resource import ResourceManagementClient
from azure.mgmt.resource.resources.models import DeploymentMode

# Resource Manager client for the target subscription, authenticated with the
# service principal credential created in the authentication cell.
resource_client = ResourceManagementClient(credential, subscription_id)
template_body = json.loads(cus_table_json_string)

# Name under which this deployment is recorded in the resource group.
table_tag = "defaultct"

# Values for the template's two parameters, each wrapped as {"value": ...}.
table_template_parameters = {
    "workspaceName": {"value": workspace_name},
    "customTableName": {"value": custom_table_name},
}
table_deployment = {
    "properties": {
        "template": template_body,
        "parameters": table_template_parameters,
        "mode": DeploymentMode.incremental,
    }
}

# Start the deployment; the returned object is polled with .status() /
# .result() in the following cell.
rg_deployment_result = resource_client.deployments.begin_create_or_update(
    resource_group_name, table_tag, table_deployment
)

In [None]:
# Check on the custom-table deployment started in the previous cell.  Re-run
# this cell until the status is Succeeded (or Failed); on success it captures
# the stream name from the template's "streamName" output.
stream_name = ''

# Read the status once so the value that is tested and the value that is
# printed cannot differ if the deployment finishes in between.
deployment_status = rg_deployment_result.status()
if deployment_status != "Succeeded":
    print(deployment_status)
    print('Run the cell until status=Succeeded or when you see Failed.')
else:
    table_outputs = rg_deployment_result.result().properties.outputs
    stream_name = table_outputs["streamName"].get("value")
    print('You will need full stream name for future data ingestion!')
    print('Stream Name: ' + stream_name)

## 5. Create Data Collection Rule (DCR)

In [None]:
# Please replace columns info with your own columns
dcr_json_string = """
{
    "$schema": "https://schema.management.azure.com/schemas/2019-04-01/deploymentTemplate.json#",
    "contentVersion": "1.0.0.0",
    "parameters": {
        "dataCollectionRuleName": {
            "type": "string",
            "metadata": {
                "description": "Specifies the name of the Data Collection Rule to create."
            }
        },
        "location": {
            "type": "string",
            "metadata": {
                "description": "Specifies the location in which to create the Data Collection Rule."
            }
        },
        "workspaceResourceId": {
            "type": "string",
            "metadata": {
                "description": "Specifies the Azure resource ID of the Log Analytics workspace to use."
            }
        },
        "workspaceName": {
            "type": "string",
            "metadata": {
                "description": "LA workspace name."
            }
        },
        "endpointResourceId": {
            "type": "string",
            "metadata": {
                "description": "Specifies the Azure resource ID of the Data Collection Endpoint to use."
            }
        },
        "customTableFullName": {
            "type": "string",
            "metadata": {
                "description": "table name."
            }
        },
        "subscriptionId": {
            "type": "string",
            "metadata": {
                "description": "Subscription Id."
            }
        }
    },
    "resources": [
        {
            "type": "Microsoft.Insights/dataCollectionRules",
            "name": "[parameters('dataCollectionRuleName')]",
            "location": "[parameters('location')]",
            "apiVersion": "2021-09-01-preview",
            "properties": {
                "dataCollectionEndpointId": "[parameters('endpointResourceId')]",
                "streamDeclarations": {
                    "[parameters('customTableFullName')]": {
                        "columns": [
                            {
                                "name": "TimeGenerated",
                                "type": "datetime"
                            },
                            {
                                "name": "Url",
                                "type": "string"
                            },
                            {
                                "name": "Fact",
                                "type": "string"
                            }
                        ]
                    }
                },
                "destinations": {
                    "logAnalytics": [
                        {
                            "workspaceResourceId": "[parameters('workspaceResourceId')]",
                            "name": "[parameters('workspaceName')]"
                        }
                    ]
                },
                "dataFlows": [
                    {
                        "streams": [
                            "[parameters('customTableFullName')]"
                        ],
                        "destinations": [
                            "[parameters('workspaceName')]"
                        ],
                        "transformKql": "source",
                        "outputStream": "[parameters('customTableFullName')]"
                    }
                ]
            }
        }
    ],
    "outputs": {
        "dataCollectionRuleId": {
            "type": "string",
            "value": "[resourceId('Microsoft.Insights/dataCollectionRules', parameters('dataCollectionRuleName'))]"
        },
        "immutableId": {
            "type": "string",
            "value": "[reference(resourceId('Microsoft.Insights/dataCollectionRules', parameters('dataCollectionRuleName'))).immutableId]"
        }
    }
}
"""

In [None]:
from azure.mgmt.resource import ResourceManagementClient
from azure.mgmt.resource.resources.models import DeploymentMode

# Resource Manager client for the target subscription, authenticated with the
# service principal credential created in the authentication cell.
resource_client = ResourceManagementClient(credential, subscription_id)

template_body = json.loads(dcr_json_string)

# Rebuild the DCE resource ID from the user inputs, so this cell does not
# depend on the value captured by the DCE result cell.
dce_resource_id = (
    f"/subscriptions/{subscription_id}"
    f"/resourceGroups/{resource_group_name}"
    f"/providers/Microsoft.Insights/dataCollectionEndpoints/{data_collection_endpoint_name}"
)
endpoint_resource_id = dce_resource_id

# Name under which this deployment is recorded in the resource group.
tag_name = "defaultdcr"

# One {"value": ...} entry for every parameter the DCR template declares.
dcr_template_parameters = {
    "location": {"value": location},
    "dataCollectionRuleName": {"value": data_collection_rule_name},
    "workspaceResourceId": {"value": workspace_resource_id},
    "workspaceName": {"value": workspace_name},
    "endpointResourceId": {"value": endpoint_resource_id},
    "customTableFullName": {"value": custom_table_full_name},
    "subscriptionId": {"value": subscription_id},
}
dcr_deployment = {
    "properties": {
        "template": template_body,
        "parameters": dcr_template_parameters,
        "mode": DeploymentMode.incremental,
    }
}

# Start the deployment; the returned object is polled with .status() /
# .result() in the following cell.
rg_deployment_result = resource_client.deployments.begin_create_or_update(
    resource_group_name, tag_name, dcr_deployment
)

In [None]:
# Check on the DCR deployment started in the previous cell.  Re-run this cell
# until the status is Succeeded (or Failed); on success it captures the rule's
# immutable ID from the template's "immutableId" output.
immutable_id = ''

# Read the status once so the value that is tested and the value that is
# printed cannot differ if the deployment finishes in between.
deployment_status = rg_deployment_result.status()
if deployment_status != "Succeeded":
    print(deployment_status)
    print('Run the cell until status=Succeeded or when you see Failed.')
else:
    dcr_outputs = rg_deployment_result.result().properties.outputs
    immutable_id = dcr_outputs["immutableId"].get("value")
    print('You will need DCR Immutable Id for future data ingestion!')
    print('DCR Immutable Id: ' + immutable_id)


In [None]:
# Show the result object of the most recent deployment as this cell's output.
# NOTE(review): result() presumably waits for the deployment to finish and
# raises if it failed -- confirm against the azure-core LROPoller docs.
rg_deployment_result.result()