# utilities/tools/platform/Parse-WAF-Security-Recommendations.py
def parse_recommendation_page(url):
    """Fetch a Markdown page of WAF security recommendations and parse the
    Azure-related sections into structured records.

    Parameters
    ----------
    url : str
        URL of a Markdown document whose h2 headings name product areas and
        whose h3 headings name individual recommendations.

    Returns
    -------
    list[dict] | None
        One dict per h3 recommendation under each h2 heading containing
        "Azure", with keys 'name', 'description', 'policy_url', 'severity',
        and 'type'. Returns None when no Azure section is found or when any
        error occurs while fetching/parsing.
    """
    try:
        print(f"Fetching page: {url}")
        # Timeout so a stalled server cannot hang the script indefinitely.
        response = requests.get(url, timeout=30)
        response.raise_for_status()
        content = response.text

        # The page is Markdown; render to HTML so headings become h2/h3 tags.
        html_content = markdown.markdown(content)
        soup = BeautifulSoup(html_content, 'html.parser')

        # Sections of interest are the h2 headings mentioning "Azure".
        azure_h2_tags = [h2 for h2 in soup.find_all('h2') if 'Azure' in h2.text]
        if not azure_h2_tags:
            print("No h2 tag with 'Azure' found")
            return None

        recommendations = []
        for h2 in azure_h2_tags:
            name = h2.text.strip()
            print(f"Found name: {name}")

            # Collect the h3 headings belonging to this h2 (stop at next h2).
            h3_tags = []
            next_element = h2.next_sibling
            while next_element and next_element.name != 'h2':
                if next_element.name == 'h3':
                    h3_tags.append(next_element)
                next_element = next_element.next_sibling

            for h3 in h3_tags:
                description = h3.text.strip()
                print(f"Found description: {description}")

                # Gather all text between this h3 and the next heading.
                next_element = h3.next_sibling
                policy_url = ''
                severity = ''
                type_info = ''
                additional_info = []
                while next_element and next_element.name not in ['h3', 'h2']:
                    if isinstance(next_element, str):
                        # Bare NavigableString between tags.
                        text = next_element.strip()
                        if text:
                            additional_info.append(text)
                    elif next_element.name in ['p', 'li']:
                        text = next_element.text.strip()
                        if text:
                            additional_info.append(text)
                    next_element = next_element.next_sibling

                # Join with newlines (not spaces): the Type regex below relies
                # on the `(?=\n|$)` lookahead to stop at an element boundary.
                # A space-join has no newlines, so the non-greedy `.+?` would
                # expand to end-of-string and swallow the rest of the section.
                additional_text = '\n'.join(additional_info)

                severity_match = re.search(r'Severity:\s*(\w+)', additional_text)
                if severity_match:
                    severity = severity_match.group(1)
                    print(f"Found severity: {severity}")

                type_match = re.search(r'Type:\s*(.+?)(?=\n|$)', additional_text)
                if type_match:
                    type_info = type_match.group(1).strip()
                    print(f"Found type: {type_info}")

                # Azure Policy deep link embedded as a Markdown-style link.
                policy_match = re.search(
                    r'Related policy: \[.+?\]\((https://portal\.azure\.com/#blade/Microsoft_Azure_Policy/PolicyDetailBlade/definitionId/[^)]+)\)',
                    additional_text)
                if policy_match:
                    policy_url = policy_match.group(1)
                    print(f"Found policy URL: {policy_url}")

                recommendations.append({
                    'name': name,
                    'description': description,
                    'policy_url': policy_url,
                    'severity': severity,
                    'type': type_info
                })
        return recommendations
    except Exception as e:
        # Best-effort scraper boundary: log the failure and signal it with
        # None rather than propagating.
        print(f"Error parsing page {url}: {str(e)}")
        return None