# parse_recommendation_page()
#
# From: utilities/tools/platform/Parse-WAF-Security-Recommendations.py


def parse_recommendation_page(url):
    """Fetch a Markdown page of WAF security recommendations and parse it.

    The page is fetched over HTTP, converted from Markdown to HTML, and
    walked structurally: each ``h2`` containing "Azure" starts a product
    section, and each ``h3`` inside it is one recommendation whose body
    (siblings up to the next heading) carries severity, type, and a
    "Related policy" link to the Azure Policy portal.

    Args:
        url: URL of the raw Markdown document to parse.

    Returns:
        A list of dicts with keys ``name``, ``description``, ``policy_url``,
        ``severity`` and ``type`` (missing fields default to ``''``), or
        ``None`` when no Azure section is found or any error occurs.
    """
    try:
        print(f"Fetching page: {url}")
        # Timeout so a stalled host cannot hang the whole run.
        response = requests.get(url, timeout=30)
        response.raise_for_status()
        content = response.text
        # Convert Markdown to HTML so the document can be walked as a tree.
        html_content = markdown.markdown(content)
        soup = BeautifulSoup(html_content, 'html.parser')
        # Each "Azure ..." h2 introduces a product section.
        azure_h2_tags = [h2 for h2 in soup.find_all('h2') if 'Azure' in h2.text]
        if not azure_h2_tags:
            print("No h2 tag with 'Azure' found")
            return None

        recommendations = []
        for h2 in azure_h2_tags:
            name = h2.text.strip()
            print(f"Found name: {name}")
            # Collect the h3 headings (individual recommendations) that
            # belong to this h2 section, i.e. until the next h2.
            h3_tags = []
            next_element = h2.next_sibling
            while next_element and next_element.name != 'h2':
                if next_element.name == 'h3':
                    h3_tags.append(next_element)
                next_element = next_element.next_sibling
            for h3 in h3_tags:
                description = h3.text.strip()
                print(f"Found description: {description}")
                # Gather the body of this recommendation: every sibling up
                # to the next h3/h2, plus any policy link it contains.
                next_element = h3.next_sibling
                policy_url = ''
                severity = ''
                type_info = ''
                additional_info = []
                while next_element and next_element.name not in ['h3', 'h2']:
                    if isinstance(next_element, str):
                        text = next_element.strip()
                        if text:
                            additional_info.append(text)
                    elif next_element.name in ['p', 'li']:
                        text = next_element.text.strip()
                        if text:
                            additional_info.append(text)
                        # FIX: after Markdown->HTML conversion the related
                        # policy link is an <a href="...">, not Markdown
                        # [...](...) syntax, and ``.text`` drops the URL.
                        # Read the href directly from the anchor tag.
                        if not policy_url:
                            for anchor in next_element.find_all('a', href=True):
                                href = anchor['href']
                                if href.startswith('https://portal.azure.com/#blade/Microsoft_Azure_Policy/PolicyDetailBlade/definitionId/'):
                                    policy_url = href
                                    print(f"Found policy URL: {policy_url}")
                                    break
                    next_element = next_element.next_sibling
                # Join additional info and search for severity, type, and policy URL
                additional_text = ' '.join(additional_info)
                severity_match = re.search(r'Severity:\s*(\w+)', additional_text)
                if severity_match:
                    severity = severity_match.group(1)
                    print(f"Found severity: {severity}")
                # FIX: the text was joined with spaces, so there are no
                # newlines and the old lookahead (?=\n|$) swallowed the rest
                # of the section. Stop at the next known label instead.
                type_match = re.search(
                    r'Type:\s*(.+?)(?=\s*(?:Severity:|Related policy:)|$)',
                    additional_text)
                if type_match:
                    type_info = type_match.group(1).strip()
                    print(f"Found type: {type_info}")
                # Fallback: keep the original Markdown-syntax match in case
                # a raw link survived conversion into the extracted text.
                if not policy_url:
                    policy_match = re.search(r'Related policy: \[.+?\]\((https://portal\.azure\.com/#blade/Microsoft_Azure_Policy/PolicyDetailBlade/definitionId/[^)]+)\)', additional_text)
                    if policy_match:
                        policy_url = policy_match.group(1)
                        print(f"Found policy URL: {policy_url}")
                recommendations.append({
                    'name': name,
                    'description': description,
                    'policy_url': policy_url,
                    'severity': severity,
                    'type': type_info
                })
        return recommendations
    except Exception as e:
        # Boundary handler: log and signal failure with None rather than
        # propagating, preserving the caller-facing contract.
        print(f"Error parsing page {url}: {str(e)}")
        return None