def _check_core_logic_parse_output()

in atr/tasks/checks/rat.py [0:0]


def _check_core_logic_parse_output(xml_file: str, base_dir: str) -> dict[str, Any]:
    """Parse the XML output from Apache RAT.

    Every ``resource`` element found anywhere in the report is counted and
    placed in exactly one of three buckets:

    * approved: it has a ``license-approval`` child whose ``name`` attribute
      is ``"true"``;
    * unknown: it is not approved and its ``license-family`` child is named
      ``"Unknown license"``;
    * unapproved: everything else, including resources that have no
      ``license-family`` child at all (reported with license ``"Unknown"``).

    Args:
        xml_file: Path of the RAT XML report to read.
        base_dir: Directory prefix removed from resource names for cleaner
            display. It is only removed when it matches on a path component
            boundary, so ``/tmp/foo`` is not stripped from ``/tmp/foobar/x``.

    Returns:
        A dict with the keys ``valid``, ``message``, ``total_files``,
        ``approved_licenses``, ``unapproved_licenses``, ``unknown_licenses``,
        ``unapproved_files``, ``unknown_license_files`` and ``errors``.
        ``valid`` is true only when no resource is unapproved; resources with
        an unknown license do not affect it. The two file lists are capped at
        100 entries each while the counters reflect the full totals. If the
        report cannot be read or parsed, ``valid`` is false, the counters are
        zero, the file lists are empty and ``errors`` describes the failure.
    """
    try:
        root = ElementTree.parse(xml_file).getroot()

        # Normalise the prefix once so that the match below happens on a path
        # component boundary rather than on an arbitrary string prefix.
        base_prefix = base_dir.rstrip("/")
        base_prefix_with_sep = base_prefix + "/"

        total_files = 0
        approved_licenses = 0
        unapproved_files: list[dict[str, Any]] = []
        unknown_license_files: list[dict[str, Any]] = []

        for resource in root.findall(".//resource"):
            total_files += 1

            # Remove the base_dir prefix for cleaner display
            name = resource.get("name", "")
            if name == base_prefix or name.startswith(base_prefix_with_sep):
                name = name[len(base_prefix) :].lstrip("/")

            license_approval = resource.find("license-approval")
            license_family = resource.find("license-family")

            is_approved = license_approval is not None and license_approval.get("name") == "true"
            # The "Unknown" fallback is deliberately distinct from the
            # "Unknown license" family name tested below, so a resource with
            # no license-family element is counted as unapproved
            license_name = license_family.get("name") if license_family is not None else "Unknown"

            if is_approved:
                approved_licenses += 1
            elif license_name == "Unknown license":
                unknown_license_files.append({"name": name, "license": license_name})
            else:
                unapproved_files.append({"name": name, "license": license_name})

        # The lists are only truncated in the returned dict, so their lengths
        # here are the full counts
        unapproved_licenses = len(unapproved_files)
        unknown_licenses = len(unknown_license_files)

        message = (
            f"Found {approved_licenses} files with approved licenses, "
            f"{unapproved_licenses} with unapproved licenses, "
            f"and {unknown_licenses} with unknown licenses"
        )

        # We limit the number of files we report to 100
        return {
            "valid": unapproved_licenses == 0,
            "message": message,
            "total_files": total_files,
            "approved_licenses": approved_licenses,
            "unapproved_licenses": unapproved_licenses,
            "unknown_licenses": unknown_licenses,
            "unapproved_files": unapproved_files[:100],
            "unknown_license_files": unknown_license_files[:100],
            "errors": [],
        }

    except Exception as e:
        # This function reports failures through its result dict instead of
        # raising, so any error while reading or parsing the report lands here
        _LOGGER.exception("Error parsing RAT output: %s", e)
        return {
            "valid": False,
            "message": f"Failed to parse Apache RAT output: {e!s}",
            "total_files": 0,
            "approved_licenses": 0,
            "unapproved_licenses": 0,
            "unknown_licenses": 0,
            # Same keys as the success result so callers can index them safely
            "unapproved_files": [],
            "unknown_license_files": [],
            "errors": [f"XML parsing error: {e!s}"],
        }