def extract_product_details()

in use-cases/model-fine-tuning-pipeline/data-preparation/gemma-it/src/dataprep.py [0:0]


def extract_product_details(text):
    """Build a plain-text product summary from the text preceding "Description:".

    The portion of ``text`` before the first "Description:" marker is taken,
    ``<br>`` tags are turned into newlines, and a JSON object following
    "Attributes:" is parsed. The result consists of every remaining non-empty
    line (stripped, excluding the "Product Category:" and "Attributes:"
    lines), followed by a "Product Details:" header and one "- key: value"
    line per attribute.

    Args:
        text: Raw product text, expected to contain "Description:" and an
            "Attributes: {...}" JSON object before it.

    Returns:
        The formatted summary, or an empty string when ``text`` is empty or
        ``None``, has no "Description:" marker, or has no "Attributes: {...}"
        section before that marker.

    Raises:
        json.JSONDecodeError: If the text after "Attributes:" is not a valid
            JSON object.
    """
    if not text:
        return ""

    # Only the content before the first "Description:" is of interest.
    match = re.search(r"(.*?)Description:", text, re.DOTALL)
    if not match:
        return ""

    cleaned_content = match.group(1).replace("<br>", "\n")

    # Locate the attributes object; without one nothing is emitted at all.
    match_attributes = re.search(
        r"Attributes:\s*(\{.*?\})", cleaned_content, re.DOTALL
    )
    if not match_attributes:
        return ""

    # Decode starting at the opening brace rather than parsing the regex
    # capture: the non-greedy capture ends at the first "}", which truncates
    # objects that contain nested objects or a "}" inside a string value.
    attributes, _ = json.JSONDecoder().raw_decode(
        cleaned_content, match_attributes.start(1)
    )

    # Filter on the stripped line so that leading whitespace (e.g. from a
    # "<br> " separator) does not let a skipped line through.
    skipped_prefixes = ("Product Category:", "Attributes:")
    stripped_lines = (raw.strip() for raw in cleaned_content.splitlines())
    kept_lines = [
        line
        for line in stripped_lines
        if line and not line.startswith(skipped_prefixes)
    ]

    parts = [f"{line}\n" for line in kept_lines]
    parts.append("Product Details:\n")
    parts.extend(f"- {key}: {value}\n" for key, value in attributes.items())
    return "".join(parts)