# Copyright 2025 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import json
import os
import re
from concurrent import futures
from datetime import datetime
from hashlib import sha256

import functions_framework
import requests
from bs4 import BeautifulSoup
from google.cloud import firestore, pubsub_v1
from product_rss_urls import rss_urls
from pytz import timezone

# Module-level Pub/Sub and Firestore setup. Everything below runs once at
# import time and is shared by all functions in this file.

# Batching thresholds handed to the Pub/Sub publisher client. The trailing
# comments quote the library defaults for comparison with the values set here.
batch_settings = pubsub_v1.types.BatchSettings(
    max_messages=100,  # same as the library default of 100
    max_bytes=1024,  # 1024 bytes here; the library default is 1 MB
    max_latency=1,  # library default is 10 ms; NOTE(review): unit is presumably seconds - confirm
)
publisher = pubsub_v1.PublisherClient(batch_settings)
# Topic path built from the GCP_PROJECT_ID and PUB_SUB_TOPIC_NAME environment
# variables; os.environ.get yields None for any variable that is not set.
topic_path = publisher.topic_path(
    os.environ.get("GCP_PROJECT_ID"), os.environ.get("PUB_SUB_TOPIC_NAME")
)
# Futures returned by publisher.publish(); publish_to_pubsub appends to this
# list and http_request waits on it before responding.
publish_futures = []
firestore_client = firestore.Client(project=os.environ.get("GCP_PROJECT_ID"))


def remove_libraries(html):
    """
    Remove every libraries section (e.g. <h3>Libraries</h3>...) from the release
    notes because this section tends to be very verbose and doesn't display well
    in Chat. Each one is replaced with a generic <h3>Libraries Updated</h3> header.

    A libraries section runs from its <h3>Libraries</h3> header up to (but not
    including) the next <h3> tag, or to the end of the html when it is the
    last section.
    Args:
        html: The html from which to remove the libraries section
    Returns:
        The html with every libraries section collapsed; html without a
        libraries section is returned unchanged
    """

    def _placeholder(match):
        # A newline separates the placeholder from the section that follows,
        # but nothing is appended when the libraries section ended the html.
        if match.end() == len(match.string):
            return "<h3>Libraries Updated</h3>"
        return "<h3>Libraries Updated</h3>\n"

    # The lookahead leaves the following <h3> unconsumed, so consecutive
    # libraries sections, and a trailing one after an earlier match, are all
    # handled in a single pass. DOTALL lets "." span the newlines in the body.
    return re.sub(
        r"<h3>Libraries</h3>.*?(?=<h3>|\Z)", _placeholder, html, flags=re.DOTALL
    )


def get_todays_release_note(rss_url):
    """
    Fetches a product release notes feed and returns its latest entry, but only
    if the feed was updated today.

    "Today" is the current date in the US/Eastern timezone. It is compared with
    the date part of the first <updated> element found in the feed.

    Args:
        rss_url (str): The URL of the RSS feed.

    Returns:
        dict | None: A dict with the keys "product", "date", "link", "html" and
        "rss_url" describing the first <entry> of the feed, or None if the feed
        was not updated today, has no entry, or could not be fetched or parsed.
    """
    try:
        # Without a timeout an unresponsive feed would block this call forever.
        response = requests.get(rss_url, timeout=30)
        response.raise_for_status()  # Raise HTTPError for bad responses (4xx or 5xx)
        soup = BeautifulSoup(response.content, "xml")
        product = re.sub(
            " - release notes", "", soup.find("title").contents[0], flags=re.IGNORECASE
        )
        updated = soup.find("updated").contents[0]
        # Only the date part (before the "T") of the timestamp is compared
        updated_date = datetime.strptime(updated.split("T")[0], "%Y-%m-%d").date()
        today_date = datetime.now(timezone("US/Eastern")).date()
        if updated_date != today_date:
            # Stale feed: skip parsing the (potentially large) entry content
            return None

        item = soup.find("entry")
        if not item:
            return None
        release_note = remove_libraries(item.find("content").contents[0])
        return dict(
            product=product,
            date=updated_date.strftime("%B %d, %Y"),
            link=item.find("link")["href"],
            html=release_note,
            rss_url=rss_url,
        )

    except requests.exceptions.RequestException as e:
        # Covers connection errors, timeouts and HTTP error statuses
        print(f"Error fetching {rss_url}: {e}")
        return None
    except AttributeError as e:
        # Raised when an expected element is missing (find() returned None)
        print(f"Error parsing {rss_url}: {e}")
        return None
    except Exception as e:
        # Best-effort: one broken feed must not abort processing of the others
        print(f"An unexpected error occurred while fetching {rss_url}: {e}")
        return None


def get_new_release_note_subsections(latest_release_note, stored_release_note):
    """
    Get the new release note subsections by comparing the latest release note with the stored release note.
    Subsections are defined as any section that starts with <h3> header. Anything
    before the first <h3> header is ignored. A subsection counts as new when its
    text (markup stripped) does not match the text of any stored subsection.
    Args:
        latest_release_note: The latest release note (dict with an "html" key).
            Its "html" value is overwritten in place with the new subsections.
        stored_release_note: The stored release note (dict with an "html" key)
    Returns:
        The same latest_release_note dict, with "html" reduced to the new
        subsections (an empty string when none are new)
    """

    def _split_subsections(html):
        # With a capturing group, re.split returns
        # [preamble, header1, body1, header2, body2, ...], so each header always
        # stays paired with its own body, even when several headers share a line.
        parts = re.split(r"<h3>(.*?)</h3>", html)
        return list(zip(parts[1::2], parts[2::2]))

    def _text_only(html):
        return BeautifulSoup(html, "html.parser").get_text()

    # A set makes the membership test below O(1) per subsection
    stored_subsections_text_only = {
        _text_only(body)
        for _, body in _split_subsections(stored_release_note.get("html"))
    }
    # Get only new subsections from the latest release note
    latest_release_note["html"] = "".join(
        f"<h3>{header}</h3>{body}"
        for header, body in _split_subsections(latest_release_note.get("html"))
        if _text_only(body) not in stored_subsections_text_only
    )
    return latest_release_note


def get_new_release_notes(latest_release_notes):
    """
    Select the release notes that differ from what is stored in Firestore.

    For every product, the latest release note is compared with the document
    stored in the "cloud_release_notes" collection. Notes that are new or
    changed are saved to Firestore and included in the result.
    Args:
        latest_release_notes: Dict mapping product name to its latest release note
    Returns:
        Dict mapping product name to its release note. Products with a stored
        note get only their new subsections; products without one get the
        whole note. Unchanged products are left out.
    """
    fresh_notes = {}
    for product, latest_note in latest_release_notes.items():
        snapshot = (
            firestore_client.collection("cloud_release_notes")
            .document(product.replace("/", ""))
            .get()
        )
        stored_note = snapshot.to_dict()

        # Nothing usable stored yet: persist and report the whole note
        if not (stored_note and stored_note.get("html")):
            save_release_note_to_firestore(product, latest_note)
            fresh_notes[product] = latest_note
            continue

        if not isNewRelease(latest_note, stored_note):
            continue

        # Save before diffing: the diff overwrites the note's "html" in place
        save_release_note_to_firestore(product, latest_note)
        fresh_notes[product] = get_new_release_note_subsections(
            latest_note, stored_note
        )
    return fresh_notes


def isNewRelease(latest_release_note, stored_release_note):
    """
    Tell whether the latest release note differs from the stored one.

    The markup is stripped from the "html" value of both notes and the sha256
    hashes of the remaining text are compared.
    Args:
        latest_release_note: The latest release note (dict with an "html" key)
        stored_release_note: The stored release note (dict with an "html" key)
    Returns:
        True if the text of the two release notes differs, False otherwise
    """

    def _text_digest(release_note):
        # Hash only the visible text so markup-only changes are ignored
        text_only = BeautifulSoup(release_note.get("html"), "html.parser").get_text()
        return sha256(text_only.encode("utf-8")).digest()

    stored_digest = _text_digest(stored_release_note)
    latest_digest = _text_digest(latest_release_note)
    return latest_digest != stored_digest


def save_release_note_to_firestore(product, new_release):
    """
    Write a release note to the "cloud_release_notes" Firestore collection.

    The document ID is the product name with every "/" removed.
    Args:
        product: The product name
        new_release: The release note to store in the document
    """
    collection = firestore_client.collection("cloud_release_notes")
    document_id = product.replace("/", "")
    collection.document(document_id).set(new_release)


def publish_to_pubsub(space_id, release_note):
    """Queues a Pub/Sub message carrying a space ID and a release note.

    The call does not wait for the message to be sent. The publish future is
    given a done-callback and appended to the module-level publish_futures
    list, so the caller can wait on all pending publishes at once.
    Args:
        space_id: The ID of the space the release note is addressed to
        release_note: The release note; must be JSON-serializable
    """
    message_json = json.dumps(
        {
            "space_id": space_id,
            "release_note": release_note,
        }
    ).encode("utf-8")
    future = publisher.publish(topic_path, message_json)
    # Non-blocking. Allow the publisher client to batch multiple messages.
    # Do not call future.result() here: it would block until this message is
    # sent and stop later messages from joining the same batch.
    future.add_done_callback(callback)
    publish_futures.append(future)


# Done-callback attached to every publish future by publish_to_pubsub.
def callback(future: pubsub_v1.publisher.futures.Future) -> None:
    """Print the result of a completed publish future.

    Args:
        future: The publish future; future.result() raises here if the
            publish failed.
    """
    print(future.result())


# To run the function locally with the Functions Framework, use:
# functions-framework --target=http_request
@functions_framework.http
def http_request(request):
    """HTTP Cloud Function.

    Fetches every feed in rss_urls, keeps the release notes updated today,
    filters them down to those that are new, and publishes one Pub/Sub
    message per subscribed space.
    Args:
        request (flask.Request): The request object.
        <https://flask.palletsprojects.com/en/1.1.x/api/#incoming-request-data>
    Returns:
        The response text, or any set of values that can be turned into a
        Response object using `make_response`
        <https://flask.palletsprojects.com/en/1.1.x/api/#flask.make_response>.
    """
    # Fetch all feeds concurrently; leaving the with-block waits for every fetch
    with futures.ThreadPoolExecutor() as executor:
        todays_release_notes = executor.map(get_todays_release_note, rss_urls)
    todays_release_notes_dict = {
        release_note["product"]: release_note
        for release_note in todays_release_notes
        if release_note
    }
    new_release_notes_only = get_new_release_notes(todays_release_notes_dict)
    if not new_release_notes_only:
        print("No new release notes")
        return ("Done", 200)

    print(f"Found new release notes: {new_release_notes_only}")
    # Get spaces subscribed to the products with new release notes
    subscriptions_ref = firestore_client.collection("space_product_subscriptions")
    for product, release_note in new_release_notes_only.items():
        product_doc = subscriptions_ref.document(product.replace("/", "")).get()
        if not product_doc.exists:
            continue
        spaces_subscribed = product_doc.to_dict().get("spaces_subscribed", [])
        for space_id in spaces_subscribed:
            publish_to_pubsub(space_id, release_note)
    futures.wait(publish_futures, return_when=futures.ALL_COMPLETED)
    # publish_futures is module-level and outlives this request; drop the
    # completed futures so the list does not grow with every invocation.
    publish_futures[:] = [
        pending for pending in publish_futures if not pending.done()
    ]
    return ("Done", 200)