check-release-notes/main.py

# Copyright 2025 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import json
import os
import re
from concurrent import futures
from datetime import datetime
from hashlib import sha256

import functions_framework
import requests
from bs4 import BeautifulSoup
from google.cloud import firestore, pubsub_v1
from product_rss_urls import rss_urls
from pytz import timezone

# Upper bound (seconds) on a single RSS fetch so that one stalled feed cannot
# hang a worker thread indefinitely.
REQUEST_TIMEOUT_SECONDS = 30

# Matches one <h3> header and captures its text. The match is non-greedy so
# that several headers on the same line are treated as separate headers.
_SUBSECTION_HEADER_RE = re.compile(r"<h3>(.*?)</h3>")

batch_settings = pubsub_v1.types.BatchSettings(
    max_messages=100,  # default 100
    max_bytes=1024,  # default 1 MB
    max_latency=1,  # default 10 ms
)
publisher = pubsub_v1.PublisherClient(batch_settings)
topic_path = publisher.topic_path(
    os.environ.get("GCP_PROJECT_ID"), os.environ.get("PUB_SUB_TOPIC_NAME")
)
# Futures of publishes that have been queued; http_request waits on these.
publish_futures = []

firestore_client = firestore.Client(project=os.environ.get("GCP_PROJECT_ID"))


def remove_libraries(html):
    """
    Remove the libraries section (e.g. <h3>Libraries</h3>...) from the release
    notes because this section tends to be very verbose and doesn't display
    well in Chat. Replace it with a generic <h3>Libraries Updated</h3> section.

    Args:
        html: The html from which to remove the libraries section

    Returns:
        The html with the libraries section removed
    """
    # re.DOTALL lets "." span newlines, replacing the slower "(.|\n)" idiom.
    followed_by_section = re.compile(r"<h3>Libraries</h3>.*?<h3>", re.DOTALL)
    if followed_by_section.search(html):
        return followed_by_section.sub("<h3>Libraries Updated</h3>\n<h3>", html)
    if "<h3>Libraries</h3>" in html:
        # This is the case where the libraries section is the last section
        # so there won't be a <h3> tag after it
        return re.sub(
            r"<h3>Libraries</h3>.*",
            "<h3>Libraries Updated</h3>",
            html,
            flags=re.DOTALL,
        )
    return html


def get_todays_release_note(rss_url):
    """
    Parse a product release notes feed and return its latest entry, but only
    when the feed was updated today (US/Eastern).

    Args:
        rss_url (str): The URL of the release notes feed.

    Returns:
        dict: A dict with the keys product, date, link, html and rss_url for
        the latest entry, or None if the feed was not updated today, has no
        entry, or an error occurred while fetching or parsing it.
    """
    try:
        # A timeout is required: requests waits forever by default.
        response = requests.get(rss_url, timeout=REQUEST_TIMEOUT_SECONDS)
        response.raise_for_status()  # Raise HTTPError for bad responses (4xx or 5xx)

        soup = BeautifulSoup(response.content, "xml")
        product = re.sub(
            " - release notes", "", soup.find("title").contents[0], flags=re.IGNORECASE
        )
        updated = soup.find("updated").contents[0]

        item = soup.find("entry")
        if not item:
            return None

        release_note = remove_libraries(item.find("content").contents[0])
        link = item.find("link")["href"]

        # Only the date part of the feed's <updated> timestamp is compared.
        updated_date = datetime.strptime(updated.split("T")[0], "%Y-%m-%d").date()
        today_date = datetime.now(timezone("US/Eastern")).date()
        if updated_date != today_date:
            return None

        return dict(
            product=product,
            date=updated_date.strftime("%B %d, %Y"),
            link=link,
            html=release_note,
            rss_url=rss_url,
        )
    except requests.exceptions.RequestException as e:
        print(f"Error fetching {rss_url}: {e}")
        return None
    except AttributeError as e:
        # Raised when an expected element is missing (find() returned None).
        print(f"Error parsing {rss_url}: {e}")
        return None
    except Exception as e:
        # Best-effort boundary: one bad feed must not abort the whole run.
        print(f"An unexpected error occurred while fetching {rss_url}: {e}")
        return None


def _split_subsections(html):
    """Split html into parallel lists of <h3> header texts and section bodies."""
    # Splitting on a pattern with one capture group yields
    # [preamble, header1, body1, header2, body2, ...], which guarantees that
    # headers and bodies stay aligned.
    parts = _SUBSECTION_HEADER_RE.split(html)
    return parts[1::2], parts[2::2]


def get_new_release_note_subsections(latest_release_note, stored_release_note):
    """
    Get the new release note subsections by comparing the latest release note
    with the stored release note. Subsections are defined as any section that
    starts with an <h3> header. Two subsections are considered equal when
    their bodies have the same text once HTML tags are stripped.

    Args:
        latest_release_note: The latest release note (dict with an "html" key)
        stored_release_note: The stored release note (dict with an "html" key)

    Returns:
        latest_release_note, whose "html" value has been replaced in place
        with only the subsections that are not present in the stored note
    """
    latest_headers, latest_bodies = _split_subsections(
        latest_release_note.get("html")
    )
    _, stored_bodies = _split_subsections(stored_release_note.get("html"))
    stored_texts = {
        BeautifulSoup(body, "html.parser").get_text() for body in stored_bodies
    }

    # Keep only the subsections whose text is not already stored
    latest_release_note["html"] = "".join(
        f"<h3>{header}</h3>{body}"
        for header, body in zip(latest_headers, latest_bodies)
        if BeautifulSoup(body, "html.parser").get_text() not in stored_texts
    )
    return latest_release_note


def get_new_release_notes(latest_release_notes):
    """
    Work out which of today's release notes contain content not yet stored in
    Firestore, and store the latest version of each of them.

    Args:
        latest_release_notes: Dict mapping product name to its latest release
            note

    Returns:
        Dict mapping product name to the release note to announce: the whole
        note when nothing was stored for the product, otherwise the note
        reduced to its new subsections. Unchanged products are omitted.
    """
    new_release_notes = {}
    for product, latest_release_note in latest_release_notes.items():
        doc_ref = firestore_client.collection("cloud_release_notes").document(
            product.replace("/", "")
        )
        stored_release_note = doc_ref.get().to_dict()

        if stored_release_note and stored_release_note.get("html"):
            if isNewRelease(latest_release_note, stored_release_note):
                # Save first: the call below trims the note's html in place
                # and Firestore must keep the complete note.
                save_release_note_to_firestore(product, latest_release_note)
                new_release_notes[product] = get_new_release_note_subsections(
                    latest_release_note, stored_release_note
                )
        else:
            save_release_note_to_firestore(product, latest_release_note)
            new_release_notes[product] = latest_release_note
    return new_release_notes


def isNewRelease(latest_release_note, stored_release_note):
    """
    Check if anything in a release note is new by comparing the sha256 hash of
    the text (HTML tags stripped) of the latest release note with that of the
    release note stored in the Firestore database.

    Args:
        latest_release_note: The latest release note (dict with an "html" key)
        stored_release_note: The stored release note (dict with an "html" key)

    Returns:
        True if the release note text differs, False otherwise
    """
    latest_text = BeautifulSoup(
        latest_release_note.get("html"), "html.parser"
    ).get_text()
    stored_text = BeautifulSoup(
        stored_release_note.get("html"), "html.parser"
    ).get_text()
    return (
        sha256(latest_text.encode("utf-8")).digest()
        != sha256(stored_text.encode("utf-8")).digest()
    )


def save_release_note_to_firestore(product, new_release):
    """
    Store a release note in the cloud_release_notes collection.

    Args:
        product: The product name; "/" is removed to form the document id
        new_release: The release note dict to write, replacing any existing
            document
    """
    doc_ref = firestore_client.collection("cloud_release_notes").document(
        product.replace("/", "")
    )
    doc_ref.set(new_release)


def publish_to_pubsub(space_id, release_note):
    """
    Queue a Pub/Sub message containing the space ID and the release note.

    The publish is not awaited here so the publisher client can batch several
    messages together. The future is appended to publish_futures for the
    caller to wait on, and its result is reported by the done-callback.

    Args:
        space_id: The ID of the space the release note is sent to
        release_note: The release note dict (must be JSON serializable)
    """
    message_json = json.dumps(
        {
            "space_id": space_id,
            "release_note": release_note,
        }
    ).encode("utf-8")
    future = publisher.publish(topic_path, message_json)
    # Non-blocking. Allow the publisher client to batch multiple messages.
    # Calling future.result() here would stall until the batch is flushed.
    future.add_done_callback(callback)
    publish_futures.append(future)


# Resolve the publish future in a separate thread.
def callback(future: pubsub_v1.publisher.futures.Future) -> None:
    """
    Report the outcome of a publish once its future has resolved.

    Args:
        future: The future returned by the publisher for one message
    """
    try:
        message_id = future.result()
    except Exception as e:
        # A failed publish is logged here instead of raising inside the
        # callback, where the error would not reach the request handler.
        print(f"Failed to publish message: {e}")
        return
    print(message_id)


# To run the function locally, run the following command:
# functions-framework --target=http_request
@functions_framework.http
def http_request(request):
    """HTTP Cloud Function.

    Fetches every feed in rss_urls concurrently, keeps the release notes
    updated today, determines which of them contain new content, and
    publishes each of those to every space subscribed to the product.

    Args:
        request (flask.Request): The request object.
        <https://flask.palletsprojects.com/en/1.1.x/api/#incoming-request-data>
    Returns:
        The response text, or any set of values that can be turned into a
        Response object using `make_response`
        <https://flask.palletsprojects.com/en/1.1.x/api/#flask.make_response>.
    """
    todays_release_notes_dict = {}
    with futures.ThreadPoolExecutor() as executor:
        for release_note in executor.map(get_todays_release_note, rss_urls):
            if release_note:
                todays_release_notes_dict[release_note["product"]] = release_note

    new_release_notes_only = get_new_release_notes(todays_release_notes_dict)
    if not new_release_notes_only:
        print("No new release notes")
        return ("Done", 200)

    print(f"Found new release notes: {new_release_notes_only}")
    # Get spaces subscribed to the products with new release notes
    subscriptions_ref = firestore_client.collection("space_product_subscriptions")
    for product, release_note in new_release_notes_only.items():
        product_doc = subscriptions_ref.document(product.replace("/", "")).get()
        if not product_doc.exists:
            continue
        for space_id in product_doc.to_dict().get("spaces_subscribed", []):
            publish_to_pubsub(space_id, release_note)

    # Wait for all queued messages before the function returns.
    futures.wait(publish_futures, return_when=futures.ALL_COMPLETED)
    # publish_futures is module-level and survives warm invocations; drop the
    # completed futures so the list does not grow without bound.
    publish_futures[:] = [f for f in publish_futures if not f.done()]
    return ("Done", 200)

check-release-notes/main.py (174 lines of code) (raw):