devai-api/app/github_utils.py (149 lines of code) (raw):

# Copyright 2024 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. import os import subprocess import requests from git import Repo from github import Auth from langchain_community.utilities.github import GitHubAPIWrapper from google.cloud.aiplatform import telemetry from vertexai.generative_models import GenerativeModel from .constants import USER_AGENT, MODEL_NAME from .file_processor import format_files_as_string model = GenerativeModel(MODEL_NAME) file_update_request = """{} OLD <<<< {} >>>> OLD NEW <<<< {} >>>> NEW """ def get_source_code(repo_name: str): """Clones the specified GitHub repository and returns its source code as a string. Args: repo_name (str): The name of the repository to clone. Returns: str: The source code of the cloned repository. """ github_account = os.environ["GITHUB_ACCOUNT"] repo_name = os.environ["GITHUB_REPO_NAME"] clone_repo(github_account, repo_name) return format_files_as_string(f"{repo_name}") def generate_pr_summary(existing_source_code: str, new_source_code: str) -> str: """Generates a summary for a GitHub pull request based on the changes between the existing and new source code. Args: existing_source_code (str): The original source code. new_source_code (str): The modified source code. Returns: str: A string containing the pull request title and description, separated by a newline character. Returns None if an error occurs. """ pr_summary_template = """ Summarize the changes between old and new source code and return summary for GitHub pull request. Response format: PR Name\nnPR description Example format: Test PR\nnThis is a test PR. OLD SOURCE CODE: {} NEW SOURCE CODE: {} """ try: with telemetry.tool_context_manager(USER_AGENT): code_chat = model.start_chat(response_validation=False) pr_response = code_chat.send_message( pr_summary_template.format(existing_source_code, new_source_code) ) return pr_response.text except Exception as e: print(f"Error generating pull request summary: {e}") return def create_github_pr(branch: str, files: dict[str, str]): """Creates a GitHub pull request with the specified branch and file updates. Args: branch (str): The name of the branch to create the pull request from. files (dict[str, str]): A dictionary where keys are filepaths and values are the new file content. Returns: The response from the GitHub API. Returns None if an error occurs. """ github = GitHubAPIWrapper( github_app_id=os.getenv("GITHUB_APP_ID"), github_app_private_key=os.getenv("GITHUB_APP_PRIVATE_KEY"), github_repository=f"{os.getenv('GITHUB_ACCOUNT')}/{os.getenv('GITHUB_REPO_NAME')}", ) try: resp = github.create_branch(branch) print(resp) except Exception as e: print(f"Error creating branch: {e}") return existing_files = {} existing_source_code = "" new_source_code = "" for filepath, content in files.items(): try: old_file_contents = github.read_file(filepath) existing_files[filepath] = old_file_contents resp = github.update_file( file_update_request.format(filepath, old_file_contents, content) ) print(resp) existing_source_code += f"\nFile: {filepath}\nContent:\n{old_file_contents}" new_source_code += f"\nFile: {filepath}\nContent:\n{content}" except Exception as e: print(f"Error updating file {filepath}: {e}") return try: pr_summary = generate_pr_summary(existing_source_code, new_source_code) resp = github.create_pull_request(pr_summary) print(resp) github_account = os.environ["GITHUB_ACCOUNT"] repo_name = os.environ["GITHUB_REPO_NAME"] pr_link = f"https://github.com/{github_account}/{repo_name}/pulls" return pr_link except Exception as e: print(f"Error creating pull request: {e}") return def clone_repo(github_account: str, repo_name: str): """Clones the specified GitHub repository using the provided credentials. Args: github_account (str): The GitHub account/organization name. repo_name (str): The name of the repository. Returns: Repo: The cloned repository object. Returns None if cloning fails. """ try: github_app_id = os.environ["GITHUB_APP_ID"] github_installation_id = os.environ["GITHUB_APP_INSTALLATION_ID"] github_app_private_key = os.environ["GITHUB_APP_PRIVATE_KEY"] try: with open(github_app_private_key, "r") as f: private_key = f.read() except Exception: private_key = github_app_private_key auth = Auth.AppAuth( github_app_id, private_key, ) jwt_token = auth.create_jwt() response = requests.post( f"https://api.github.com/app/installations/{github_installation_id}/access_tokens", headers={ "Authorization": f"Bearer {jwt_token}", "Accept": "application/vnd.github+json", }, ) installation_token = response.json()["token"] repo = Repo.clone_from( f"https://x-access-token:{installation_token}@github.com/{github_account}/{repo_name}.git", repo_name, ) return repo except Exception as e: print(f"Error cloning repository: {e}") return None def delete_folder(repo_name: str): """Deletes the specified folder and its contents. Args: repo_name (str): The name of the folder to delete. """ try: subprocess.run(["rm", "-rf", repo_name], check=True) except Exception as e: print(f"Error deleting folder: {e}") def validate_github_setup(): if not os.getenv("GITHUB_APP_ID"): raise ValueError("GITHUB_APP_ID environment variable is not set") if not os.getenv("GITHUB_APP_PRIVATE_KEY"): raise ValueError("GITHUB_APP_PRIVATE_KEY environment variable is not set") if not os.getenv("GITHUB_ACCOUNT"): raise ValueError("GITHUB_ACCOUNT environment variable is not set") if not os.getenv("GITHUB_REPO_NAME"): raise ValueError("GITHUB_REPO_NAME environment variable is not set") if not os.getenv("GITHUB_APP_INSTALLATION_ID"): raise ValueError("GITHUB_APP_INSTALLATION_ID environment variable is not set") def create_pull_request(prompt: str): """Creates a pull request on GitHub with updates to the README.md file. Args: prompt (str): The prompt describing the desired changes. Returns: The response from the GitHub API, or None if an error occurs. """ try: validate_github_setup() except Exception as e: resp = "Error validating GitHub setup" print(f"{resp}: {e}") return resp response = "" try: repo_name = os.environ["GITHUB_REPO_NAME"] delete_folder(repo_name) source_code = get_source_code(repo_name) summary = get_summary(README_UPDATE_INSTRUCTIONS, source_code) branch = "feature/docs-update" file = "README.md" response = create_github_pr( branch, { file: summary, }, ) return response except Exception as e: print(f"Failed to create pull request: {e}") finally: delete_folder(repo_name) return response def get_summary(instructions, source_code): """Uses a language model to generate a README summary based on provided instructions and source code. Args: instructions (str): Instructions for generating the summary. source_code (str): The project's source code. Returns: str: The generated README summary. """ with telemetry.tool_context_manager(USER_AGENT): code_chat = model.start_chat(response_validation=False) code_chat.send_message(instructions) response = code_chat.send_message(source_code) return response.text README_UPDATE_INSTRUCTIONS = """### Instruction ### Generate a comprehensive README.md file for the provided context. The README should follow industry best practices and be suitable for professional developers. Resources like dora.dev, stc.org, and writethedocs.org should be used as guidelines. It should be clear, concise, and easy to read written in a professional mannor conveying the project's purpose and value effectively. ### Output Format ### A well-structured README.md file in Markdown format. The README, using markdown formatting, should include the following sections (at a minimum): Description Table of Contents Features Installation Usage Contributing License Contact ### Example Dialogue ### Instruction: Generate a comprehensive README.md file for the provided project. The README should follow industry best practices and be suitable for professional developers. Context (project): Project Name: Cymbal Coffee Description: A Python library for data analysis and visualization, designed to simplify common data wrangling tasks and generate insightful plots. Technologies Used: Python, Pandas, NumPy, Matplotlib, Seaborn Features: * Easy data loading from various sources (CSV, Excel, SQL, etc.) * Powerful data cleaning and transformation functions * Interactive data exploration with summary statistics and filtering * Customizable visualization templates for common plot types * Integration with Jupyter Notebooks for seamless analysis Installation: pip install cymbal Usage: See examples in the 'examples' directory or visit our documentation: [link to documentation] Contribution Guidelines: We welcome contributions! Please follow our style guide and submit pull requests for review. License: Apache 2.0 License Contact Information: Email us at support@cymbal.coffee or open an issue on our GitHub repository. """