services/4-model-evaluation/model-evaluation.py

# * Copyright 2022 Google LLC
# *
# * Licensed under the Apache License, Version 2.0 (the "License");
# * you may not use this file except in compliance with the License.
# * You may obtain a copy of the License at
# *
# *     http://www.apache.org/licenses/LICENSE-2.0
# *
# * Unless required by applicable law or agreed to in writing, software
# * distributed under the License is distributed on an "AS IS" BASIS,
# * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# * See the License for the specific language governing permissions and
# * limitations under the License.

import os

import torch
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
)
from google.cloud import storage

# OPTIONAL: Test the fine-tuned model manually via a test prompt

# Configuration for GCS and local paths
MODEL_PATH_GCS = "fine_tuned_model"  # GCS directory where the model is saved
MODEL_LOCAL_DIR = "./temp_model"     # Local directory for temporary model storage
TEST_PROMPT = "How is the movie beavers?"

# Bucket holding the fine-tuned model. The original script references
# BUCKET_DATA_NAME without defining it; reading it from an environment
# variable here is an assumption.
BUCKET_DATA_NAME = os.environ["BUCKET_DATA_NAME"]


# Download the saved model files from GCS into a local directory
def download_model_from_gcs(bucket_name, model_gcs_path, local_dir):
    storage_client = storage.Client()
    bucket = storage_client.bucket(bucket_name)
    blobs = bucket.list_blobs(prefix=model_gcs_path)
    os.makedirs(local_dir, exist_ok=True)
    for blob in blobs:
        # Skip "directory" placeholder objects, which have no file name
        if blob.name.endswith("/"):
            continue
        local_file_path = os.path.join(local_dir, os.path.basename(blob.name))
        blob.download_to_filename(local_file_path)
        print(f"Downloaded {blob.name} to {local_file_path}")


# Download the model from GCS
download_model_from_gcs(BUCKET_DATA_NAME, MODEL_PATH_GCS, MODEL_LOCAL_DIR)

# Load the tokenizer and model from the local directory
print("Loading tokenizer and model from local directory...")
tokenizer = AutoTokenizer.from_pretrained(MODEL_LOCAL_DIR)
model = AutoModelForCausalLM.from_pretrained(MODEL_LOCAL_DIR)
print("Model loaded successfully.")

# Move the model to GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# Tokenize the input prompt and generate a response
inputs = tokenizer(TEST_PROMPT, return_tensors="pt").to(device)
with torch.no_grad():
    outputs = model.generate(
        **inputs,
        max_length=200,          # Adjust max length based on prompt size
        num_return_sequences=1,
        do_sample=True,          # Required for top_k/top_p/temperature to take effect
        no_repeat_ngram_size=2,
        top_k=50,
        top_p=0.95,
        temperature=0.8,
    )

# Decode and print the response
generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
print("Response:", generated_text)
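
# OPTIONAL: tidy up after the manual test. This is a sketch, not part of the
# original script: it releases the model from GPU memory and removes the
# temporary model files downloaded into MODEL_LOCAL_DIR above.
import shutil

del model
if torch.cuda.is_available():
    torch.cuda.empty_cache()
shutil.rmtree(MODEL_LOCAL_DIR, ignore_errors=True)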