utils/create_phrase_set.py (27 lines of code) (raw):

# Copyright 2024 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Create a PhraseSet to provide hints to the speech recognizer.

A PhraseSet is used to favor specific words and phrases in the
recognition results.
"""

from typing import Iterable

from google.api_core import client_options
from google.cloud import speech_v2
from google.cloud.speech_v2.types import cloud_speech


def create_phrase_set(
    project_id: str,
    phrase_set_id: str,
    phrases: Iterable[str],
    *,
    boost: float = 20,
) -> cloud_speech.PhraseSet:
    """Creates a PhraseSet based on the provided phrases.

    The PhraseSet is created under ``projects/{project_id}/locations/global``
    and every phrase is given the same boost value. The call blocks until
    the create operation's ``result()`` returns, then prints the result.

    Args:
        project_id: GCP Project Id.
        phrase_set_id: Name given to the phrase set.
        phrases: Words and phrases to add to the phrase set.
        boost: Boost value applied to every phrase. Defaults to 20, the
            value this script has always used.

    Returns:
        The PhraseSet returned by the completed create operation.
    """
    options = client_options.ClientOptions(api_endpoint="speech.googleapis.com")

    # Create a client
    client = speech_v2.SpeechClient(client_options=options)

    # Create a persistent PhraseSet to reference in a recognition request
    request = cloud_speech.CreatePhraseSetRequest(
        parent=f"projects/{project_id}/locations/global",
        phrase_set_id=phrase_set_id,
        phrase_set=cloud_speech.PhraseSet(
            phrases=[{"value": phrase, "boost": boost} for phrase in phrases]
        ),
    )

    operation = client.create_phrase_set(request=request)
    # Wait for the operation to finish before reporting the result.
    phrase_set = operation.result()

    print("phrase_set\n", phrase_set)
    return phrase_set


# Placeholders: replace with real values before running this script.
PROJECT_ID = "<PROJECT_ID>"
PHRASE_SET_ID = "<NAME FOR PHRASE SET>"
PHRASES = ["<PHRASES>"]

# Only call the API when executed as a script, so that importing this module
# to reuse create_phrase_set() does not issue a request.
if __name__ == "__main__":
    create_phrase_set(PROJECT_ID, PHRASE_SET_ID, PHRASES)