# config/crds/resources/apiextensions.k8s.io_v1_customresourcedefinition_speechrecognizers.speech.cnrm.cloud.google.com.yaml
apiVersion: apiextensions.k8s.io/v1
kind: CustomResourceDefinition
metadata:
annotations:
cnrm.cloud.google.com/version: 0.0.0-dev
creationTimestamp: null
labels:
cnrm.cloud.google.com/managed-by-kcc: "true"
cnrm.cloud.google.com/system: "true"
name: speechrecognizers.speech.cnrm.cloud.google.com
spec:
group: speech.cnrm.cloud.google.com
names:
categories:
- gcp
kind: SpeechRecognizer
listKind: SpeechRecognizerList
plural: speechrecognizers
shortNames:
- gcpspeechrecognizer
- gcpspeechrecognizers
singular: speechrecognizer
preserveUnknownFields: false
scope: Namespaced
versions:
- additionalPrinterColumns:
- jsonPath: .metadata.creationTimestamp
name: Age
type: date
- description: When 'True', the most recent reconcile of the resource succeeded
jsonPath: .status.conditions[?(@.type=='Ready')].status
name: Ready
type: string
- description: The reason for the value in 'Ready'
jsonPath: .status.conditions[?(@.type=='Ready')].reason
name: Status
type: string
- description: The last transition time for the value in 'Status'
jsonPath: .status.conditions[?(@.type=='Ready')].lastTransitionTime
name: Status Age
type: date
name: v1alpha1
schema:
openAPIV3Schema:
description: SpeechRecognizer is the Schema for the SpeechRecognizer API
properties:
apiVersion:
description: 'APIVersion defines the versioned schema of this representation
of an object. Servers should convert recognized schemas to the latest
internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources'
type: string
kind:
description: 'Kind is a string value representing the REST resource this
object represents. Servers may infer this from the endpoint the client
submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds'
type: string
metadata:
type: object
spec:
description: SpeechRecognizerSpec defines the desired state of SpeechRecognizer
properties:
annotations:
additionalProperties:
type: string
description: Allows users to store small amounts of arbitrary data.
Both the key and the value must be 63 characters or less each. At
most 100 annotations.
type: object
defaultRecognitionConfig:
description: Default configuration to use for requests with this Recognizer.
This can be overwritten by inline configuration in the [RecognizeRequest.config][google.cloud.speech.v2.RecognizeRequest.config]
field.
properties:
adaptation:
description: Speech adaptation context that weights recognizer
predictions for specific words and phrases.
properties:
customClasses:
description: A list of inline CustomClasses. Existing CustomClass
resources can be referenced directly in a PhraseSet.
items:
properties:
annotations:
additionalProperties:
type: string
description: Optional. Allows users to store small amounts
of arbitrary data. Both the key and the value must
be 63 characters or less each. At most 100 annotations.
type: object
displayName:
description: Optional. User-settable, human-readable
name for the CustomClass. Must be 63 characters or
less.
type: string
items:
description: A collection of class items.
items:
properties:
value:
description: The class item's value.
type: string
type: object
type: array
type: object
type: array
phraseSets:
description: A list of inline or referenced PhraseSets.
items:
properties:
inlinePhraseSet:
description: An inline defined PhraseSet.
properties:
annotations:
additionalProperties:
type: string
description: Allows users to store small amounts
of arbitrary data. Both the key and the value
must be 63 characters or less each. At most 100
annotations.
type: object
boost:
description: Hint Boost. A positive value will increase
the probability that a specific phrase will be
recognized over other similar sounding phrases.
The higher the boost, the higher the chance of
false positive recognition as well. Valid `boost`
values are between 0 (exclusive) and 20. We recommend
using a binary search approach to finding the
optimal value for your use case, as well as adding
phrases both with and without boost to your requests.
type: string
displayName:
description: User-settable, human-readable name
for the PhraseSet. Must be 63 characters or less.
type: string
phrases:
description: A list of words and phrases.
items:
properties:
boost:
description: Hint Boost. Overrides the boost
set at the phrase set level. A positive value
will increase the probability that a specific
phrase will be recognized over other similar
sounding phrases. The higher the boost,
the higher the chance of false positive
recognition as well. Negative boost values
would correspond to anti-biasing. Anti-biasing
is not enabled, so negative boost values
will return an error. Boost values must
be between 0 and 20. Any values outside
that range will return an error. We recommend
using a binary search approach to finding
the optimal value for your use case, as well
as adding phrases both with and without
boost to your requests.
type: string
value:
description: The phrase itself.
type: string
type: object
type: array
type: object
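# A sketch of an inline PhraseSet carrying boosted phrases, following the
# boost guidance above (phrase text and boost value are illustrative; note
# that boost is a string in this schema):
#
#   inlinePhraseSet:
#     phrases:
#     - value: weather forecast
#       boost: "10"
#     - value: weather forecast   # same phrase without boost, per the advice above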
phraseSetRef:
description: The name of an existing PhraseSet resource.
The user must have read access to the resource and
it must not be deleted.
oneOf:
- not:
required:
- external
required:
- name
- not:
anyOf:
- required:
- name
- required:
- namespace
required:
- external
properties:
external:
description: A reference to an externally managed
SpeechPhraseSet resource. Should be in the format
"projects/{{projectID}}/locations/{{location}}/phraseSets/{{phrasesetID}}".
type: string
name:
description: The name of a SpeechPhraseSet resource.
type: string
namespace:
description: The namespace of a SpeechPhraseSet
resource.
type: string
type: object
type: object
type: array
type: object
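# Per the oneOf above, each phraseSetRef is either an in-cluster reference
# (name, plus optional namespace) or an external resource name, never both.
# A sketch with illustrative resource names:
#
#   phraseSets:
#   - phraseSetRef:
#       name: my-phraseset        # a SpeechPhraseSet managed in the cluster
#   - phraseSetRef:
#       external: projects/my-project/locations/us-central1/phraseSets/my-phraseset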
autoDecodingConfig:
description: Automatically detect decoding parameters. Preferred
for supported formats.
type: object
explicitDecodingConfig:
description: Explicitly specified decoding parameters. Required
if using headerless PCM audio (linear16, mulaw, alaw).
properties:
audioChannelCount:
description: |-
Optional. Number of channels present in the audio data sent for
recognition. Note that this field is marked as OPTIONAL for backward
compatibility reasons. It is (and has always been) effectively REQUIRED.
The maximum allowed value is 8.
format: int32
type: integer
encoding:
description: Required. Encoding of the audio data sent for
recognition.
type: string
sampleRateHertz:
description: 'Optional. Sample rate in Hertz of the audio
data sent for recognition. Valid values are: 8000-48000.
16000 is optimal. For best results, set the sampling rate
of the audio source to 16000 Hz. If that''s not possible,
use the native sample rate of the audio source (instead
of re-sampling). Note that this field is marked as OPTIONAL
for backward compatibility reasons. It is (and has always
been) effectively REQUIRED.'
format: int32
type: integer
required:
- encoding
type: object
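# A minimal sketch of an explicitDecodingConfig for headerless 16-bit PCM
# audio (the encoding value follows the linear16/mulaw/alaw note above;
# sample rate and channel count are illustrative):
#
#   explicitDecodingConfig:
#     encoding: LINEAR16       # required
#     sampleRateHertz: 16000   # 16000 is called out above as optimal
#     audioChannelCount: 1     # effectively required; maximum is 8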
features:
description: Speech recognition features to enable.
properties:
diarizationConfig:
description: Configuration to enable speaker diarization and
set additional parameters to make diarization better suited
for your application. When this is enabled, we send all
the words from the beginning of the audio for the top alternative
in every consecutive STREAMING response. This is done in
order to improve our speaker tags as our models learn to
identify the speakers in the conversation over time. For
non-streaming requests, the diarization results will be
provided only in the top alternative of the FINAL SpeechRecognitionResult.
properties:
maxSpeakerCount:
description: 'Required. Maximum number of speakers in
the conversation. Valid values are: 1-6. Must be >=
`min_speaker_count`. This range gives you more flexibility
by allowing the system to automatically determine the
correct number of speakers.'
format: int32
type: integer
minSpeakerCount:
description: |-
Required. Minimum number of speakers in the conversation. This range gives
you more flexibility by allowing the system to automatically determine the
correct number of speakers.
To fix the number of speakers detected in the audio, set
`min_speaker_count` = `max_speaker_count`.
format: int32
type: integer
required:
- maxSpeakerCount
- minSpeakerCount
type: object
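# As the minSpeakerCount description notes, setting the two bounds equal
# fixes the number of detected speakers. A sketch for exactly two speakers:
#
#   features:
#     diarizationConfig:
#       minSpeakerCount: 2
#       maxSpeakerCount: 2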
enableAutomaticPunctuation:
description: If `true`, adds punctuation to recognition result
hypotheses. This feature is only available in select languages.
The default `false` value does not add punctuation to result
hypotheses.
type: boolean
enableSpokenEmojis:
description: The spoken emoji behavior for the call. If `true`,
adds spoken emoji formatting for the request. This will
replace spoken emojis with the corresponding Unicode symbols
in the final transcript. If `false`, spoken emojis are not
replaced.
type: boolean
enableSpokenPunctuation:
description: The spoken punctuation behavior for the call.
If `true`, replaces spoken punctuation with the corresponding
symbols in the request. For example, "how are you question
mark" becomes "how are you?". See https://cloud.google.com/speech-to-text/docs/spoken-punctuation
for support. If `false`, spoken punctuation is not replaced.
type: boolean
enableWordConfidence:
description: If `true`, the top result includes a list of
words and the confidence for those words. If `false`, no
word-level confidence information is returned. The default
is `false`.
type: boolean
enableWordTimeOffsets:
description: If `true`, the top result includes a list of
words and the start and end time offsets (timestamps) for
those words. If `false`, no word-level time offset information
is returned. The default is `false`.
type: boolean
maxAlternatives:
description: Maximum number of recognition hypotheses to be
returned. The server may return fewer than `max_alternatives`.
Valid values are `0`-`30`. A value of `0` or `1` will return
a maximum of one. If omitted, a maximum of one will be returned.
format: int32
type: integer
multiChannelMode:
description: Mode for recognizing multi-channel audio.
type: string
profanityFilter:
description: If set to `true`, the server will attempt to
filter out profanities, replacing all but the initial character
in each filtered word with asterisks, for instance, "f***".
If set to `false` or omitted, profanities won't be filtered
out.
type: boolean
type: object
languageCodes:
description: |-
Optional. The language of the supplied audio as a
[BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt) language tag.
Language tags are normalized to BCP-47 before they are used, e.g. "en-us"
becomes "en-US".
Supported languages for each model are listed in the [Table of Supported
Models](https://cloud.google.com/speech-to-text/v2/docs/speech-to-text-supported-languages).
If additional languages are provided, the recognition result will contain
recognition in the most likely language detected. The recognition result
will include the language tag of the language detected in the audio.
items:
type: string
type: array
model:
description: |-
Optional. Which model to use for recognition requests. Select the model
best suited to your domain to get the best results.
Guidance for choosing which model to use can be found in the [Transcription
Models
Documentation](https://cloud.google.com/speech-to-text/v2/docs/transcription-model)
and the models supported in each region can be found in the [Table Of
Supported
Models](https://cloud.google.com/speech-to-text/v2/docs/speech-to-text-supported-languages).
type: string
transcriptNormalization:
description: Optional. Use transcription normalization to automatically
replace parts of the transcript with phrases of your choosing.
For StreamingRecognize, this normalization only applies to stable
partial transcripts (stability > 0.8) and final transcripts.
properties:
entries:
description: A list of replacement entries. We will perform
replacement with one entry at a time. For example, the second
entry in ["cat" => "dog", "mountain cat" => "mountain dog"]
will never be applied because we will always process the
first entry before it. At most 100 entries.
items:
properties:
caseSensitive:
description: Whether the search is case sensitive.
type: boolean
replace:
description: What to replace with. Max length is 100
characters.
type: string
search:
description: What to replace. Max length is 100 characters.
type: string
type: object
type: array
type: object
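# A sketch of the ordering caveat above: entries are applied one at a time,
# so with these two entries "mountain cat" can never match, because the
# first entry rewrites "cat" before the second is processed:
#
#   transcriptNormalization:
#     entries:
#     - search: cat
#       replace: dog
#       caseSensitive: false
#     - search: mountain cat
#       replace: mountain dog
#       caseSensitive: false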
translationConfig:
description: Optional. Configuration used to automatically
run translation on the given audio to the desired language for
supported models.
properties:
targetLanguage:
description: Required. The language code to translate to.
type: string
required:
- targetLanguage
type: object
type: object
displayName:
description: User-settable, human-readable name for the Recognizer.
Must be 63 characters or less.
type: string
languageCodes:
description: |-
Optional. This field is now deprecated. Prefer the
[`language_codes`][google.cloud.speech.v2.RecognitionConfig.language_codes]
field in the
[`RecognitionConfig`][google.cloud.speech.v2.RecognitionConfig] message.
The language of the supplied audio as a
[BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt) language tag.
Supported languages for each model are listed in the [Table of Supported
Models](https://cloud.google.com/speech-to-text/v2/docs/speech-to-text-supported-languages).
If additional languages are provided, the recognition result will contain
recognition in the most likely language detected. The recognition result
will include the language tag of the language detected in the audio.
When you create or update a Recognizer, these values are
stored in normalized BCP-47 form. For example, "en-us" is stored as
"en-US".
items:
type: string
type: array
location:
description: Immutable.
type: string
x-kubernetes-validations:
- message: Location field is immutable
rule: self == oldSelf
model:
description: |-
Optional. This field is now deprecated. Prefer the
[`model`][google.cloud.speech.v2.RecognitionConfig.model] field in the
[`RecognitionConfig`][google.cloud.speech.v2.RecognitionConfig] message.
Which model to use for recognition requests. Select the model best suited
to your domain to get the best results.
Guidance for choosing which model to use can be found in the [Transcription
Models
Documentation](https://cloud.google.com/speech-to-text/v2/docs/transcription-model)
and the models supported in each region can be found in the [Table Of
Supported
Models](https://cloud.google.com/speech-to-text/v2/docs/speech-to-text-supported-languages).
type: string
projectRef:
description: The Project that this resource belongs to.
oneOf:
- not:
required:
- external
required:
- name
- not:
anyOf:
- required:
- name
- required:
- namespace
required:
- external
properties:
external:
description: The `projectID` field of a project, when not managed
by Config Connector.
type: string
kind:
description: The kind of the Project resource; optional but must
be `Project` if provided.
type: string
name:
description: The `name` field of a `Project` resource.
type: string
namespace:
description: The `namespace` field of a `Project` resource.
type: string
type: object
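# projectRef follows the same oneOf pattern: either reference a Project
# resource by name (and optional namespace), or point at an unmanaged
# project via external. A sketch with illustrative values:
#
#   projectRef:
#     name: my-project           # a Project resource managed by Config Connector
#
#   projectRef:
#     external: my-project-id    # the projectID of a project not managed by Config Connector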
resourceID:
description: The SpeechRecognizer name. If not given, the metadata.name
will be used.
type: string
required:
- location
- projectRef
type: object
status:
description: SpeechRecognizerStatus defines the config connector machine
state of SpeechRecognizer
properties:
conditions:
description: Conditions represent the latest available observations
of the object's current state.
items:
properties:
lastTransitionTime:
description: Last time the condition transitioned from one status
to another.
type: string
message:
description: Human-readable message indicating details about
last transition.
type: string
reason:
description: Unique, one-word, CamelCase reason for the condition's
last transition.
type: string
status:
description: Status is the status of the condition. Can be True,
False, Unknown.
type: string
type:
description: Type is the type of the condition.
type: string
type: object
type: array
externalRef:
description: A unique specifier for the SpeechRecognizer resource
in GCP.
type: string
observedGeneration:
description: ObservedGeneration is the generation of the resource
that was most recently observed by the Config Connector controller.
If this is equal to metadata.generation, then that means that the
current reported status reflects the most recent desired state of
the resource.
format: int64
type: integer
observedState:
description: ObservedState is the state of the resource as most recently
observed in GCP.
properties:
createTime:
description: Output only. Creation time.
type: string
defaultRecognitionConfig:
description: Default configuration to use for requests with this
Recognizer. This can be overwritten by inline configuration
in the [RecognizeRequest.config][google.cloud.speech.v2.RecognizeRequest.config]
field.
properties:
adaptation:
description: Speech adaptation context that weights recognizer
predictions for specific words and phrases.
properties:
customClasses:
description: A list of inline CustomClasses. Existing
CustomClass resources can be referenced directly in
a PhraseSet.
items:
properties:
createTime:
description: Output only. Creation time.
type: string
deleteTime:
description: Output only. The time at which this
resource was requested for deletion.
type: string
etag:
description: Output only. This checksum is computed
by the server based on the value of other fields.
This may be sent on update, undelete, and delete
requests to ensure the client has an up-to-date
value before proceeding.
type: string
expireTime:
description: Output only. The time at which this
resource will be purged.
type: string
kmsKeyName:
description: Output only. The [KMS key name](https://cloud.google.com/kms/docs/resource-hierarchy#keys)
with which the CustomClass is encrypted. The expected
format is `projects/{project}/locations/{location}/keyRings/{key_ring}/cryptoKeys/{crypto_key}`.
type: string
kmsKeyVersionName:
description: Output only. The [KMS key version name](https://cloud.google.com/kms/docs/resource-hierarchy#key_versions)
with which the CustomClass is encrypted. The expected
format is `projects/{project}/locations/{location}/keyRings/{key_ring}/cryptoKeys/{crypto_key}/cryptoKeyVersions/{crypto_key_version}`.
type: string
name:
description: 'Output only. Identifier. The resource
name of the CustomClass. Format: `projects/{project}/locations/{location}/customClasses/{custom_class}`.'
type: string
reconciling:
description: Output only. Whether or not this CustomClass
is in the process of being updated.
type: boolean
state:
description: Output only. The CustomClass lifecycle
state.
type: string
uid:
description: Output only. System-assigned unique
identifier for the CustomClass.
type: string
updateTime:
description: Output only. The most recent time this
resource was modified.
type: string
type: object
type: array
phraseSets:
description: A list of inline or referenced PhraseSets.
items:
properties:
inlinePhraseSet:
description: An inline defined PhraseSet.
properties:
createTime:
description: Output only. Creation time.
type: string
deleteTime:
description: Output only. The time at which
this resource was requested for deletion.
type: string
etag:
description: Output only. This checksum is computed
by the server based on the value of other
fields. This may be sent on update, undelete,
and delete requests to ensure the client has
an up-to-date value before proceeding.
type: string
expireTime:
description: Output only. The time at which
this resource will be purged.
type: string
kmsKeyName:
description: Output only. The [KMS key name](https://cloud.google.com/kms/docs/resource-hierarchy#keys)
with which the PhraseSet is encrypted. The
expected format is `projects/{project}/locations/{location}/keyRings/{key_ring}/cryptoKeys/{crypto_key}`.
type: string
kmsKeyVersionName:
description: Output only. The [KMS key version
name](https://cloud.google.com/kms/docs/resource-hierarchy#key_versions)
with which the PhraseSet is encrypted. The
expected format is `projects/{project}/locations/{location}/keyRings/{key_ring}/cryptoKeys/{crypto_key}/cryptoKeyVersions/{crypto_key_version}`.
type: string
name:
description: 'Output only. Identifier. The resource
name of the PhraseSet. Format: `projects/{project}/locations/{location}/phraseSets/{phrase_set}`.'
type: string
reconciling:
description: Output only. Whether or not this
PhraseSet is in the process of being updated.
type: boolean
state:
description: Output only. The PhraseSet lifecycle
state.
type: string
uid:
description: Output only. System-assigned unique
identifier for the PhraseSet.
type: string
updateTime:
description: Output only. The most recent time
this resource was modified.
type: string
type: object
type: object
type: array
type: object
type: object
deleteTime:
description: Output only. The time at which this Recognizer was
requested for deletion.
type: string
etag:
description: Output only. This checksum is computed by the server
based on the value of other fields. This may be sent on update,
undelete, and delete requests to ensure the client has an up-to-date
value before proceeding.
type: string
expireTime:
description: Output only. The time at which this Recognizer will
be purged.
type: string
kmsKeyName:
description: Output only. The [KMS key name](https://cloud.google.com/kms/docs/resource-hierarchy#keys)
with which the Recognizer is encrypted. The expected format
is `projects/{project}/locations/{location}/keyRings/{key_ring}/cryptoKeys/{crypto_key}`.
type: string
kmsKeyVersionName:
description: Output only. The [KMS key version name](https://cloud.google.com/kms/docs/resource-hierarchy#key_versions)
with which the Recognizer is encrypted. The expected format
is `projects/{project}/locations/{location}/keyRings/{key_ring}/cryptoKeys/{crypto_key}/cryptoKeyVersions/{crypto_key_version}`.
type: string
reconciling:
description: Output only. Whether or not this Recognizer is in
the process of being updated.
type: boolean
state:
description: Output only. The Recognizer lifecycle state.
type: string
uid:
description: Output only. System-assigned unique identifier for
the Recognizer.
type: string
updateTime:
description: Output only. The most recent time this Recognizer
was modified.
type: string
type: object
type: object
required:
- spec
type: object
served: true
storage: true
subresources:
status: {}
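# A minimal SpeechRecognizer manifest against this schema (a sketch; the
# location, project, language, and model values are illustrative, and
# autoDecodingConfig is chosen per the "preferred for supported formats"
# note above). Only spec.location and spec.projectRef are required:
#
#   apiVersion: speech.cnrm.cloud.google.com/v1alpha1
#   kind: SpeechRecognizer
#   metadata:
#     name: speechrecognizer-sample
#   spec:
#     location: us-central1
#     projectRef:
#       external: my-project-id
#     displayName: sample-recognizer
#     defaultRecognitionConfig:
#       model: long
#       languageCodes:
#       - en-US
#       autoDecodingConfig: {}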