audiotranscription.go (217 lines of code) (raw):

// File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. package openai import ( "bytes" "context" "encoding/json" "io" "mime/multipart" "net/http" "github.com/openai/openai-go/internal/apiform" "github.com/openai/openai-go/internal/apijson" "github.com/openai/openai-go/internal/requestconfig" "github.com/openai/openai-go/option" "github.com/openai/openai-go/packages/param" "github.com/openai/openai-go/packages/resp" "github.com/openai/openai-go/packages/ssestream" "github.com/openai/openai-go/shared/constant" ) // AudioTranscriptionService contains methods and other services that help with // interacting with the openai API. // // Note, unlike clients, this service does not read variables from the environment // automatically. You should not instantiate this service directly, and instead use // the [NewAudioTranscriptionService] method instead. type AudioTranscriptionService struct { Options []option.RequestOption } // NewAudioTranscriptionService generates a new service that applies the given // options to each request. These options are applied after the parent client's // options (if there is one), and before any request-specific options. func NewAudioTranscriptionService(opts ...option.RequestOption) (r AudioTranscriptionService) { r = AudioTranscriptionService{} r.Options = opts return } // Transcribes audio into the input language. func (r *AudioTranscriptionService) New(ctx context.Context, body AudioTranscriptionNewParams, opts ...option.RequestOption) (res *Transcription, err error) { opts = append(r.Options[:], opts...) path := "audio/transcriptions" err = requestconfig.ExecuteNewRequest(ctx, http.MethodPost, path, body, &res, opts...) return } // Transcribes audio into the input language. func (r *AudioTranscriptionService) NewStreaming(ctx context.Context, body AudioTranscriptionNewParams, opts ...option.RequestOption) (stream *ssestream.Stream[TranscriptionStreamEventUnion]) { var ( raw *http.Response err error ) opts = append(r.Options[:], opts...) opts = append([]option.RequestOption{option.WithJSONSet("stream", true)}, opts...) path := "audio/transcriptions" err = requestconfig.ExecuteNewRequest(ctx, http.MethodPost, path, body, &raw, opts...) return ssestream.NewStream[TranscriptionStreamEventUnion](ssestream.NewDecoder(raw), err) } // Represents a transcription response returned by model, based on the provided // input. type Transcription struct { // The transcribed text. Text string `json:"text,required"` // The log probabilities of the tokens in the transcription. Only returned with the // models `gpt-4o-transcribe` and `gpt-4o-mini-transcribe` if `logprobs` is added // to the `include` array. Logprobs []TranscriptionLogprob `json:"logprobs"` // Metadata for the response, check the presence of optional fields with the // [resp.Field.IsPresent] method. JSON struct { Text resp.Field Logprobs resp.Field ExtraFields map[string]resp.Field raw string } `json:"-"` } // Returns the unmodified JSON received from the API func (r Transcription) RawJSON() string { return r.JSON.raw } func (r *Transcription) UnmarshalJSON(data []byte) error { return apijson.UnmarshalRoot(data, r) } type TranscriptionLogprob struct { // The token in the transcription. Token string `json:"token"` // The bytes of the token. Bytes []float64 `json:"bytes"` // The log probability of the token. Logprob float64 `json:"logprob"` // Metadata for the response, check the presence of optional fields with the // [resp.Field.IsPresent] method. JSON struct { Token resp.Field Bytes resp.Field Logprob resp.Field ExtraFields map[string]resp.Field raw string } `json:"-"` } // Returns the unmodified JSON received from the API func (r TranscriptionLogprob) RawJSON() string { return r.JSON.raw } func (r *TranscriptionLogprob) UnmarshalJSON(data []byte) error { return apijson.UnmarshalRoot(data, r) } type TranscriptionInclude string const ( TranscriptionIncludeLogprobs TranscriptionInclude = "logprobs" ) // TranscriptionStreamEventUnion contains all possible properties and values from // [TranscriptionTextDeltaEvent], [TranscriptionTextDoneEvent]. // // Use the [TranscriptionStreamEventUnion.AsAny] method to switch on the variant. // // Use the methods beginning with 'As' to cast the union to one of its variants. type TranscriptionStreamEventUnion struct { // This field is from variant [TranscriptionTextDeltaEvent]. Delta string `json:"delta"` // Any of "transcript.text.delta", "transcript.text.done". Type string `json:"type"` // This field is a union of [[]TranscriptionTextDeltaEventLogprob], // [[]TranscriptionTextDoneEventLogprob] Logprobs TranscriptionStreamEventUnionLogprobs `json:"logprobs"` // This field is from variant [TranscriptionTextDoneEvent]. Text string `json:"text"` JSON struct { Delta resp.Field Type resp.Field Logprobs resp.Field Text resp.Field raw string } `json:"-"` } // anyTranscriptionStreamEvent is implemented by each variant of // [TranscriptionStreamEventUnion] to add type safety for the return type of // [TranscriptionStreamEventUnion.AsAny] type anyTranscriptionStreamEvent interface { implTranscriptionStreamEventUnion() } func (TranscriptionTextDeltaEvent) implTranscriptionStreamEventUnion() {} func (TranscriptionTextDoneEvent) implTranscriptionStreamEventUnion() {} // Use the following switch statement to find the correct variant // // switch variant := TranscriptionStreamEventUnion.AsAny().(type) { // case TranscriptionTextDeltaEvent: // case TranscriptionTextDoneEvent: // default: // fmt.Errorf("no variant present") // } func (u TranscriptionStreamEventUnion) AsAny() anyTranscriptionStreamEvent { switch u.Type { case "transcript.text.delta": return u.AsTranscriptTextDelta() case "transcript.text.done": return u.AsTranscriptTextDone() } return nil } func (u TranscriptionStreamEventUnion) AsTranscriptTextDelta() (v TranscriptionTextDeltaEvent) { apijson.UnmarshalRoot(json.RawMessage(u.JSON.raw), &v) return } func (u TranscriptionStreamEventUnion) AsTranscriptTextDone() (v TranscriptionTextDoneEvent) { apijson.UnmarshalRoot(json.RawMessage(u.JSON.raw), &v) return } // Returns the unmodified JSON received from the API func (u TranscriptionStreamEventUnion) RawJSON() string { return u.JSON.raw } func (r *TranscriptionStreamEventUnion) UnmarshalJSON(data []byte) error { return apijson.UnmarshalRoot(data, r) } // TranscriptionStreamEventUnionLogprobs is an implicit subunion of // [TranscriptionStreamEventUnion]. TranscriptionStreamEventUnionLogprobs provides // convenient access to the sub-properties of the union. // // For type safety it is recommended to directly use a variant of the // [TranscriptionStreamEventUnion]. // // If the underlying value is not a json object, one of the following properties // will be valid: OfTranscriptionTextDeltaEventLogprobs // OfTranscriptionTextDoneEventLogprobs] type TranscriptionStreamEventUnionLogprobs struct { // This field will be present if the value is a // [[]TranscriptionTextDeltaEventLogprob] instead of an object. OfTranscriptionTextDeltaEventLogprobs []TranscriptionTextDeltaEventLogprob `json:",inline"` // This field will be present if the value is a // [[]TranscriptionTextDoneEventLogprob] instead of an object. OfTranscriptionTextDoneEventLogprobs []TranscriptionTextDoneEventLogprob `json:",inline"` JSON struct { OfTranscriptionTextDeltaEventLogprobs resp.Field OfTranscriptionTextDoneEventLogprobs resp.Field raw string } `json:"-"` } func (r *TranscriptionStreamEventUnionLogprobs) UnmarshalJSON(data []byte) error { return apijson.UnmarshalRoot(data, r) } // Emitted when there is an additional text delta. This is also the first event // emitted when the transcription starts. Only emitted when you // [create a transcription](https://platform.openai.com/docs/api-reference/audio/create-transcription) // with the `Stream` parameter set to `true`. type TranscriptionTextDeltaEvent struct { // The text delta that was additionally transcribed. Delta string `json:"delta,required"` // The type of the event. Always `transcript.text.delta`. Type constant.TranscriptTextDelta `json:"type,required"` // The log probabilities of the delta. Only included if you // [create a transcription](https://platform.openai.com/docs/api-reference/audio/create-transcription) // with the `include[]` parameter set to `logprobs`. Logprobs []TranscriptionTextDeltaEventLogprob `json:"logprobs"` // Metadata for the response, check the presence of optional fields with the // [resp.Field.IsPresent] method. JSON struct { Delta resp.Field Type resp.Field Logprobs resp.Field ExtraFields map[string]resp.Field raw string } `json:"-"` } // Returns the unmodified JSON received from the API func (r TranscriptionTextDeltaEvent) RawJSON() string { return r.JSON.raw } func (r *TranscriptionTextDeltaEvent) UnmarshalJSON(data []byte) error { return apijson.UnmarshalRoot(data, r) } type TranscriptionTextDeltaEventLogprob struct { // The token that was used to generate the log probability. Token string `json:"token"` // The bytes that were used to generate the log probability. Bytes []interface{} `json:"bytes"` // The log probability of the token. Logprob float64 `json:"logprob"` // Metadata for the response, check the presence of optional fields with the // [resp.Field.IsPresent] method. JSON struct { Token resp.Field Bytes resp.Field Logprob resp.Field ExtraFields map[string]resp.Field raw string } `json:"-"` } // Returns the unmodified JSON received from the API func (r TranscriptionTextDeltaEventLogprob) RawJSON() string { return r.JSON.raw } func (r *TranscriptionTextDeltaEventLogprob) UnmarshalJSON(data []byte) error { return apijson.UnmarshalRoot(data, r) } // Emitted when the transcription is complete. Contains the complete transcription // text. Only emitted when you // [create a transcription](https://platform.openai.com/docs/api-reference/audio/create-transcription) // with the `Stream` parameter set to `true`. type TranscriptionTextDoneEvent struct { // The text that was transcribed. Text string `json:"text,required"` // The type of the event. Always `transcript.text.done`. Type constant.TranscriptTextDone `json:"type,required"` // The log probabilities of the individual tokens in the transcription. Only // included if you // [create a transcription](https://platform.openai.com/docs/api-reference/audio/create-transcription) // with the `include[]` parameter set to `logprobs`. Logprobs []TranscriptionTextDoneEventLogprob `json:"logprobs"` // Metadata for the response, check the presence of optional fields with the // [resp.Field.IsPresent] method. JSON struct { Text resp.Field Type resp.Field Logprobs resp.Field ExtraFields map[string]resp.Field raw string } `json:"-"` } // Returns the unmodified JSON received from the API func (r TranscriptionTextDoneEvent) RawJSON() string { return r.JSON.raw } func (r *TranscriptionTextDoneEvent) UnmarshalJSON(data []byte) error { return apijson.UnmarshalRoot(data, r) } type TranscriptionTextDoneEventLogprob struct { // The token that was used to generate the log probability. Token string `json:"token"` // The bytes that were used to generate the log probability. Bytes []interface{} `json:"bytes"` // The log probability of the token. Logprob float64 `json:"logprob"` // Metadata for the response, check the presence of optional fields with the // [resp.Field.IsPresent] method. JSON struct { Token resp.Field Bytes resp.Field Logprob resp.Field ExtraFields map[string]resp.Field raw string } `json:"-"` } // Returns the unmodified JSON received from the API func (r TranscriptionTextDoneEventLogprob) RawJSON() string { return r.JSON.raw } func (r *TranscriptionTextDoneEventLogprob) UnmarshalJSON(data []byte) error { return apijson.UnmarshalRoot(data, r) } type AudioTranscriptionNewParams struct { // The audio file object (not file name) to transcribe, in one of these formats: // flac, mp3, mp4, mpeg, mpga, m4a, ogg, wav, or webm. File io.Reader `json:"file,required" format:"binary"` // ID of the model to use. The options are `gpt-4o-transcribe`, // `gpt-4o-mini-transcribe`, and `whisper-1` (which is powered by our open source // Whisper V2 model). Model AudioModel `json:"model,omitzero,required"` // The language of the input audio. Supplying the input language in // [ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`) // format will improve accuracy and latency. Language param.Opt[string] `json:"language,omitzero"` // An optional text to guide the model's style or continue a previous audio // segment. The // [prompt](https://platform.openai.com/docs/guides/speech-to-text#prompting) // should match the audio language. Prompt param.Opt[string] `json:"prompt,omitzero"` // The sampling temperature, between 0 and 1. Higher values like 0.8 will make the // output more random, while lower values like 0.2 will make it more focused and // deterministic. If set to 0, the model will use // [log probability](https://en.wikipedia.org/wiki/Log_probability) to // automatically increase the temperature until certain thresholds are hit. Temperature param.Opt[float64] `json:"temperature,omitzero"` // Additional information to include in the transcription response. `logprobs` will // return the log probabilities of the tokens in the response to understand the // model's confidence in the transcription. `logprobs` only works with // response_format set to `json` and only with the models `gpt-4o-transcribe` and // `gpt-4o-mini-transcribe`. Include []TranscriptionInclude `json:"include,omitzero"` // The format of the output, in one of these options: `json`, `text`, `srt`, // `verbose_json`, or `vtt`. For `gpt-4o-transcribe` and `gpt-4o-mini-transcribe`, // the only supported format is `json`. // // Any of "json", "text", "srt", "verbose_json", "vtt". ResponseFormat AudioResponseFormat `json:"response_format,omitzero"` // The timestamp granularities to populate for this transcription. // `response_format` must be set `verbose_json` to use timestamp granularities. // Either or both of these options are supported: `word`, or `segment`. Note: There // is no additional latency for segment timestamps, but generating word timestamps // incurs additional latency. // // Any of "word", "segment". TimestampGranularities []string `json:"timestamp_granularities,omitzero"` paramObj } // IsPresent returns true if the field's value is not omitted and not the JSON // "null". To check if this field is omitted, use [param.IsOmitted]. func (f AudioTranscriptionNewParams) IsPresent() bool { return !param.IsOmitted(f) && !f.IsNull() } func (r AudioTranscriptionNewParams) MarshalMultipart() (data []byte, contentType string, err error) { buf := bytes.NewBuffer(nil) writer := multipart.NewWriter(buf) err = apiform.MarshalRoot(r, writer) if err != nil { writer.Close() return nil, "", err } err = writer.Close() if err != nil { return nil, "", err } return buf.Bytes(), writer.FormDataContentType(), nil }