in HuggingChat-Mac/LocalSTT/AudioModelManager.swift [619:705]
/// Runs WhisperKit decoding over `samples` and returns the merged result.
///
/// Decoding options are assembled from the manager's current settings (language, task,
/// temperature, fallback count, sample length, prefill flags, special-token and timestamp
/// switches, chunking strategy). `lastConfirmedSegmentEndSeconds` is passed as the sole
/// `clipTimestamps` entry.
///
/// While decoding runs, a progress callback:
/// - mirrors the partial text into `currentChunks`, `currentText`, `currentFallbacks` and
///   `currentDecodingLoops` on the main queue, and
/// - returns `false` (early stop) when the compression ratio of the most recent tokens
///   exceeds `options.compressionRatioThreshold`, or when the average log-probability
///   drops below `options.logProbThreshold`. It returns `nil` otherwise.
///
/// - Parameter samples: Audio samples forwarded to `whisperKit.transcribe(audioArray:...)`.
/// - Returns: The output of `mergeTranscriptionResults` for the decoded results, or `nil`
///   when no `whisperKit` instance is set.
/// - Throws: Rethrows any error raised by `whisperKit.transcribe`.
func transcribeAudioSamples(_ samples: [Float]) async throws -> TranscriptionResult? {
    guard let whisperKit = whisperKit else { return nil }

    let languageCode = Constants.languages[selectedLanguage, default: Constants.defaultLanguageCode]
    let task: DecodingTask = selectedTask == "transcribe" ? .transcribe : .translate
    let seekClip: [Float] = [lastConfirmedSegmentEndSeconds]

    let options = DecodingOptions(
        verbose: true,
        task: task,
        language: languageCode,
        temperature: Float(temperatureStart),
        temperatureFallbackCount: Int(fallbackCount),
        sampleLength: Int(sampleLength),
        usePrefillPrompt: enablePromptPrefill,
        usePrefillCache: enableCachePrefill,
        skipSpecialTokens: !enableSpecialCharacters,
        withoutTimestamps: !enableTimestamps,
        wordTimestamps: true,
        clipTimestamps: seekClip,
        chunkingStrategy: chunkingStrategy
    )

    // Progress callback: publishes partial text to the UI state and decides on early stopping.
    let decodingCallback: ((TranscriptionProgress) -> Bool?) = { [self] (progress: TranscriptionProgress) in
        DispatchQueue.main.async {
            let fallbacks = Int(progress.timings.totalDecodingFallbacks)
            // Streaming mode keeps everything in a single chunk (id 0).
            let chunkId = self.transcriptionMode == .streaming ? 0 : progress.windowId

            // Default: a fresh single-entry chunk holding the latest text.
            var updatedChunk = (chunkText: [progress.text], fallbacks: fallbacks)
            if var currentChunk = self.currentChunks[chunkId], let previousChunkText = currentChunk.chunkText.last {
                if progress.text.count >= previousChunkText.count {
                    // Same window of an existing chunk: refresh its last entry in place.
                    // `chunkText` is non-empty here because `.last` was bound above.
                    currentChunk.chunkText[currentChunk.chunkText.endIndex - 1] = progress.text
                    updatedChunk = currentChunk
                } else if fallbacks == currentChunk.fallbacks && self.transcriptionMode == .streaming {
                    // New window (fallback count unchanged, streaming mode only).
                    // NOTE(review): this previously read
                    // `updatedChunk.chunkText.first ?? "" + progress.text`, which parses as
                    // `first ?? ("" + progress.text)` and therefore always produced just
                    // `progress.text`. That effective behavior is kept; confirm whether
                    // appending to the previous text was actually intended.
                    updatedChunk.chunkText = [progress.text]
                } else {
                    // Fallback: the previous (bad) text is discarded. `updatedChunk` already
                    // holds only the new text, so no index derived from `currentChunk` is
                    // used (that could be out of range for the one-element array).
                    updatedChunk.fallbacks = fallbacks
                    print("Fallback occured: \(fallbacks)")
                }
            }

            // Store the chunk and rebuild the displayed text in ascending chunk-id order.
            self.currentChunks[chunkId] = updatedChunk
            let joinedChunks = self.currentChunks
                .sorted { $0.key < $1.key }
                .flatMap { $0.value.chunkText }
                .joined(separator: "\n")
            self.currentText = joinedChunks
            self.currentFallbacks = fallbacks
            self.currentDecodingLoops += 1
        }

        // Early stopping: compression ratio over the trailing `compressionCheckWindow` tokens.
        // A nil threshold skips the check instead of trapping on a force-unwrap.
        let currentTokens = progress.tokens
        let checkWindow = Int(compressionCheckWindow)
        if currentTokens.count > checkWindow, let ratioThreshold = options.compressionRatioThreshold {
            let checkTokens: [Int] = currentTokens.suffix(checkWindow)
            let ratio = compressionRatio(of: checkTokens)
            if ratio > ratioThreshold {
                Logging.debug("Early stopping due to compression threshold")
                return false
            }
        }

        // Early stopping: average log-probability. Skipped when either value is nil.
        if let avgLogprob = progress.avgLogprob,
           let logProbThreshold = options.logProbThreshold,
           avgLogprob < logProbThreshold {
            Logging.debug("Early stopping due to logprob threshold")
            return false
        }

        return nil
    }

    let transcriptionResults: [TranscriptionResult] = try await whisperKit.transcribe(
        audioArray: samples,
        decodeOptions: options,
        callback: decodingCallback
    )

    return mergeTranscriptionResults(transcriptionResults)
}