backend/app/utils/Whisper.scala (37 lines of code) (raw):
package utils
import services.TranscribeConfig
import java.nio.file.Path
import scala.io.Source
import scala.sys.process._
/** Outcome of a single whisper run.
  *
  * @param text     the transcript text read back from whisper's text output file
  * @param language the language code attached to the transcript ("en" when translating
  *                 or when no language could be detected from whisper's output)
  */
case class TranscriptionResult(
  text: String,
  language: String
)
/** Thin wrapper around the whisper.cpp command line binary installed under `/opt/whisper`. */
object Whisper extends Logging {

  /** Raised when the whisper subprocess exits with a non-zero exit code.
    * The message carries the exit code and everything captured from stderr.
    */
  private class WhisperSubprocessCrashedException(exitCode: Int, stderr: String)
    extends Exception(s"Exit code: $exitCode: ${stderr}")

  /** Reads the transcript whisper wrote for the given `--output-file` value.
    *
    * @param outputFile the path that was passed to whisper as `--output-file`
    * @return the file's lines joined with "\n"
    */
  private def getTranscriptOutputText(outputFile: Path): String = {
    //for some reason whisper adds an extra .txt extension
    val outputLocation = outputFile.resolveSibling(outputFile.getFileName.toString + ".txt")
    // NOTE(review): the file is decoded with the implicit default codec - confirm that matches whisper's output encoding.
    val outputSource = Source.fromFile(outputLocation.toFile)
    // Close the handle even when reading fails, so a bad file does not leak a descriptor.
    try outputSource.getLines().mkString("\n")
    finally outputSource.close()
  }

  /** Runs whisper.cpp on an audio file and returns the transcript plus its language.
    *
    * The subprocess's stdout is forwarded to `stdout`, its stderr lines are handed to
    * `whisperLogger.append`, and the logger's accumulated output is afterwards scanned for
    * the "auto-detected language: " marker to determine the language.
    *
    * @param audioFilePath audio file passed to whisper via `-f`
    * @param config        supplies the model filename, resolved under `/opt/whisper/whisper.cpp/models/`
    * @param tmpDir        directory in which whisper writes its text output (named after the audio file)
    * @param whisperLogger collects whisper's stderr; also used for the failure message
    * @param translate     when true, passes `--translate` and reports the language as "en"
    * @return the transcript text and a two-character language code; falls back to "en"
    *         (with a warning) when no language marker is found in whisper's output
    * @throws Exception (a private `WhisperSubprocessCrashedException`) when whisper exits non-zero
    */
  def invokeWhisper(audioFilePath: Path, config: TranscribeConfig, tmpDir: Path, whisperLogger: BasicStdErrLogger, translate: Boolean): TranscriptionResult = {
    val tempFile = tmpDir.resolve(s"${audioFilePath.getFileName}")
    val translateArgs = if (translate) Seq("--translate") else Seq.empty[String]

    // Pass the arguments as a sequence rather than one interpolated string: a command
    // string is tokenised on whitespace, which breaks any path or model name containing spaces.
    val cmd = Seq(
      "/opt/whisper/whisper.cpp/main",
      "-m", s"/opt/whisper/whisper.cpp/models/${config.whisperModelFilename}",
      "-f", audioFilePath.toString,
      "--output-txt",
      "--output-file", tempFile.toString,
      "-l", "auto"
    ) ++ translateArgs

    val exitCode = Process(cmd, cwd = None).!(ProcessLogger(stdout.append(_), whisperLogger.append))

    exitCode match {
      case 0 =>
        val transcriptText = getTranscriptOutputText(tempFile)
        val languageSplit = whisperLogger.getOutput.split("auto-detected language: ")
        if (languageSplit.length > 1) {
          // The two characters right after the marker are taken as the language code;
          // a translated transcript is always reported as english.
          val detectedLanguage = if (translate) "en" else languageSplit(1).take(2)
          TranscriptionResult(transcriptText, detectedLanguage)
        } else {
          logger.warn("Failed to detect language - transcription may have failed. Falling back to english.")
          TranscriptionResult(transcriptText, "en")
        }
      case _ =>
        logger.error("Whisper extraction failed")
        throw new WhisperSubprocessCrashedException(exitCode, whisperLogger.getOutput)
    }
  }
}