backend/app/extraction/TranscriptionExtractor.scala (68 lines of code) (raw):
package extraction
import cats.syntax.either._
import model.manifest.Blob
import model.{English, Languages}
import org.apache.commons.io.FileUtils
import services.{ScratchSpace, TranscribeConfig}
import services.index.Index
import utils.FfMpeg.FfMpegSubprocessCrashedException
import utils.attempt.{Failure, FfMpegFailure, UnknownFailure}
import utils._
import scala.concurrent.ExecutionContext
import java.io.File
/**
  * Extractor that transcribes audio and video blobs.
  *
  * The input file is converted to WAV with ffmpeg and transcribed with Whisper. When the detected
  * language code is not "en", Whisper is invoked a second time with `translate = true`. The
  * transcript (and the optional translation) is then written to the index against the blob's URI.
  *
  * @param index            index the transcript is written to
  * @param scratchSpace     used to create the per-blob working directories
  * @param transcribeConfig passed through to Whisper unchanged
  */
class TranscriptionExtractor(index: Index, scratchSpace: ScratchSpace, transcribeConfig: TranscribeConfig)(implicit executionContext: ExecutionContext)
  extends FileExtractor(scratchSpace) with Logging {

  /** MIME types this extractor accepts. The trailing notes record what was observed when testing each type. */
  val mimeTypes: Set[String] = Set(
    "audio/wav",
    "audio/vnd.wave",
    "audio/x-aiff", // converted and transcribed, but preview doesn't work
    "audio/mpeg",
    "audio/aac", // Tika can't detect this type
    "audio/vorbis", // converted by ffmpeg but failed in Whisper
    "audio/opus",
    "audio/amr", // converted and transcribed, but preview doesn't work
    "audio/amr-wb", // couldn't find a sample to test
    "audio/x-caf", // couldn't find a sample to test
    "audio/mp4", // couldn't find a sample to test
    "audio/x-ms-wma", // converted and transcribed, but preview doesn't work
    "video/3gpp",
    "video/mp4", // some mp4 samples are detected as quicktime
    "video/quicktime",
    "video/x-flv", // converted and transcribed, but preview doesn't work
    "video/x-ms-wmv", // converted and transcribed, but preview doesn't work
    "video/x-msvideo", // converted and transcribed, but preview doesn't work
    "video/x-m4v",
    "video/mpeg" // converted and transcribed, but preview doesn't work
  )

  /** True when the given MIME type is one of [[mimeTypes]]. */
  def canProcessMimeType: String => Boolean = mimeTypes.contains

  override def indexing = true

  // Set a low priority: transcription takes a long time and we don't want to block up the workers.
  override def priority = 1

  /**
    * Transcribes `file` and writes the result to the index under `blob.uri`.
    *
    * Two scratch directories are created per call (one for Whisper, one for ffmpeg output). Each is
    * deleted in a `finally` block, so they are cleaned up even when the work fails with something
    * that `Either.catchNonFatal` does not capture.
    *
    * @param blob   blob being processed; its URI names the scratch directories and keys the index write
    * @param file   media file to transcribe
    * @param params not used by this extractor
    * @return `Right(())` when every step completes without throwing; `Left(FfMpegFailure)` when
    *         ffmpeg crashed; `Left(UnknownFailure)` for any other non-fatal error
    */
  override def extract(blob: Blob, file: File, params: ExtractionParams): Either[Failure, Unit] = {
    logger.info(s"Running transcription extractor '${blob.uri.value}'")

    val tmpDir = scratchSpace.createWorkingDir(s"whisper-tmp-${blob.uri.value}")
    try {
      val ffMpegTmpDir = scratchSpace.createWorkingDir(s"ffmpeg-tmp-${blob.uri.value}")
      try {
        val stdErrLogger = new BasicStdErrLogger()

        Either.catchNonFatal {
          val convertedFile = FfMpeg.convertToWav(file.toPath, ffMpegTmpDir)
          val transcriptResult: TranscriptionResult =
            Whisper.invokeWhisper(convertedFile, transcribeConfig, tmpDir, stdErrLogger, translate = false)

          // Only pay for a second Whisper run when the detected language is not English.
          val translationResult =
            if (transcriptResult.language != "en")
              Some(Whisper.invokeWhisper(convertedFile, transcribeConfig, tmpDir, stdErrLogger, translate = true))
            else
              None

          // Fall back to English when the detected ISO 639-1 code is not a known language.
          val language = Languages.getByIso6391Code(transcriptResult.language).getOrElse(English)

          index.addDocumentTranscription(blob.uri, transcriptResult.text, translationResult.map(_.text), language).recoverWith {
            case cause =>
              val msg = s"Failed to write transcript result to elasticsearch. Transcript language: ${transcriptResult.language}"
              logger.error(s"$msg Underlying failure: $cause")
              // NOTE(review): this throw is only captured by the surrounding catchNonFatal if
              // recoverWith runs this handler synchronously. If addDocumentTranscription is
              // asynchronous, the error ends up in the value returned by recoverWith, which is
              // discarded below - confirm, and await the result if index failures must fail extraction.
              throw new Error(msg)
          }
          ()
        }.leftMap { error =>
          // Include whatever the subprocesses wrote to stderr to aid diagnosis.
          logger.error(s"${this.name} error ${stdErrLogger.getOutput}", error)
          error match {
            case crashed: FfMpegSubprocessCrashedException =>
              FfMpegFailure(crashed, s"FfMpegFailure - exit code ${crashed.exitCode}")
            case other =>
              UnknownFailure(other)
          }
        }
      } finally {
        FileUtils.deleteDirectory(ffMpegTmpDir.toFile)
      }
    } finally {
      FileUtils.deleteDirectory(tmpDir.toFile)
    }
  }
}