srf-audio-process-func/index.js

/** * Copyright 2021, Google, Inc. * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ 'use strict'; // import modules const PubSub = require(`@google-cloud/pubsub`); const storage = require('@google-cloud/storage')(); const speech = require('@google-cloud/speech').v1p1beta1; const client = new speech.SpeechClient(); const ffmpeg = require('fluent-ffmpeg'); const ffmpegPath = require('@ffmpeg-installer/ffmpeg').path; ffmpeg.setFfmpegPath(ffmpegPath); exports.srfAudioProcessFunc = (event, context, callback) => { const file = event; const topicName = "YOUR_TOPIC_NAME"; // TODO: Replace value with your GCP Pub/Sub topic name const audioPath = { uri: `gs://${file.bucket}/${file.name}` }; const readFile = storage.bucket(file.bucket).file(file.name); const remoteReadStream = async () => { try { const res = readFile.createReadStream(); return res; } catch (err) { console.error(err); }; }; function getAudioMetadata(path) { // check if file is a wav or flac audio file return new Promise((res, rej) => { ffmpeg.ffprobe(path, (err, metadata) => { if (err) return rej(err); const audioMetaData = require('util').inspect(metadata, false, null); if (!audioMetaData) throw new Error('Cannot find metadata of ' + path) return res(audioMetaData); }); }); }; remoteReadStream().then(resRemoteReadStream => { getAudioMetadata(resRemoteReadStream).then(res => { resRemoteReadStream.destroy(); // start cleanup - fluent-ffmpeg has an dirtyJSON output let resString = res.replace(/[&\/\\#+()$~%'"*?<>{}\s\n\]\[]/g, ''); // remove listed characters resString = resString.replace(/:/g, ','); // replace semicolon with commas let resArray = resString.split(','); // split string on commas // end cleanup let sampleRate = resArray[resArray.indexOf('sample_rate') + 1]; let channels = resArray[resArray.indexOf('channels') + 1]; let bitsPerSample = resArray[resArray.indexOf('bits_per_sample') + 1]; // check if file is a wav or flac audio file let iExt = file.name.lastIndexOf('.'); let ext = (iExt < 0) ? '' : file.name.substr(iExt); let duration; if (ext === '.wav') { duration = (file.size / (sampleRate * channels * (bitsPerSample / 8))) / 60; } else if (ext === '.flac') { duration = resArray[resArray.indexOf('duration') + 1]; } let audioConfig = { sampleRateHertz: sampleRate, languageCode: `en-US`, maxAlternatives: 0, enableWordTimeOffsets: true, useEnhanced: true, audioChannelCount: channels, enableSeparateRecognitionPerChannel: true, model: 'phone_call' }; // send audio file to STT, get job name, add to pub/sub object and publish message const audioRequest = { audio: audioPath, config: audioConfig, }; let pubSubObj = { 'filename': `gs://${file.bucket}/${file.name}`, 'duration': duration }; client .longRunningRecognize(audioRequest) .then(response => { const [operation, initialApiResponse] = response; return initialApiResponse; }) .then(response => { pubSubObj['sttnameid'] = response.name; // add STT 'name' ID to pubSubObj }) .then(() => { const pubSubData = JSON.stringify(pubSubObj); const dataBuffer = Buffer.from(pubSubData); const pubsub = new PubSub(); return pubsub .topic(topicName) .publisher() .publish(dataBuffer) .then(messageId => { console.log(`Message ${messageId} published.`); callback(null, 'Success!'); }) .catch(err => { console.error('ERROR:', err); }); }); }); }).catch(err => console.error(err)) };

srf-audio-process-func/index.js (96 lines of code) (raw):