video-intelligence/analyze.js

// Copyright 2017 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

'use strict';

async function analyzeLabelsGCS(gcsUri) {
  // [START video_analyze_labels_gcs]
  // Imports the Google Cloud Video Intelligence library
  const video = require('@google-cloud/video-intelligence').v1;

  // Creates a client
  const client = new video.VideoIntelligenceServiceClient();

  /**
   * TODO(developer): Uncomment the following line before running the sample.
   */
  // const gcsUri = 'GCS URI of the video to analyze, e.g. gs://my-bucket/my-video.mp4';

  const request = {
    inputUri: gcsUri,
    features: ['LABEL_DETECTION'],
  };

  // Detects labels in a video
  const [operation] = await client.annotateVideo(request);
  console.log('Waiting for operation to complete...');
  const [operationResult] = await operation.promise();

  // Gets annotations for video
  const annotations = operationResult.annotationResults[0];

  const labels = annotations.segmentLabelAnnotations;
  labels.forEach(label => {
    console.log(`Label ${label.entity.description} occurs at:`);
    label.segments.forEach(segment => {
      const time = segment.segment;
      if (time.startTimeOffset.seconds === undefined) {
        time.startTimeOffset.seconds = 0;
      }
      if (time.startTimeOffset.nanos === undefined) {
        time.startTimeOffset.nanos = 0;
      }
      if (time.endTimeOffset.seconds === undefined) {
        time.endTimeOffset.seconds = 0;
      }
      if (time.endTimeOffset.nanos === undefined) {
        time.endTimeOffset.nanos = 0;
      }
      console.log(
        `\tStart: ${time.startTimeOffset.seconds}` +
          `.${(time.startTimeOffset.nanos / 1e6).toFixed(0)}s`
      );
      console.log(
        `\tEnd: ${time.endTimeOffset.seconds}.` +
          `${(time.endTimeOffset.nanos / 1e6).toFixed(0)}s`
      );
      console.log(`\tConfidence: ${segment.confidence}`);
    });
  });
  // [END video_analyze_labels_gcs]
}
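
// The samples in this file repeatedly normalize protobuf Duration objects
// ({seconds, nanos}) by hand before printing. A minimal sketch of a helper
// that could factor out that pattern; `formatOffset` is a hypothetical name,
// not part of the original samples.
function formatOffset(offset) {
  const seconds = (offset && offset.seconds) || 0;
  const nanos = (offset && offset.nanos) || 0;
  return `${seconds}.${(nanos / 1e6).toFixed(0)}s`;
}
// Usage sketch: console.log(`\tStart: ${formatOffset(time.startTimeOffset)}`);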

async function analyzeLabelsLocal(path) {
  // [START video_analyze_labels]
  // Imports the Google Cloud Video Intelligence library + Node's fs library
  const video = require('@google-cloud/video-intelligence').v1;
  const fs = require('fs');
  const util = require('util');

  // Creates a client
  const client = new video.VideoIntelligenceServiceClient();

  /**
   * TODO(developer): Uncomment the following line before running the sample.
   */
  // const path = 'Local file to analyze, e.g. ./my-file.mp4';

  // Reads a local video file and converts it to base64
  const readFile = util.promisify(fs.readFile);
  const file = await readFile(path);
  const inputContent = file.toString('base64');

  // Constructs request
  const request = {
    inputContent: inputContent,
    features: ['LABEL_DETECTION'],
  };

  // Detects labels in a video
  const [operation] = await client.annotateVideo(request);
  console.log('Waiting for operation to complete...');
  const [operationResult] = await operation.promise();

  // Gets annotations for video
  const annotations = operationResult.annotationResults[0];

  const labels = annotations.segmentLabelAnnotations;
  labels.forEach(label => {
    console.log(`Label ${label.entity.description} occurs at:`);
    label.segments.forEach(segment => {
      const time = segment.segment;
      if (time.startTimeOffset.seconds === undefined) {
        time.startTimeOffset.seconds = 0;
      }
      if (time.startTimeOffset.nanos === undefined) {
        time.startTimeOffset.nanos = 0;
      }
      if (time.endTimeOffset.seconds === undefined) {
        time.endTimeOffset.seconds = 0;
      }
      if (time.endTimeOffset.nanos === undefined) {
        time.endTimeOffset.nanos = 0;
      }
      console.log(
        `\tStart: ${time.startTimeOffset.seconds}` +
          `.${(time.startTimeOffset.nanos / 1e6).toFixed(0)}s`
      );
      console.log(
        `\tEnd: ${time.endTimeOffset.seconds}.` +
          `${(time.endTimeOffset.nanos / 1e6).toFixed(0)}s`
      );
      console.log(`\tConfidence: ${segment.confidence}`);
    });
  });
  // [END video_analyze_labels]
}

async function analyzeShots(gcsUri) {
  // [START video_analyze_shots]
  // Imports the Google Cloud Video Intelligence library
  const video = require('@google-cloud/video-intelligence').v1;

  // Creates a client
  const client = new video.VideoIntelligenceServiceClient();

  /**
   * TODO(developer): Uncomment the following line before running the sample.
   */
  // const gcsUri = 'GCS URI of file to analyze, e.g. gs://my-bucket/my-video.mp4';

  const request = {
    inputUri: gcsUri,
    features: ['SHOT_CHANGE_DETECTION'],
  };

  // Detects camera shot changes
  const [operation] = await client.annotateVideo(request);
  console.log('Waiting for operation to complete...');
  const [operationResult] = await operation.promise();

  // Gets shot changes
  const shotChanges = operationResult.annotationResults[0].shotAnnotations;
  console.log('Shot changes:');

  if (shotChanges.length === 1) {
    console.log('The entire video is one shot.');
  } else {
    shotChanges.forEach((shot, shotIdx) => {
      console.log(`Scene ${shotIdx} occurs from:`);
      if (shot.startTimeOffset === undefined) {
        shot.startTimeOffset = {};
      }
      if (shot.endTimeOffset === undefined) {
        shot.endTimeOffset = {};
      }
      if (shot.startTimeOffset.seconds === undefined) {
        shot.startTimeOffset.seconds = 0;
      }
      if (shot.startTimeOffset.nanos === undefined) {
        shot.startTimeOffset.nanos = 0;
      }
      if (shot.endTimeOffset.seconds === undefined) {
        shot.endTimeOffset.seconds = 0;
      }
      if (shot.endTimeOffset.nanos === undefined) {
        shot.endTimeOffset.nanos = 0;
      }
      console.log(
        `\tStart: ${shot.startTimeOffset.seconds}` +
          `.${(shot.startTimeOffset.nanos / 1e6).toFixed(0)}s`
      );
      console.log(
        `\tEnd: ${shot.endTimeOffset.seconds}.` +
          `${(shot.endTimeOffset.nanos / 1e6).toFixed(0)}s`
      );
    });
  }
  // [END video_analyze_shots]
}
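
// Shot detection above runs over the whole video. A hedged sketch, assuming
// the v1 API's VideoContext.segments field, of a request that would limit
// analysis to the first 60 seconds (buildSegmentedShotRequest is a
// hypothetical helper name):
function buildSegmentedShotRequest(gcsUri) {
  return {
    inputUri: gcsUri,
    features: ['SHOT_CHANGE_DETECTION'],
    videoContext: {
      // Offsets use the same {seconds, nanos} Duration shape seen above.
      segments: [{startTimeOffset: {seconds: 0}, endTimeOffset: {seconds: 60}}],
    },
  };
}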

async function analyzeSafeSearch(gcsUri) {
  // [START video_analyze_explicit_content]
  // Imports the Google Cloud Video Intelligence library
  const video = require('@google-cloud/video-intelligence').v1;

  // Creates a client
  const client = new video.VideoIntelligenceServiceClient();

  /**
   * TODO(developer): Uncomment the following line before running the sample.
   */
  // const gcsUri = 'GCS URI of video to analyze, e.g. gs://my-bucket/my-video.mp4';

  const request = {
    inputUri: gcsUri,
    features: ['EXPLICIT_CONTENT_DETECTION'],
  };

  // Human-readable likelihoods
  const likelihoods = [
    'UNKNOWN',
    'VERY_UNLIKELY',
    'UNLIKELY',
    'POSSIBLE',
    'LIKELY',
    'VERY_LIKELY',
  ];

  // Detects unsafe content
  const [operation] = await client.annotateVideo(request);
  console.log('Waiting for operation to complete...');
  const [operationResult] = await operation.promise();

  // Gets unsafe content
  const explicitContentResults =
    operationResult.annotationResults[0].explicitAnnotation;
  console.log('Explicit annotation results:');
  explicitContentResults.frames.forEach(result => {
    if (result.timeOffset === undefined) {
      result.timeOffset = {};
    }
    if (result.timeOffset.seconds === undefined) {
      result.timeOffset.seconds = 0;
    }
    if (result.timeOffset.nanos === undefined) {
      result.timeOffset.nanos = 0;
    }
    console.log(
      `\tTime: ${result.timeOffset.seconds}` +
        `.${(result.timeOffset.nanos / 1e6).toFixed(0)}s`
    );
    console.log(
      `\t\tPornography likelihood: ${likelihoods[result.pornographyLikelihood]}`
    );
  });
  // [END video_analyze_explicit_content]
}

async function analyzeVideoTranscription(gcsUri) {
  // [START video_speech_transcription_gcs]
  // Imports the Google Cloud Video Intelligence library
  const videoIntelligence = require('@google-cloud/video-intelligence');

  // Creates a client
  const client = new videoIntelligence.VideoIntelligenceServiceClient();

  /**
   * TODO(developer): Uncomment the following line before running the sample.
   */
  // const gcsUri = 'GCS URI of video to analyze, e.g. gs://my-bucket/my-video.mp4';

  async function analyzeVideoTranscript() {
    const videoContext = {
      speechTranscriptionConfig: {
        languageCode: 'en-US',
        enableAutomaticPunctuation: true,
      },
    };

    const request = {
      inputUri: gcsUri,
      features: ['SPEECH_TRANSCRIPTION'],
      videoContext: videoContext,
    };

    const [operation] = await client.annotateVideo(request);
    console.log('Waiting for operation to complete...');
    const [operationResult] = await operation.promise();
    // There is only one annotation_result since only
    // one video is processed.
    const annotationResults = operationResult.annotationResults[0];

    for (const speechTranscription of annotationResults.speechTranscriptions) {
      // The number of alternatives for each transcription is limited by
      // SpeechTranscriptionConfig.max_alternatives.
      // Each alternative is a different possible transcription
      // and has its own confidence score.
      for (const alternative of speechTranscription.alternatives) {
        console.log('Alternative level information:');
        console.log(`Transcript: ${alternative.transcript}`);
        console.log(`Confidence: ${alternative.confidence}`);

        console.log('Word level information:');
        for (const wordInfo of alternative.words) {
          const word = wordInfo.word;
          const start_time =
            wordInfo.startTime.seconds + wordInfo.startTime.nanos * 1e-9;
          const end_time =
            wordInfo.endTime.seconds + wordInfo.endTime.nanos * 1e-9;
          console.log('\t' + start_time + 's - ' + end_time + 's: ' + word);
        }
      }
    }
  }
  // Await the inner helper so errors propagate to the caller.
  await analyzeVideoTranscript();
  // [END video_speech_transcription_gcs]
}
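
// The transcription loop above notes that alternatives are capped by
// SpeechTranscriptionConfig.max_alternatives. A hedged sketch of a video
// context requesting up to three alternatives per transcription
// (buildTranscriptionContext is a hypothetical helper name):
function buildTranscriptionContext() {
  return {
    speechTranscriptionConfig: {
      languageCode: 'en-US',
      enableAutomaticPunctuation: true,
      // Assumption: the Node client exposes max_alternatives as camelCase.
      maxAlternatives: 3,
    },
  };
}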

async function analyzeTextGCS(gcsUri) {
  //gcsUri - GCS URI of the video to analyze, e.g. gs://my-bucket/my-video.mp4
  //[START video_detect_text_gcs]
  // Imports the Google Cloud Video Intelligence library
  const Video = require('@google-cloud/video-intelligence');

  // Creates a client
  const video = new Video.VideoIntelligenceServiceClient();

  /**
   * TODO(developer): Uncomment the following line before running the sample.
   */
  // const gcsUri = 'GCS URI of the video to analyze, e.g. gs://my-bucket/my-video.mp4';

  const request = {
    inputUri: gcsUri,
    features: ['TEXT_DETECTION'],
  };

  // Detects text in a video
  const [operation] = await video.annotateVideo(request);
  console.log('Waiting for operation to complete...');
  const results = await operation.promise();

  // Gets annotations for video
  const textAnnotations = results[0].annotationResults[0].textAnnotations;
  textAnnotations.forEach(textAnnotation => {
    console.log(`Text ${textAnnotation.text} occurs at:`);
    textAnnotation.segments.forEach(segment => {
      const time = segment.segment;
      console.log(
        ` Start: ${time.startTimeOffset.seconds || 0}.${(
          time.startTimeOffset.nanos / 1e6
        ).toFixed(0)}s`
      );
      console.log(
        ` End: ${time.endTimeOffset.seconds || 0}.${(
          time.endTimeOffset.nanos / 1e6
        ).toFixed(0)}s`
      );
      console.log(` Confidence: ${segment.confidence}`);
      segment.frames.forEach(frame => {
        const timeOffset = frame.timeOffset;
        console.log(
          `Time offset for the frame: ${timeOffset.seconds || 0}` +
            `.${(timeOffset.nanos / 1e6).toFixed(0)}s`
        );
        console.log('Rotated Bounding Box Vertices:');
        frame.rotatedBoundingBox.vertices.forEach(vertex => {
          console.log(`Vertex.x:${vertex.x}, Vertex.y:${vertex.y}`);
        });
      });
    });
  });
  // [END video_detect_text_gcs]
}
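
// The rotatedBoundingBox vertices printed above are normalized to [0, 1]
// relative to the frame dimensions. A minimal sketch converting a vertex to
// pixel coordinates for a known frame size (vertexToPixels is a hypothetical
// helper name):
function vertexToPixels(vertex, frameWidth, frameHeight) {
  return {x: vertex.x * frameWidth, y: vertex.y * frameHeight};
}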

async function analyzeObjectTrackingGCS(gcsUri) {
  //gcsUri - GCS URI of the video to analyze, e.g. gs://my-bucket/my-video.mp4
  //[START video_object_tracking_gcs]
  // Imports the Google Cloud Video Intelligence library
  const Video = require('@google-cloud/video-intelligence');

  // Creates a client
  const video = new Video.VideoIntelligenceServiceClient();

  /**
   * TODO(developer): Uncomment the following line before running the sample.
   */
  // const gcsUri = 'GCS URI of the video to analyze, e.g. gs://my-bucket/my-video.mp4';

  const request = {
    inputUri: gcsUri,
    features: ['OBJECT_TRACKING'],
    // Recommended to use us-east1 for the best latency due to different types
    // of processors used in this region and others.
    locationId: 'us-east1',
  };

  // Detects objects in a video
  const [operation] = await video.annotateVideo(request);
  console.log('Waiting for operation to complete...');
  const results = await operation.promise();

  // Gets annotations for video
  const annotations = results[0].annotationResults[0];
  const objects = annotations.objectAnnotations;
  objects.forEach(object => {
    console.log(`Entity description: ${object.entity.description}`);
    console.log(`Entity id: ${object.entity.entityId}`);
    const time = object.segment;
    console.log(
      `Segment: ${time.startTimeOffset.seconds || 0}` +
        `.${(time.startTimeOffset.nanos / 1e6).toFixed(0)}s to ${
          time.endTimeOffset.seconds || 0
        }.` +
        `${(time.endTimeOffset.nanos / 1e6).toFixed(0)}s`
    );
    console.log(`Confidence: ${object.confidence}`);
    const frame = object.frames[0];
    const box = frame.normalizedBoundingBox;
    const timeOffset = frame.timeOffset;
    console.log(
      `Time offset for the first frame: ${timeOffset.seconds || 0}` +
        `.${(timeOffset.nanos / 1e6).toFixed(0)}s`
    );
    console.log('Bounding box position:');
    console.log(` left :${box.left}`);
    console.log(` top :${box.top}`);
    console.log(` right :${box.right}`);
    console.log(` bottom :${box.bottom}`);
  });
  // [END video_object_tracking_gcs]
}
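
// Like the text vertices, normalizedBoundingBox coordinates are relative
// values in [0, 1]. A minimal sketch scaling a box to pixels (toPixelBox is a
// hypothetical helper name):
function toPixelBox(box, frameWidth, frameHeight) {
  return {
    left: box.left * frameWidth,
    top: box.top * frameHeight,
    right: box.right * frameWidth,
    bottom: box.bottom * frameHeight,
  };
}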

async function analyzeText(path) {
  //[START video_detect_text]
  // Imports the Google Cloud Video Intelligence library + Node's fs library
  const Video = require('@google-cloud/video-intelligence');
  const fs = require('fs');
  const util = require('util');

  // Creates a client
  const video = new Video.VideoIntelligenceServiceClient();

  /**
   * TODO(developer): Uncomment the following line before running the sample.
   */
  // const path = 'Local file to analyze, e.g. ./my-file.mp4';

  // Reads a local video file and converts it to base64
  const file = await util.promisify(fs.readFile)(path);
  const inputContent = file.toString('base64');

  const request = {
    inputContent: inputContent,
    features: ['TEXT_DETECTION'],
  };

  // Detects text in a video
  const [operation] = await video.annotateVideo(request);
  console.log('Waiting for operation to complete...');
  const results = await operation.promise();

  // Gets annotations for video
  const textAnnotations = results[0].annotationResults[0].textAnnotations;
  textAnnotations.forEach(textAnnotation => {
    console.log(`Text ${textAnnotation.text} occurs at:`);
    textAnnotation.segments.forEach(segment => {
      const time = segment.segment;
      if (time.startTimeOffset.seconds === undefined) {
        time.startTimeOffset.seconds = 0;
      }
      if (time.startTimeOffset.nanos === undefined) {
        time.startTimeOffset.nanos = 0;
      }
      if (time.endTimeOffset.seconds === undefined) {
        time.endTimeOffset.seconds = 0;
      }
      if (time.endTimeOffset.nanos === undefined) {
        time.endTimeOffset.nanos = 0;
      }
      console.log(
        `\tStart: ${time.startTimeOffset.seconds || 0}` +
          `.${(time.startTimeOffset.nanos / 1e6).toFixed(0)}s`
      );
      console.log(
        `\tEnd: ${time.endTimeOffset.seconds || 0}.` +
          `${(time.endTimeOffset.nanos / 1e6).toFixed(0)}s`
      );
      console.log(`\tConfidence: ${segment.confidence}`);
      segment.frames.forEach(frame => {
        const timeOffset = frame.timeOffset;
        console.log(
          `Time offset for the frame: ${timeOffset.seconds || 0}` +
            `.${(timeOffset.nanos / 1e6).toFixed(0)}s`
        );
        console.log('Rotated Bounding Box Vertices:');
        frame.rotatedBoundingBox.vertices.forEach(vertex => {
          console.log(`Vertex.x:${vertex.x}, Vertex.y:${vertex.y}`);
        });
      });
    });
  });
  // [END video_detect_text]
}
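
// Note: the local-file samples (analyzeLabelsLocal, analyzeText,
// analyzeObjectTracking) send the video inline as base64 via inputContent,
// which is convenient for small files; larger videos are generally better
// uploaded to Cloud Storage and referenced via inputUri, as in the GCS
// variants above.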

async function analyzeObjectTracking(path) {
  //[START video_object_tracking]
  // Imports the Google Cloud Video Intelligence library
  const Video = require('@google-cloud/video-intelligence');
  const fs = require('fs');
  const util = require('util');

  // Creates a client
  const video = new Video.VideoIntelligenceServiceClient();

  /**
   * TODO(developer): Uncomment the following line before running the sample.
   */
  // const path = 'Local file to analyze, e.g. ./my-file.mp4';

  // Reads a local video file and converts it to base64
  const file = await util.promisify(fs.readFile)(path);
  const inputContent = file.toString('base64');

  const request = {
    inputContent: inputContent,
    features: ['OBJECT_TRACKING'],
    // Recommended to use us-east1 for the best latency due to different types
    // of processors used in this region and others.
    locationId: 'us-east1',
  };

  // Detects objects in a video
  const [operation] = await video.annotateVideo(request);
  console.log('Waiting for operation to complete...');
  const results = await operation.promise();

  // Gets annotations for video
  const annotations = results[0].annotationResults[0];
  const objects = annotations.objectAnnotations;
  objects.forEach(object => {
    console.log(`Entity description: ${object.entity.description}`);
    console.log(`Entity id: ${object.entity.entityId}`);
    const time = object.segment;
    console.log(
      `Segment: ${time.startTimeOffset.seconds || 0}` +
        `.${(time.startTimeOffset.nanos / 1e6).toFixed(0)}s to ${
          time.endTimeOffset.seconds || 0
        }.` +
        `${(time.endTimeOffset.nanos / 1e6).toFixed(0)}s`
    );
    console.log(`Confidence: ${object.confidence}`);
    const frame = object.frames[0];
    const box = frame.normalizedBoundingBox;
    const timeOffset = frame.timeOffset;
    console.log(
      `Time offset for the first frame: ${timeOffset.seconds || 0}` +
        `.${(timeOffset.nanos / 1e6).toFixed(0)}s`
    );
    console.log('Bounding box position:');
    console.log(` left :${box.left}`);
    console.log(` top :${box.top}`);
    console.log(` right :${box.right}`);
    console.log(` bottom :${box.bottom}`);
  });
  // [END video_object_tracking]
}
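
// Each sample blocks on operation.promise() until annotation finishes. A
// hedged sketch, assuming google-gax's long-running Operation emits
// 'progress' events carrying AnnotateVideoProgress metadata, that logs
// progress while waiting (annotateWithProgress is a hypothetical helper name):
async function annotateWithProgress(client, request) {
  const [operation] = await client.annotateVideo(request);
  operation.on('progress', metadata => {
    // Assumption: metadata.annotationProgress[0].progressPercent is populated
    // once the service starts reporting progress.
    const progress =
      metadata.annotationProgress && metadata.annotationProgress[0]
        ? metadata.annotationProgress[0].progressPercent
        : 'unknown';
    console.log(`Progress: ${progress}%`);
  });
  return operation.promise();
}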

async function main() {
  require('yargs')
    .demand(1)
    .command(
      'shots <gcsUri>',
      'Analyzes shot changes in a video stored in Google Cloud Storage using the Cloud Video Intelligence API.',
      {},
      opts => analyzeShots(opts.gcsUri)
    )
    .command(
      'labels-gcs <gcsUri>',
      'Labels objects in a video stored in Google Cloud Storage using the Cloud Video Intelligence API.',
      {},
      opts => analyzeLabelsGCS(opts.gcsUri)
    )
    .command(
      'labels-file <filePath>',
      'Labels objects in a video stored locally using the Cloud Video Intelligence API.',
      {},
      opts => analyzeLabelsLocal(opts.filePath)
    )
    .command(
      'safe-search <gcsUri>',
      'Detects explicit content in a video stored in Google Cloud Storage.',
      {},
      opts => analyzeSafeSearch(opts.gcsUri)
    )
    .command(
      'transcription <gcsUri>',
      'Extract the video transcription using the Cloud Video Intelligence API.',
      {},
      opts => analyzeVideoTranscription(opts.gcsUri)
    )
    .command(
      'video-text-gcs <gcsUri>',
      'Analyzes text in a video stored in Google Cloud Storage using the Cloud Video Intelligence API.',
      {},
      opts => analyzeTextGCS(opts.gcsUri)
    )
    .command(
      'track-objects-gcs <gcsUri>',
      'Analyzes objects in a video stored in Google Cloud Storage using the Cloud Video Intelligence API.',
      {},
      opts => analyzeObjectTrackingGCS(opts.gcsUri)
    )
    .command(
      'video-text <path>',
      'Analyzes text in a video stored in a local file using the Cloud Video Intelligence API.',
      {},
      opts => analyzeText(opts.path)
    )
    .command(
      'track-objects <path>',
      'Analyzes objects in a video stored in a local file using the Cloud Video Intelligence API.',
      {},
      opts => analyzeObjectTracking(opts.path)
    )
    .example('node $0 shots gs://cloud-samples-data/video/googlework_short.mp4')
    .example('node $0 labels-gcs gs://cloud-samples-data/video/cat.mp4')
    .example('node $0 labels-file googlework_short.mp4')
    .example(
      'node $0 safe-search gs://cloud-samples-data/video/googlework_short.mp4'
    )
    .example('node $0 transcription gs://cloud-samples-data/video/cat.mp4')
    .example('node $0 video-text ./resources/googlework_short.mp4')
    .example(
      'node $0 video-text-gcs gs://nodejs-docs-samples/video/googlework_short.mp4'
    )
    .example('node $0 track-objects ./resources/googlework_short.mp4')
    .example('node $0 track-objects-gcs gs://nodejs-docs-samples/video/cat.mp4')
    .wrap(120)
    .recommendCommands()
    .epilogue(
      'For more information, see https://cloud.google.com/video-intelligence/docs'
    )
    .help()
    .strict().argv;
}

main().catch(console.error);