// functions/v2/ocr/app/index.js (117 lines of code) (raw):
// Copyright 2019 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
'use strict';
// [START functions_ocr_setup]
// Get a reference to the Pub/Sub component
// (publishResult uses this client to look up topics and publish messages)
const {PubSub} = require('@google-cloud/pubsub');
const pubsub = new PubSub();
// Get a reference to the Cloud Storage component
// (saveResult uses this client to write translated text to a bucket)
const {Storage} = require('@google-cloud/storage');
const storage = new Storage();
// Get a reference to the Cloud Vision API component
// (detectText uses this client to run text detection on an image)
const Vision = require('@google-cloud/vision');
const vision = new Vision.ImageAnnotatorClient();
// Get a reference to the Translate API component (v2 surface of the library)
// (used for language detection in detectText and translation in translateText)
const {Translate} = require('@google-cloud/translate').v2;
const translate = new Translate();
// Get a reference to the Functions Framework, whose cloudEvent() call
// registers the processImage, translateText and saveResult handlers
const functions = require('@google-cloud/functions-framework');
// [END functions_ocr_setup]
/**
 * Publishes the result to the given pubsub topic and returns a Promise.
 *
 * The payload is serialized with JSON.stringify and sent as the message
 * body. The topic is looked up with `autoCreate: true`.
 *
 * @param {string} topicName Name of the topic on which to publish.
 * @param {object} data The message data to publish.
 * @returns {Promise<void>} Settles only after the publish call has settled;
 *   rejects if the topic lookup or the publish call rejects.
 */
const publishResult = async (topicName, data) => {
  const dataBuffer = Buffer.from(JSON.stringify(data));

  const [topic] = await pubsub.topic(topicName).get({autoCreate: true});

  // The payload must be passed under the `data` key, and the returned promise
  // must be awaited: otherwise the caller can finish before the publish does
  // and a publish failure is never reported.
  await topic.publishMessage({data: dataBuffer});
};
// [START functions_ocr_detect]
/**
 * Detects the text in an image using the Google Vision API and publishes
 * one translation request per target language.
 *
 * Reads two environment variables:
 *   - TO_LANG: comma-separated list of target languages (entries are
 *     trimmed and blank entries are ignored).
 *   - TRANSLATE_TOPIC: Pub/Sub topic the translation requests are sent to.
 *
 * @param {string} bucketName Cloud Storage bucket name.
 * @param {string} filename Cloud Storage file name.
 * @returns {Promise<Array>} Resolves once every translation request has been
 *   published; resolves to an empty array when no text was found.
 * @throws {Error} If TO_LANG or TRANSLATE_TOPIC is missing or empty.
 */
const detectText = async (bucketName, filename) => {
  // Validate configuration before calling any API so a misconfigured
  // deployment fails with a clear message.
  const topicName = process.env.TRANSLATE_TOPIC;
  if (!topicName) {
    throw new Error(
      'TRANSLATE_TOPIC not provided. Make sure the TRANSLATE_TOPIC environment variable is set'
    );
  }
  const targetLangs = (process.env.TO_LANG || '')
    .split(',')
    .map(lang => lang.trim())
    .filter(Boolean);
  if (targetLangs.length === 0) {
    throw new Error(
      'Target languages not provided. Make sure the TO_LANG environment variable is a comma-separated list'
    );
  }

  console.log(`Looking for text in image ${filename}`);
  const [textDetections] = await vision.textDetection(
    `gs://${bucketName}/${filename}`
  );
  // The description of the first annotation is used as the extracted text;
  // a missing annotation list is treated the same as an empty one.
  const [annotation] = textDetections.textAnnotations || [];
  const text = annotation ? annotation.description.trim() : '';
  console.log('Extracted text from image:', text);

  // translateText rejects messages whose text is empty, so publishing them
  // would only produce failing invocations downstream.
  if (!text) {
    console.log(`No text found in image ${filename}; nothing to translate.`);
    return [];
  }

  // detect() may hand back either a single detection or an array of them.
  let [translateDetection] = await translate.detect(text);
  if (Array.isArray(translateDetection)) {
    [translateDetection] = translateDetection;
  }
  console.log(
    `Detected language "${translateDetection.language}" for ${filename}`
  );

  // Submit a message to the bus for each language we're going to translate to
  const tasks = targetLangs.map(lang => {
    const messageData = {text, filename, lang};

    // Helper function that publishes translation result to a Pub/Sub topic
    // For more information on publishing Pub/Sub messages, see this page:
    //   https://cloud.google.com/pubsub/docs/publisher
    return publishResult(topicName, messageData);
  });

  return Promise.all(tasks);
};
// [END functions_ocr_detect]
/**
 * Builds the name of the text file that holds the result for an image.
 *
 * @param {string} filename Name of the source file.
 * @param {string} lang Language to embed in the new name.
 * @returns {string} `filename` followed by `_to_`, `lang` and `.txt`.
 */
const renameImageForSave = (filename, lang) =>
  ''.concat(filename, '_to_', lang, '.txt');
// [START functions_ocr_process]
/**
 * CloudEvent handler registered under the name `processImage`. It is
 * executed when a file is uploaded to the Cloud Storage bucket you created
 * for uploading images, and hands that file to detectText.
 *
 * @param {object} cloudEvent A CloudEvent containing the Cloud Storage File object.
 * https://cloud.google.com/storage/docs/json_api/v1/objects
 * @throws {Error} If the event data has no `bucket` or no `name` property.
 */
functions.cloudEvent('processImage', async cloudEvent => {
  const {bucket: bucketName, name: fileName} = cloudEvent.data;

  // Each entry pairs a required value with the error raised when it is
  // missing; the order decides which problem is reported first.
  const requiredFields = [
    [
      bucketName,
      'Bucket not provided. Make sure you have a "bucket" property in your request',
    ],
    [
      fileName,
      'Filename not provided. Make sure you have a "name" property in your request',
    ],
  ];
  for (const [value, errorMessage] of requiredFields) {
    if (!value) {
      throw new Error(errorMessage);
    }
  }

  await detectText(bucketName, fileName);
  console.log(`File ${fileName} processed.`);
});
// [END functions_ocr_process]
// [START functions_ocr_translate]
/**
 * This function is exported by index.js, and is executed when
 * a message is published to the Cloud Pub/Sub topic specified
 * by the TRANSLATE_TOPIC environment variable. The function
 * translates text using the Google Translate API and publishes the
 * translation to the topic named by the RESULT_TOPIC environment variable.
 *
 * The message body is base64-encoded JSON with `text`, `filename` and `lang`
 * properties. It is read from `cloudEvent.data.message.data`; a bare base64
 * string in `cloudEvent.data.message` is accepted as well.
 *
 * @param {object} cloudEvent The CloudEvent containing the Pub/Sub Message object
 * https://cloud.google.com/pubsub/docs/reference/rest/v1/PubsubMessage
 * @throws {Error} If `text`, `filename` or `lang` is missing from the message.
 */
functions.cloudEvent('translateText', async cloudEvent => {
  const {message} = cloudEvent.data;
  // Pub/Sub CloudEvents nest the base64 payload under message.data; fall back
  // to treating `message` itself as the payload for callers that send it bare.
  const encodedPayload =
    message && typeof message.data === 'string' ? message.data : message;
  const jsonStr = Buffer.from(encodedPayload, 'base64').toString();
  const {text, filename, lang} = JSON.parse(jsonStr);

  if (!text) {
    throw new Error(
      'Text not provided. Make sure you have a "text" property in your request'
    );
  }
  if (!filename) {
    throw new Error(
      'Filename not provided. Make sure you have a "filename" property in your request'
    );
  }
  if (!lang) {
    throw new Error(
      'Language not provided. Make sure you have a "lang" property in your request'
    );
  }

  console.log(`Translating text into ${lang}`);
  const [translation] = await translate.translate(text, lang);

  console.log('Translated text:', translation);

  const messageData = {
    text: translation,
    filename: filename,
    lang: lang,
  };

  await publishResult(process.env.RESULT_TOPIC, messageData);
  console.log(`Text translated to ${lang}`);
});
// [END functions_ocr_translate]
// [START functions_ocr_save]
/**
 * This function is exported by index.js, and is executed when
 * a message is published to the Cloud Pub/Sub topic specified
 * by the RESULT_TOPIC environment variable. The function saves
 * the data packet to a file in GCS, in the bucket named by the
 * RESULT_BUCKET environment variable.
 *
 * The message body is base64-encoded JSON with `text`, `filename` and `lang`
 * properties. It is read from `cloudEvent.data.message.data`; a bare base64
 * string in `cloudEvent.data.message` is accepted as well.
 *
 * @param {object} cloudEvent The CloudEvent containing the Pub/Sub Message object.
 * https://cloud.google.com/pubsub/docs/reference/rest/v1/PubsubMessage
 * @throws {Error} If `text`, `filename` or `lang` is missing from the message.
 */
functions.cloudEvent('saveResult', async cloudEvent => {
  const {message} = cloudEvent.data;
  // Pub/Sub CloudEvents nest the base64 payload under message.data; fall back
  // to treating `message` itself as the payload for callers that send it bare.
  const encodedPayload =
    message && typeof message.data === 'string' ? message.data : message;
  const jsonStr = Buffer.from(encodedPayload, 'base64').toString();
  const {text, filename, lang} = JSON.parse(jsonStr);

  if (!text) {
    throw new Error(
      'Text not provided. Make sure you have a "text" property in your request'
    );
  }
  if (!filename) {
    throw new Error(
      'Filename not provided. Make sure you have a "filename" property in your request'
    );
  }
  if (!lang) {
    throw new Error(
      'Language not provided. Make sure you have a "lang" property in your request'
    );
  }

  console.log(`Received request to save file ${filename}`);

  const bucketName = process.env.RESULT_BUCKET;
  // The output object is named after the source file and target language.
  const newFilename = renameImageForSave(filename, lang);
  const file = storage.bucket(bucketName).file(newFilename);

  console.log(`Saving result to ${newFilename} in bucket ${bucketName}`);

  await file.save(text);
  console.log('File saved.');
});
// [END functions_ocr_save]