vision/textDetection.js
// Copyright 2016 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
'use strict';
const fs = require('fs').promises;
const path = require('path');
const vision = require('@google-cloud/vision');
const natural = require('natural');
const redis = require('redis');
// By default, the client will authenticate using the service account file
// specified by the GOOGLE_APPLICATION_CREDENTIALS environment variable and use
// the project specified by the GCLOUD_PROJECT environment variable. See
// https://cloud.google.com/docs/authentication/getting-started#setting_the_environment_variable
// Instantiate a vision client
const client = new vision.ImageAnnotatorClient();
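// If you would rather not rely on those environment variables, the client
// constructor also accepts explicit options, for example (the path here is
// illustrative, not part of this sample):
//   const client = new vision.ImageAnnotatorClient({
//     keyFilename: '/path/to/service-account.json',
//   });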
/**
* State manager for text processing. Stores and reads results from Redis.
*/
class Index {
/**
* Create a new Index object.
*/
constructor() {
// Connect to a redis server.
const TOKEN_DB = 0;
const DOCS_DB = 1;
const PORT = process.env.REDIS_PORT || '6379';
const HOST = process.env.REDIS_HOST || '127.0.0.1';
// node-redis v4 selects a logical database via the `database` option
// (the `db` option from older releases is no longer recognized).
this.tokenClient = redis
.createClient({url: `redis://${HOST}:${PORT}`, database: TOKEN_DB})
.on('error', err => {
console.error('ERR:REDIS: ' + err);
throw err;
});
this.docsClient = redis
.createClient({url: `redis://${HOST}:${PORT}`, database: DOCS_DB})
.on('error', err => {
console.error('ERR:REDIS: ' + err);
throw err;
});
// Constructors cannot be async, so open both connections from an
// immediately invoked async function.
(async () => {
await this.tokenClient.connect();
await this.docsClient.connect();
})();
}
/**
* Close all active redis server connections.
*/
quit() {
this.tokenClient.quit();
this.docsClient.quit();
}
/**
* Tokenize the given document.
* @param {string} filename - key for the storage in redis
* @param {string} document - Collection of words to be tokenized
* @returns {Promise<void>}
*/
async add(filename, document) {
const PUNCTUATION = ['.', ',', ':', ''];
const tokenizer = new natural.WordTokenizer();
const tokens = tokenizer.tokenize(document);
// filter out punctuation, then add all tokens to a redis set.
await Promise.all(
tokens
.filter(token => PUNCTUATION.indexOf(token) === -1)
.map(token => this.tokenClient.sAdd(token, filename))
);
await this.docsClient.set(filename, document);
}
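// A rough sketch of what `add` does (filename and text are illustrative):
//   await index.add('menu.jpg', 'soup, salad: sandwiches');
// stores the full string under the key 'menu.jpg' in DOCS_DB and adds
// 'menu.jpg' to the sets 'soup', 'salad', and 'sandwiches' in TOKEN_DB,
// since natural's WordTokenizer already splits on non-word characters.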
/**
* Lookup files that contain a given set of words in redis
* @param {string[]} words An array of words to lookup
* @returns {Promise<string[][]>} For each word, an array of matching filenames
*/
async lookup(words) {
return Promise.all(
words
.map(word => word.toLowerCase())
.map(word => this.tokenClient.sMembers(word))
);
}
/**
* Check to see if a Document is already stored in redis.
* @param {string} filename
* @returns {Promise<boolean>}
*/
async documentIsProcessed(filename) {
const value = await this.docsClient.get(filename);
if (value) {
console.log(`${filename} already added to index.`);
return true;
}
if (value === '') {
console.log(`${filename} was already checked, and contains no text.`);
return true;
}
return false;
}
/**
* Updates a given doc to have no text in redis.
* @param {string} filename
*/
async setContainsNoText(filename) {
await this.docsClient.set(filename, '');
}
}
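// A minimal end-to-end sketch of the Index lifecycle (assumes a Redis server
// is reachable; the filename and text are illustrative):
//   const index = new Index();
//   if (!(await index.documentIsProcessed('receipt.png'))) {
//     await index.add('receipt.png', 'total due 42.00');
//   }
//   const [hits] = await index.lookup(['total']); // e.g. ['receipt.png']
//   index.quit();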
/**
* Given a list of words, lookup any matches in the database.
* @param {string[]} words
* @returns {Promise<string[][]>}
*/
async function lookup(words) {
const index = new Index();
const hits = await index.lookup(words);
index.quit();
words.forEach((word, i) => {
console.log(`hits for "${word}":`, hits[i].join(', '));
});
return hits;
}
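// For example, `lookup(['Soup', 'salad'])` lowercases the words before the
// Redis query, prints lines like (filenames illustrative):
//   hits for "Soup": menu.jpg, specials.jpg
//   hits for "salad": menu.jpg
// and resolves to [['menu.jpg', 'specials.jpg'], ['menu.jpg']].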
/**
* Provide a joined string with all descriptions from the response data
* @param {TextAnnotation[]} texts Response data from the Vision API
* @returns {string} A joined string containing all descriptions
*/
function extractDescription(texts) {
let document = '';
texts.forEach(text => {
document += text.description || '';
});
return document.toLowerCase();
}
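// For instance, extractDescription([{description: 'Daily Specials'},
// {description: 'Daily'}]) returns 'daily specialsdaily': the entries are
// concatenated without a separator and then lowercased.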
/**
* Grab the description, and push it into redis.
* @param {string} filename Name of the file being processed
* @param {Index} index The Index object that wraps Redis
* @param {*} response Individual response from the Cloud Vision API
* @returns {Promise<void>}
*/
async function extractDescriptions(filename, index, response) {
if (response.textAnnotations.length) {
const words = extractDescription(response.textAnnotations);
await index.add(filename, words);
} else {
console.log(`${filename} had no discernible text.`);
await index.setContainsNoText(filename);
}
}
/**
* Given a set of image file paths, run them through the Cloud Vision API and
* extract any detected text.
* @param {Index} index The stateful `Index` Object.
* @param {string[]} inputFiles The list of files to process.
* @returns {Promise<void>}
*/
async function getTextFromFiles(index, inputFiles) {
// Read all of the given files and provide request objects that will be
// passed to the Cloud Vision API in a batch request.
const requests = await Promise.all(
inputFiles.map(async filename => {
const content = await fs.readFile(filename);
console.log(` 👉 ${filename}`);
return {
image: {
content: content.toString('base64'),
},
features: [{type: 'TEXT_DETECTION'}],
};
})
);
// Make a call to the Vision API to detect text
const results = await client.batchAnnotateImages({requests});
const detections = results[0].responses;
await Promise.all(
inputFiles.map(async (filename, i) => {
const response = detections[i];
if (response.error) {
console.info(`API Error for ${filename}`, response.error);
return;
}
await extractDescriptions(filename, index, response);
})
);
}
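// The entries in `detections` line up with `inputFiles` because the batched
// response preserves request order. Roughly (abridged, for illustration), each
// entry looks like:
//   {textAnnotations: [{description: '<full text>'}, {description: '<word>'}, ...]}
// with an `error` field set when that particular image failed.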
/**
* Main entry point for the program.
* @param {string} inputDir The directory in which to run the sample.
* @returns {Promise<void>}
*/
async function main(inputDir) {
const index = new Index();
try {
const files = await fs.readdir(inputDir);
// Get a list of all files in the directory (filter out other directories)
const allImageFiles = (
await Promise.all(
files.map(async file => {
const filename = path.join(inputDir, file);
const stats = await fs.stat(filename);
if (!stats.isDirectory()) {
return filename;
}
})
)
).filter(f => !!f);
// Figure out which files have already been processed
let imageFilesToProcess = (
await Promise.all(
allImageFiles.map(async filename => {
const processed = await index.documentIsProcessed(filename);
if (!processed) {
// Forward this filename on for further processing
return filename;
}
})
)
).filter(file => !!file);
// The batch endpoint won't handle arbitrarily many images in one request, so cap the list at 15.
if (imageFilesToProcess.length > 15) {
console.log(
'Maximum of 15 images allowed. Analyzing the first 15 found.'
);
imageFilesToProcess = imageFilesToProcess.slice(0, 15);
}
// Analyze any remaining unprocessed files
if (imageFilesToProcess.length > 0) {
console.log('Files to process: ');
await getTextFromFiles(index, imageFilesToProcess);
}
console.log('All files processed!');
} catch (e) {
console.error(e);
}
index.quit();
}
const usage =
'Usage: node textDetection <command> <arg> ... \n\n Commands: analyze, lookup';
if (process.argv.length < 3) {
throw new Error(usage);
}
const args = process.argv.slice(2);
const command = args.shift();
if (command === 'analyze') {
if (!args.length) {
throw new Error('Usage: node textDetection analyze <dir>');
}
main(args[0]).catch(console.error);
} else if (command === 'lookup') {
if (!args.length) {
throw new Error('Usage: node textDetection lookup <word> ...');
}
lookup(args).catch(console.error);
} else {
throw new Error(usage);
}
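// Example invocations (assuming Redis is running locally and
// GOOGLE_APPLICATION_CREDENTIALS is set; directory and words are illustrative):
//   node textDetection.js analyze ./resources
//   node textDetection.js lookup menu specials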