vision/snippets/detect/beta_snippets.py (245 lines of code) (raw):
#!/usr/bin/env python
# Copyright 2018 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Google Cloud Vision API Python Beta Snippets
Example Usage:
python beta_snippets.py -h
python beta_snippets.py object-localization INPUT_IMAGE
python beta_snippets.py object-localization-uri gs://...
python beta_snippets.py handwritten-ocr INPUT_IMAGE
python beta_snippets.py handwritten-ocr-uri gs://...
python beta_snippets.py batch-annotate-files INPUT_PDF
python beta_snippets.py batch-annotate-files-uri gs://...
python beta_snippets.py batch-annotate-images-uri gs://... gs://...
For more information, see the documentation at
https://cloud.google.com/vision/docs.
"""
import argparse
# [START vision_localize_objects_beta]
def localize_objects(path):
    """Localize objects in the local image.

    Args:
        path: The path to the local file.
    """
    from google.cloud import vision_v1p3beta1 as vision

    annotator = vision.ImageAnnotatorClient()

    # Read the raw bytes of the image and wrap them in a request image.
    with open(path, "rb") as handle:
        raw_bytes = handle.read()
    image = vision.Image(content=raw_bytes)

    # Only the localized-object annotations of the response are reported.
    response = annotator.object_localization(image=image)
    annotations = response.localized_object_annotations

    print("Number of objects found: {}".format(len(annotations)))
    for annotation in annotations:
        print("\n{} (confidence: {})".format(annotation.name, annotation.score))
        print("Normalized bounding polygon vertices: ")
        # One line per vertex of the object's normalized bounding polygon.
        for corner in annotation.bounding_poly.normalized_vertices:
            print(" - ({}, {})".format(corner.x, corner.y))
# [END vision_localize_objects_beta]
# [START vision_localize_objects_gcs_beta]
def localize_objects_uri(uri):
    """Localize objects in the image on Google Cloud Storage

    Args:
        uri: The path to the file in Google Cloud Storage (gs://...)
    """
    from google.cloud import vision_v1p3beta1 as vision

    annotator = vision.ImageAnnotatorClient()

    # Reference the image by its URI instead of uploading its bytes.
    remote_image = vision.Image()
    remote_image.source.image_uri = uri

    # Only the localized-object annotations of the response are reported.
    response = annotator.object_localization(image=remote_image)
    annotations = response.localized_object_annotations

    print("Number of objects found: {}".format(len(annotations)))
    for annotation in annotations:
        print("\n{} (confidence: {})".format(annotation.name, annotation.score))
        print("Normalized bounding polygon vertices: ")
        # One line per vertex of the object's normalized bounding polygon.
        for corner in annotation.bounding_poly.normalized_vertices:
            print(" - ({}, {})".format(corner.x, corner.y))
# [END vision_localize_objects_gcs_beta]
# [START vision_handwritten_ocr_beta]
def detect_handwritten_ocr(path):
    """Detects handwritten characters in a local image.

    Args:
        path: The path to the local file.
    """
    from google.cloud import vision_v1p3beta1 as vision

    annotator = vision.ImageAnnotatorClient()

    # Read the raw bytes of the image and wrap them in a request image.
    with open(path, "rb") as handle:
        raw_bytes = handle.read()
    image = vision.Image(content=raw_bytes)

    # Handwritten OCR is requested through a language hint. Available codes:
    #   en-t-i0-handwrit, mul-Latn-t-i0-handwrit
    # Pass exactly one hint code per request for handwritten OCR.
    hint_context = vision.ImageContext(language_hints=["en-t-i0-handwrit"])

    result = annotator.document_text_detection(
        image=image, image_context=hint_context
    )
    document = result.full_text_annotation

    print(f"Full Text: {document.text}")

    # Walk the annotation tree: page -> block -> paragraph -> word -> symbol,
    # printing the confidence reported at every level below the page.
    for page in document.pages:
        for block in page.blocks:
            print(f"\nBlock confidence: {block.confidence}\n")

            for paragraph in block.paragraphs:
                print(f"Paragraph confidence: {paragraph.confidence}")

                for word in paragraph.words:
                    # A word's text is the concatenation of its symbols.
                    joined = "".join(symbol.text for symbol in word.symbols)
                    print(f"Word text: {joined} (confidence: {word.confidence})")

                    for symbol in word.symbols:
                        print(
                            f"\tSymbol: {symbol.text} "
                            f"(confidence: {symbol.confidence})"
                        )

    # Report an API-side error only after the available results were printed.
    error_message = result.error.message
    if error_message:
        raise Exception(
            f"{error_message}\nFor more info on error messages, check: "
            "https://cloud.google.com/apis/design/errors"
        )
# [END vision_handwritten_ocr_beta]
# [START vision_handwritten_ocr_gcs_beta]
def detect_handwritten_ocr_uri(uri):
    """Detects handwritten characters in the file located in Google Cloud
    Storage.

    Args:
        uri: The path to the file in Google Cloud Storage (gs://...)
    """
    from google.cloud import vision_v1p3beta1 as vision

    annotator = vision.ImageAnnotatorClient()

    # Reference the image by its URI instead of uploading its bytes.
    remote_image = vision.Image()
    remote_image.source.image_uri = uri

    # Handwritten OCR is requested through a language hint. Available codes:
    #   en-t-i0-handwrit, mul-Latn-t-i0-handwrit
    # Pass exactly one hint code per request for handwritten OCR.
    hint_context = vision.ImageContext(language_hints=["en-t-i0-handwrit"])

    result = annotator.document_text_detection(
        image=remote_image, image_context=hint_context
    )
    document = result.full_text_annotation

    print(f"Full Text: {document.text}")

    # Walk the annotation tree: page -> block -> paragraph -> word -> symbol,
    # printing the confidence reported at every level below the page.
    for page in document.pages:
        for block in page.blocks:
            print(f"\nBlock confidence: {block.confidence}\n")

            for paragraph in block.paragraphs:
                print(f"Paragraph confidence: {paragraph.confidence}")

                for word in paragraph.words:
                    # A word's text is the concatenation of its symbols.
                    joined = "".join(symbol.text for symbol in word.symbols)
                    print(f"Word text: {joined} (confidence: {word.confidence})")

                    for symbol in word.symbols:
                        print(
                            f"\tSymbol: {symbol.text} "
                            f"(confidence: {symbol.confidence})"
                        )

    # Report an API-side error only after the available results were printed.
    error_message = result.error.message
    if error_message:
        raise Exception(
            f"{error_message}\nFor more info on error messages, check: "
            "https://cloud.google.com/apis/design/errors"
        )
# [END vision_handwritten_ocr_gcs_beta]
# [START vision_batch_annotate_files_beta]
def detect_batch_annotate_files(path):
    """Detects document features in a PDF/TIFF/GIF file.

    While your PDF file may have several pages,
    this API can process up to 5 pages only.

    Args:
        path: The path to the local file.
    """
    from google.cloud import vision_v1p4beta1 as vision

    annotator = vision.ImageAnnotatorClient()

    with open(path, "rb") as handle:
        document_bytes = handle.read()

    # The file is sent inline and declared as a PDF. The other supported
    # mime types are 'image/tiff' and 'image/gif'.
    source = vision.InputConfig(content=document_bytes, mime_type="application/pdf")
    text_detection = vision.Feature(type_=vision.Feature.Type.DOCUMENT_TEXT_DETECTION)

    # Page numbers start at 1 (not 0) and -1 means the last page, so this
    # selects the first two pages and the final one (5 pages at most).
    selected_pages = [1, 2, -1]
    file_request = vision.AnnotateFileRequest(
        input_config=source, features=[text_detection], pages=selected_pages
    )

    batch = annotator.batch_annotate_files(requests=[file_request])

    # A single file was submitted, so only the first file response is read;
    # it is iterated as a sequence of per-page image responses.
    for page_response in batch.responses[0].responses:
        for page in page_response.full_text_annotation.pages:
            for block in page.blocks:
                print("\nBlock confidence: {}\n".format(block.confidence))

                for paragraph in block.paragraphs:
                    print("\tParagraph confidence: {}".format(paragraph.confidence))

                    for word in paragraph.words:
                        # A word's text is the concatenation of its symbols.
                        joined = "".join(symbol.text for symbol in word.symbols)
                        print(
                            f"\t\tWord text: {joined} "
                            f"(confidence: {word.confidence})"
                        )

                        for symbol in word.symbols:
                            print(
                                f"\t\t\tSymbol: {symbol.text} "
                                f"(confidence: {symbol.confidence})"
                            )
# [END vision_batch_annotate_files_beta]
# [START vision_batch_annotate_files_gcs_beta]
def detect_batch_annotate_files_uri(gcs_uri):
    """Detects document features in a PDF/TIFF/GIF file.

    While your PDF file may have several pages,
    this API can process up to 5 pages only.

    Args:
        gcs_uri: The path to the file in Google Cloud Storage (gs://...)
    """
    from google.cloud import vision_v1p4beta1 as vision

    client = vision.ImageAnnotatorClient()

    # The file is read from Google Cloud Storage and declared as a PDF.
    # Other supported mime_types: 'image/tiff' or 'image/gif'
    mime_type = "application/pdf"
    input_config = vision.InputConfig(
        gcs_source=vision.GcsSource(uri=gcs_uri), mime_type=mime_type
    )

    feature = vision.Feature(type_=vision.Feature.Type.DOCUMENT_TEXT_DETECTION)

    # Annotate the first two pages and the last one (max 5 pages).
    # First page starts at 1, and not 0. Last page is -1.
    pages = [1, 2, -1]

    request = vision.AnnotateFileRequest(
        input_config=input_config, features=[feature], pages=pages
    )

    response = client.batch_annotate_files(requests=[request])

    # A single file was submitted, so only the first file response is read;
    # it is iterated as a sequence of per-page image responses.
    for image_response in response.responses[0].responses:
        for page in image_response.full_text_annotation.pages:
            for block in page.blocks:
                print(f"\nBlock confidence: {block.confidence}\n")
                for par in block.paragraphs:
                    print(f"\tParagraph confidence: {par.confidence}")
                    for word in par.words:
                        # A word's text is the concatenation of its symbols.
                        word_text = "".join(symbol.text for symbol in word.symbols)
                        print(
                            f"\t\tWord text: {word_text} "
                            f"(confidence: {word.confidence})"
                        )
                        for symbol in word.symbols:
                            print(
                                f"\t\t\tSymbol: {symbol.text} "
                                f"(confidence: {symbol.confidence})"
                            )
# [END vision_batch_annotate_files_gcs_beta]
# [START vision_async_batch_annotate_images_beta]
def async_batch_annotate_images_uri(input_image_uri, output_uri):
    """Batch annotation of images on Google Cloud Storage asynchronously.

    Args:
        input_image_uri: The path to the image in Google Cloud Storage (gs://...)
        output_uri: The path to the output path in Google Cloud Storage (gs://...)

    Raises:
        ValueError: If output_uri is not of the form gs://BUCKET/PREFIX.
        RuntimeError: If no output file is found under output_uri after the
            operation has finished.
    """
    import re

    # Split the destination into bucket name and object prefix up front, so
    # that a malformed URI is rejected before the long-running operation is
    # started instead of failing once it has already completed.
    match = re.match(r"gs://([^/]+)/(.+)", output_uri)
    if match is None:
        raise ValueError(
            f"output_uri must look like gs://BUCKET/PREFIX, got: {output_uri!r}"
        )
    bucket_name, prefix = match.groups()

    from google.cloud import storage
    from google.cloud import vision_v1p4beta1 as vision

    client = vision.ImageAnnotatorClient()

    # Construct the request for the image(s) to be annotated:
    image_source = vision.ImageSource(image_uri=input_image_uri)
    image = vision.Image(source=image_source)
    features = [
        vision.Feature(type_=vision.Feature.Type.LABEL_DETECTION),
        vision.Feature(type_=vision.Feature.Type.TEXT_DETECTION),
        vision.Feature(type_=vision.Feature.Type.IMAGE_PROPERTIES),
    ]
    requests = [
        vision.AnnotateImageRequest(image=image, features=features),
    ]

    gcs_destination = vision.GcsDestination(uri=output_uri)
    output_config = vision.OutputConfig(gcs_destination=gcs_destination, batch_size=2)

    operation = client.async_batch_annotate_images(
        requests=requests, output_config=output_config
    )

    print("Waiting for the operation to finish.")
    operation.result(timeout=10000)

    # Once the request has completed and the output has been
    # written to Google Cloud Storage, we can list all the output files.
    storage_client = storage.Client()
    bucket = storage_client.get_bucket(bucket_name)

    # Lists objects with the given prefix. Names ending in "/" are skipped:
    # they are folder placeholders rather than result files.
    blob_list = [
        blob
        for blob in bucket.list_blobs(prefix=prefix)
        if not blob.name.endswith("/")
    ]
    if not blob_list:
        raise RuntimeError(f"No output files were found under {output_uri}")

    print("Output files:")
    for blob in blob_list:
        print(blob.name)

    # Processes the first output file from Google Cloud Storage.
    # Since we specified batch_size=2, the first response contains
    # annotations for the first two annotate image requests.
    output = blob_list[0]

    json_string = output.download_as_bytes().decode("utf-8")
    response = vision.BatchAnnotateImagesResponse.from_json(json_string)

    # Prints the actual response for the first annotate image request.
    print(f"The annotation response for the first request: {response.responses[0]}")
# [END vision_async_batch_annotate_images_beta]
if __name__ == "__main__":
    # Maps every sub-command to its handler and to the names of the
    # positional arguments that are passed, in order, to that handler.
    commands = {
        "object-localization": (localize_objects, ("path",)),
        "object-localization-uri": (localize_objects_uri, ("uri",)),
        "handwritten-ocr": (detect_handwritten_ocr, ("path",)),
        "handwritten-ocr-uri": (detect_handwritten_ocr_uri, ("uri",)),
        "batch-annotate-files": (detect_batch_annotate_files, ("path",)),
        "batch-annotate-files-uri": (detect_batch_annotate_files_uri, ("uri",)),
        "batch-annotate-images-uri": (
            async_batch_annotate_images_uri,
            ("uri", "output"),
        ),
    }

    parser = argparse.ArgumentParser(
        description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter
    )
    # required=True makes argparse print a usage error when no sub-command is
    # given; otherwise args.command would be None and dispatch would fail.
    subparsers = parser.add_subparsers(dest="command", required=True)

    # Each handler's docstring doubles as the help text of its sub-command.
    for command, (handler, arg_names) in commands.items():
        subparser = subparsers.add_parser(command, help=handler.__doc__)
        for arg_name in arg_names:
            subparser.add_argument(arg_name)

    args = parser.parse_args()

    # Dispatch on the exact command name rather than on substring matches.
    handler, arg_names = commands[args.command]
    handler(*(getattr(args, arg_name) for arg_name in arg_names))