tools/genai-prod-catalog-enrichment/genai_helper.py (547 lines of code) (raw):
# gen AI helper
import re
import json
import ast
import traceback
import fitz
import vertexai
from vertexai.language_models import TextGenerationModel
from google.cloud import storage
# from pdf_helper import *
def get_blocked_response_template():
    """Return a fresh "blocked response" error payload.

    A new dictionary is built on every call, so callers may mutate the
    result (for example to fill in ``safety_attributes`` or to overwrite
    ``message``) without affecting later callers.

    Returns:
        A dict with a single ``"response_error"`` key holding
        ``is_blocked`` (True), an empty ``safety_attributes`` dict and a
        default explanatory ``message``.
    """
    default_message = (
        "The response is blocked because the input or "
        "response potentially violates Google’s policies. "
        "Try rephrasing the prompt or "
        "adjusting the parameter settings."
    )
    error_details = {
        "is_blocked": True,
        "safety_attributes": {},
        "message": default_message,
    }
    return {"response_error": error_details}
def get_failed_faq_template():
    """Return a fresh placeholder payload for a failed FAQ generation.

    The payload has the same top-level ``"catalogue_faqs"`` key as a
    successful FAQ response, but its single entry carries empty
    ``response_error`` and ``llm_response`` fields for the caller to fill.

    Returns:
        A new dict on every call, safe for the caller to mutate.
    """
    empty_entry = {"response_error": "", "llm_response": ""}
    return {"catalogue_faqs": [empty_entry]}
def get_model_response(prompt, project):
    """Send ``prompt`` to the "text-bison" text model and return its reply.

    Initialises the Vertex AI SDK for ``project`` in ``us-central1`` and
    requests a single candidate at temperature 0.

    Args:
        prompt: Prompt text passed to the model.
        project: Project passed to ``vertexai.init``.

    Returns:
        The object returned by ``TextGenerationModel.predict``. Callers in
        this file read its ``text``, ``is_blocked`` and
        ``safety_attributes`` attributes.
    """
    vertexai.init(project=project, location="us-central1")
    text_model = TextGenerationModel.from_pretrained("text-bison")
    return text_model.predict(
        prompt,
        candidate_count=1,
        max_output_tokens=2048,
        temperature=0,
        top_p=0.8,
        top_k=40,
    )
def get_prompt(context, task, error="", product_name=""):
    """Build the few-shot prompt string for the requested task.

    Every prompt template below is an f-string, so all of them are
    rendered on each call; the one selected by ``task`` is returned.

    Args:
        context: Text interpolated into the selected template as its final
            ``input`` (or as the text to examine, for the check prompts).
        task: Selects the template. Recognised values are "faq", "specs",
            "check_specs", "company_details", "image_tags_and_labels",
            "fix_json" and "fix_faq_json". Any other value returns the
            FAQ check prompt.
        error: Error text interpolated into the two JSON-fixing templates.
        product_name: Product name interpolated into the tags-and-category
            template.

    Returns:
        The rendered prompt string for ``task``.
    """
    # FAQ's, ISQ's constants
    # Example FAQ output shown to the model in the FAQ prompt.
    faq_json_format = """{
"catalogue_faqs": [
{"question": "What is the size of the paper
napkin produced by this machine?",
"answer": "The paper napkin produced by
this machine is 30 X 30 cm."},
{"question": "What is the speed of this machine?",
"answer": "This machine can produce 2,50,000 pieces in 8 hours."},
{"question": "How many colors can this machine print?",
"answer": "This machine can print up to 2 colors."},
{"question": "What is the weight of this machine?",
"answer": "This machine weighs approximately 2500 kgs."},
{"question": "How many people are required
to operate this machine?",
"answer": "This machine requires one
operator and one helper to operate."},
{"question": "What type of raw material does this machine use?",
"answer": "This machine uses tissue paper with a gsm of 12 to 30."}
]
}"""
    # Example specifications output; interpolated as a Python dict repr
    # (single-quoted keys) into the product-specs prompt.
    sample_json_response = {
        "product_name": [
            "SINGLE SIZE PAPER NAPKIN MACHINE",
        ],
        "specifications": [
            {
                "SINGLE SIZE PAPER NAPKIN MACHINE": {
                    "Size": "30 X 30 CM",
                    "No of Printing": "As Per Requirement",
                    "Embossing Unit": "As Per Requirement",
                    "Motor": [
                        "3hp motor with variable"
                        " AC drive with VDF (Variable frequency drive)",
                        "1hp motor (AC)"
                    ],
                    "Speed": "2,50,000 PIECES / 8 HOURS",
                    "Weight": "2500 kgs (approx)",
                    "Man Power": "One operator & One helper",
                    "Raw Material": "Tissue paper 12 to 30 gsm",
                    "Counting": "digital",
                    "PRICE WITHOUT PRINT": "4,50,000",
                    "1 COLOUR PRINT": "5,75,000",
                    "2 COLOUR PRINT": "6,25,000"
                }}],
        "confidence_score": 0.8
    }
    # Example company-details output for the company-details prompt.
    company_details_format = {
        "company_details": {
            "company_name": "Global Conversion Machines",
            "company_description": "",
            "company_phone_number": {
                "SALES TEAM": "+ 91 958 215 2344",
                "MARKETING TEAM": "+ 91 874 482 8924",
                "SERVICE TEAM": "+ 91 888 291 3467"
            },
            "company_email": "globalconversionmachines@gmail.com",
            "company_website": "",
            "company_social_handles": {
                "twitter": "@globalcmachines",
                "instagram": "@globalconversionmachines",
                "youtube": "Global Conversion Machines"
            },
            "company_address": ""
        }
    }
    # blocked_response = get_blocked_response_template()
    # Deliberately broken example (unclosed list, strings broken across
    # lines) paired with ``fixed_json`` as the one-shot for "fix_json".
    malformed_json = """
{
'product_name': ['Acrylic Body Rotameter'],
'specifications': [
{'Acrylic Body Rotameter': {
'Metering Tube': 'Solid Acrylic Block',
'Body': 'Imported transparent acrylic block',
'Wetted Parts': 'M.S. / S.S. / P.P. / Teflon',
'End Connection': 'S.S. 304/316/PVC/PP/MS',
'Scale': 'Engraved on body',
'Packing': 'Neoprene / Teflon / Silicon',
'Model': 'JP/ABR',
'Temperature': 'Max 60C',
'Pressure': 'Max 25 Kg/Cm',
'Available sizes': '1/8 to 100 NB',
'Flow Ranges': '2 to 60,000 LPH of water at
ambient temperature and 0.1 to 750 Nm/hr of air at NTP',
'End Connection': 'Screwed / Flanged / Hose Nipple',
'Orientation': 'Bottom Top / Rear Rear',
'Accuracy': '2% of FSD',
'Accessories': 'High & low flow alarms and 4-20
mA output on your request',
'THREDED CONNECTION': {
'Line Size Flow rate (BSP CONNECTION)':
'Water at amb.temp. in LPH',
'Minimum LPH': 'Maximum LPH',
'BSP 6': '60',
'BSP 200': '2000',
'BSP 400': '4000',
'1 BSP': '500',
'1.5 BSP': '1200',
'2 BSP': '2500'
}
}
},
'confidence_score': 0.8
}
"""
    # Expected repaired counterpart of ``malformed_json``.
    fixed_json = """
{
"product_name": [
"Acrylic Body Rotameter"
],
"specifications": [
{
"Acrylic Body Rotameter": {
"Metering Tube": "Solid Acrylic Block",
"Body": "Imported transparent acrylic block",
"Wetted Parts": "M.S. / S.S. / P.P. / Teflon",
"End Connection": "S.S. 304/316/PVC/PP/MS",
"Scale": "Engraved on body",
"Packing": "Neoprene / Teflon / Silicon",
"Model": "JP/ABR",
"Temperature": "Max 60C",
"Pressure": "Max 25 Kg/Cm",
"Available sizes": "1/8 to 100 NB",
"Flow Ranges": "2 to 60,000 LPH of water
at ambient temperature and 0.1
to 750 Nm/hr of air at NTP",
"End Connection": "Screwed / Flanged / Hose Nipple",
"Orientation": "Bottom Top / Rear Rear",
"Accuracy": "2% of FSD",
"Accessories": "High & low flow alarms and 4-20 mA output on your request",
"THREDED CONNECTION": {
"Line Size Flow rate (BSP CONNECTION)": "Water at amb.temp. in LPH",
"Minimum LPH": "Maximum LPH",
"BSP 6": "60",
"BSP 200": "2000",
"BSP 400": "4000",
"1 BSP": "500",
"1.5 BSP": "1200",
"2 BSP": "2500"
}
}
}
],
"confidence_score": 0.9
}
"""
    # Example outputs for the two few-shot cases of the tags/category prompt.
    non_woven_bag = {
        "tags": ["Industrial Machine", "Bag Making Machine"],
        "suggested_category": "Non Woven Bag Making Machine"
    }
    toilet_roll_machine = {
        "tags": ["Industrial Machine",
                 "Paper Roll Machine", "Toilet Roll Machine"],
        "suggested_category": "Toilet Roll Making Machine"
    }
    # Broken FAQ example (raw line breaks inside string values) paired with
    # ``fixed_faq`` as the one-shot for "fix_faq_json".
    malformed_faq = """
{
"catalogue_faqs": [
{"question": "What is the purpose of this diagram?",
"answer": "This diagram shows the piping and wiring
schematic for an AO Smith heat pump water heater."},
{"question": "What are the different components
shown in the diagram?",
"answer": "The diagram shows the following components:
1) Hot water to rooms
2) Tank temp sensor
3) Hot water outlet
4) Flow switch
5) Vibration pads
6) Return water from rooms
7) Cold water inlet to heat pump
8) FFL note"},
{"question": "What are the different
steps involved in the operation of this system?",
"answer": "The steps involved in the
operation of this system are as follows:
1) Cold water enters the heat pump through the cold water inlet.
2) The heat pump heats the water
and sends it to the hot water tank.
3) The hot water is then distributed to the
rooms through the hot water to rooms pipes.
4) The return water from the rooms is then sent back to the
heat pump through the return water from rooms pipes.
5) The process repeats itself."},
{"question": "What are some of the important safety precautions
that should be taken when working on this system?",
"answer": "Some of the important safety precautions
that should be taken when working on this system include:
1) Always turn off the power to the system before working on it.
2) Be sure to use proper safety equipment,
such as gloves and eye protection.
3) Never work on the system while it is hot.
4) Be aware of the location of
all of the components in the system.
5) If you are unsure about anything,
always consult a qualified professional."}
]
}
"""
    # NOTE: this is not a raw string, so each "\n" below is an actual
    # newline character in the rendered prompt.
    fixed_faq = """
{"catalogue_faqs": [{"question": "What is the purpose of this diagram?",
"answer": "This diagram shows the piping and wiring schematic
for an AO Smith heat pump water heater."},
{"question": "What are the different components shown in the diagram?",
"answer": "The diagram shows the following
components:\n1) Hot water to rooms\n2) Tank temp sensor\n
3) Hot water outlet\n4) Flow switch\n
5) Vibration pads\n6) Return water from rooms\n
7) Cold water inlet to heat pump\n8) FFL note"},
{"question": "What are the different
steps involved in the operation of this system?",
"answer": "The steps involved in the
operation of this system are as follows:\n
1) Cold water enters the heat pump through the cold water inlet.\n
2) The heat pump heats the water and sends it to the hot water tank.\n
3) The hot water is then distributed to the
rooms through the hot water to rooms pipes.\n
4) The return water from the rooms is then
sent back to the heat pump through the return water from rooms pipes.\n
5) The process repeats itself."},
{"question": "What are some of the important
safety precautions that should be taken when working on this system?",
"answer": "Some of the important safety
precautions that should be taken when working on this system include:\n
1) Always turn off the power to the system before working on it.\n
2) Be sure to use proper safety equipment,
such as gloves and eye protection.\n
3) Never work on the system while it is hot.\n
4) Be aware of the location of all of the components in the system.\n
5) If you are unsure about anything,
always consult a qualified professional."}]}
"""
    # failed_faq = get_failed_faq_template()
    # Default prompt: returned for any unrecognised ``task``.
    check_faq_prompt = f"""For the following text,
examine if it contains a description,
product specifications or features.
If found, return a boolean response True.
If not found, return a boolean response False.
{context}"""
    # task == "faq"
    faq_prompt = f"""Generate a list of frequently asked
questions (FAQ) based only on the provided input.
Extract the key points, common queries,
and important details to create a concise
and informative set of questions and
answers that would provide clarity on this subject for readers.
Return the output in JSON format.
input: 02 SINGLE SIZE PAPER NAPKIN
MACHINE Size: 30 X 30 CM No of Printing :
As Per Requirement Embossing Unit:
As Per Requirement Motor: 1) 3hp motor with variable
AC drive with VDF (Variable frequency drive)
2) 1hp motor (AC) Speed: 2,50,000 PIECES / 8 HOURS
Weight: 2500 kgs (approx) Man Power: One operator &
One helper Raw Material:
Tissue paper 12 to 30 gsm Counting: digital Counting Band saw
cutting with mauling sharping system PRICE WITHOUT PRINT : `4,50,000
1 COLOUR PRINT : `5,75,000 2 COLOUR PRINT : `6,25,000
output: {faq_json_format}
input: {context}
output:
"""
    # task == "check_specs"
    check_specs_prompt = f"""For the following text, examine
if it contains a product description, specifications
or features. If found, return a boolean response True.
If not found, return a boolean response False.
{context}
"""
    # task == "specs"
    product_specs_prompt = f"""Convert the following text into a
product specifications JSON containing \"product_name\"
and other \"specifications\".
Also, add a \"confidence_score\" to the end of the JSON.
input: 02 SINGLE SIZE PAPER NAPKIN MACHINE
Size: 30 X 30 CM No of Printing : As Per Requirement Embossing Unit:
As Per Requirement Motor: 1) 3hp
motor with variable AC drive with VDF (Variable frequency drive)
2) 1hp motor (AC) Speed: 2,50,000 PIECES / 8
HOURS Weight: 2500 kgs (approx) Man Power: One operator &
One helper Raw Material: Tissue paper 12 to
30 gsm Counting: digital Counting Band saw
cutting with mauling sharping system
PRICE WITHOUT PRINT :
`4,50,000 1 COLOUR PRINT :
`5,75,000 2 COLOUR PRINT : `6,25,00
output: {sample_json_response}
input: {context}
output:
"""
    # task == "company_details"
    company_details_prompt = f"""Convert the following
text into a JSON containing company details.
Ensure that the details extracted
are based solely on the content of the
following text and are as accurate as possible.
input: CONTACT US COMPLETE MACHINE GLOBAL
CONVERSION MACHINES SALES TEAM MARKETING
TEAM SERVICE TEAM + 91 958 215 2344 + 91 874 482 8924 + 91 888 291 3467
Email : globalconversionmachines@gmail.com
Follow us on @ globalcmachines @
globalconversionmachines Global Conversion Machines
output: {company_details_format}
input: {context}
output:
"""
    # task == "fix_json"
    fix_json_prompt = f"""Fix the error/malformation
in the following JSON and ensure that you only return a valid JSON.
Error: {error}
input: {malformed_json}
output: {fixed_json}
input: {context}
output:
"""
    # task == "image_tags_and_labels"
    tags_and_label_prompt = f"""Suggest some tags and
a category for the given product name and
description and convert it into JSON format.
Ensure that the suggestions are based solely on the content of
the text and are as accurate as possible.
input: Product Name: Non Woven Bag Making Machine
Product Description: 07 NON WOVEN BAG MAKING MACHINE Technical Specification
Fabric Paper Tube Diameter: 2.75 -3 Inches
Max Speed: 20 -120 Bags /min Bag Width: 3.9-32
Inches Bag Height: 7.75-24mm Bottom Insert Size: 1.20-3.25 Inches
Side Folding Size: 1.20-3.25 Inches
Bag Thickness: 30-120g Power Supplier: 220v/380v Power: 12kw 15kw
Overall Dimension (L*w*h): 7600*1900*2100mm Weight: 2200 Kg Air
Compressor: 0.6-1.0 Mpa Unwinding Method: Magnetic Power Tension Control
Unwinding Diameter: 1000 Mm Max.width Of
Unwinding: 1300 Mm Correction Device: Photoelectric
Epc System An Synchronous Rectification Motor 110w PRICE PRICE: ` 15,95,000
output: {non_woven_bag}
input: Product Name: Toilet Roll Machine
Product Description: 04 TOILET ROLL MACHINE PRODUCT PRICE PRODUCTION TYPE
1 4500 Rolls / 8 Hours Toilet Roll ` 4,75,000
TYPE 2 9000 Rolls / 8 Hours Toilet Roll,
` 5,25,000 Kitchen Rolls ,
Non Woven Cleaning Rolls,
Hospital Bed Tissue Rolls.
TYPE 3 17,000 Rolls /
8 Hours Toilet Roll,
`11,50,000 Kitchen Rolls,
Non Woven Cleaning Rolls,
Hospital Bed Tissue Rolls.
TYPE 4 Any Customized Production Capacity
output: {toilet_roll_machine}
input: Product Name: {product_name}
Product Description: {context}
output:
"""
    # task == "fix_faq_json"
    fix_faq_json_prompt = f"""Fix the error/malformation in the following
JSON and ensure that you only return a valid JSON.
Error: {error}
input: {malformed_faq}
output: {fixed_faq}
input: {context}
output:
"""
    # Select the rendered template; unknown tasks fall through to the
    # FAQ check prompt rather than raising.
    if task == "faq":
        return faq_prompt
    elif task == "specs":
        return product_specs_prompt
    elif task == "check_specs":
        return check_specs_prompt
    elif task == "company_details":
        return company_details_prompt
    elif task == "image_tags_and_labels":
        return tags_and_label_prompt
    elif task == "fix_json":
        return fix_json_prompt
    elif task == "fix_faq_json":
        return fix_faq_json_prompt
    else:
        return check_faq_prompt
def fix_json(error, context, project):
    """Ask the LLM to repair a malformed JSON reply and parse the result.

    Args:
        error: The parse error raised for the malformed reply; interpolated
            into the "fix_json" prompt.
        context: The malformed JSON text to repair.
        project: Project forwarded to ``get_model_response``.

    Returns:
        The parsed object on success. Otherwise a dict built from
        ``get_blocked_response_template()``: with ``safety_attributes``
        filled in when the LLM blocked the request, or with ``is_blocked``
        set to False and ``message`` describing the failure when the
        repaired reply still could not be parsed or another error occurred.
    """
    # Bound before the try so the handlers can safely tell "no reply at
    # all" (prompt building or the API call raised) from "unparsable reply".
    fix_json_response = None
    try:
        fix_json_prompt = get_prompt(context, "fix_json", error)
        fix_json_response = get_model_response(fix_json_prompt, project)
        if fix_json_response.is_blocked:
            print("[WARNING]: Fix JSON Response Blocked by LLM.")
            response = get_blocked_response_template()
            response["response_error"]["safety_attributes"] = \
                fix_json_response.safety_attributes
            return response
        # The reply is parsed as a Python literal, so JSON ``null`` is
        # rewritten to ``None`` first.
        response = ast.literal_eval(
            fix_json_response.text.strip().replace('null', 'None'))
        print("[INFO]: JSON fixed successfully!")
        return response
    except SyntaxError as e:
        print(f"[ERROR]: SyntaxError during fixing JSON. The LLM may have"
              f" again returned a malformed JSON! \n{e}\n")
        raw_text = ""
        if fix_json_response is not None:
            raw_text = fix_json_response.text.strip()
            print(raw_text)
        llm_text = raw_text.replace('null', 'None')
        response = get_blocked_response_template()
        response["response_error"]["is_blocked"] = False
        response["response_error"][
            "message"] = f"""The LLM repeatedly returned
malformed JSON's!
\n{llm_text}"""
        return response
    except Exception as e:
        print(f"[ERROR]: Unknown error during fixing JSON. \n{e}\n")
        response = get_blocked_response_template()
        response["response_error"]["is_blocked"] = False
        if fix_json_response is not None:
            print(fix_json_response.text.strip())
            response["response_error"]["message"] = \
                f"""{fix_json_response.text.strip().replace('null', 'None')}"""
        else:
            # No LLM reply exists; surface the error itself instead.
            response["response_error"]["message"] = f"{e}"
        return response
def generate_tags_and_labels(context, products, project):
    """Generate tags and a suggested category for each product via the LLM.

    Args:
        context: Product description text interpolated into the prompt.
        products: Iterable of product names; one LLM call is made per name.
        project: Project forwarded to ``get_model_response``.

    Returns:
        A dict mapping each product name to the parsed LLM reply. If the
        LLM blocks any request, the blocked-response template (with
        ``safety_attributes`` filled in) is returned immediately instead.
        On a parse failure or any other error an empty dict is returned.
    """
    # Bound before the try so the handlers never hit an unbound name when
    # the failure happened before the first reply arrived.
    tags_and_labels_response = None
    try:
        tags_and_labels = {}
        for product in products:
            tags_and_labels_prompt = get_prompt(context,
                                                "image_tags_and_labels",
                                                product_name=product)
            tags_and_labels_response = \
                get_model_response(tags_and_labels_prompt,
                                   project)
            if tags_and_labels_response.is_blocked:
                print("Tags and Label Generation Response Blocked by LLM.")
                response = get_blocked_response_template()
                response["response_error"]["safety_attributes"] = \
                    tags_and_labels_response.safety_attributes
                return response
            tags_and_labels[product] = ast.literal_eval(
                tags_and_labels_response.text.strip())
        return tags_and_labels
    except SyntaxError as e:
        print(
            f"[ERROR]: SyntaxError during Tags "
            f"and Label generation. "
            f"The LLM may have returned a malformed JSON! \n{e}")
        if tags_and_labels_response is not None:
            print(tags_and_labels_response.text.strip())
        return {}
    except Exception as e:
        print(f"[ERROR]: Unknown error "
              f"during Tags and Label generation. \n{e}")
        if tags_and_labels_response is not None:
            print(tags_and_labels_response.text.strip())
        return {}
def generate_isqs(context, project):
    """Extract product specifications (ISQs) from ``context`` via the LLM.

    Args:
        context: Catalogue text to convert into a specifications structure.
        project: Project forwarded to ``get_model_response`` and, on a
            malformed reply, to ``fix_json``.

    Returns:
        The parsed specifications on success. If the reply is malformed,
        whatever ``fix_json`` returns. If the LLM blocked the request or an
        unexpected error occurred, a dict built from
        ``get_blocked_response_template()``.
    """
    # Bound before the try so the handlers can tell whether a reply exists.
    product_isqs = None
    try:
        product_specs_prompt = get_prompt(context, "specs")
        product_isqs = get_model_response(product_specs_prompt, project)
        if product_isqs.is_blocked:
            print(f"[WARNING]: ISQ Generation Response "
                  f"blocked by LLM: {product_isqs.safety_attributes}")
            isq_response = get_blocked_response_template()
            isq_response["response_error"]["safety_attributes"] = \
                product_isqs.safety_attributes
            return isq_response
        # The reply is parsed as a Python literal, so JSON ``null`` is
        # rewritten to ``None`` first.
        return ast.literal_eval(
            product_isqs.text.strip().replace('null', "None"))
    except SyntaxError as e:
        print(f"[ERROR]: SyntaxError during ISQ generation. "
              f"The LLM may have returned a malformed JSON! \n{e}\n")
        raw_text = ""
        if product_isqs is not None:
            raw_text = product_isqs.text.strip()
            print(raw_text)
        # fix_json requires the project; omitting it raised TypeError and
        # made the repair path unusable.
        return fix_json(e, raw_text, project)
    except Exception as e:
        print(f"[ERROR]: Unknown error during ISQ generation. \n{e}\n")
        isq_response = get_blocked_response_template()
        isq_response["response_error"]["is_blocked"] = False
        if product_isqs is not None:
            print(product_isqs.text.strip())
            isq_response["response_error"]["message"] = \
                f"""{product_isqs.text.strip().replace('null', 'None')}"""
        else:
            # No LLM reply exists; surface the error itself instead.
            isq_response["response_error"]["message"] = f"{e}"
        return isq_response
def generate_faqs(context, project):
    """Generate catalogue FAQs for ``context`` via the LLM.

    Args:
        context: Catalogue text the FAQs are generated from.
        project: Project forwarded to ``get_model_response`` and, on a
            malformed reply, to ``fix_faq_json``.

    Returns:
        The parsed FAQ structure on success. If the reply is malformed,
        whatever ``fix_faq_json`` returns. If the LLM blocked the request,
        the blocked-response template. On any other error, the failed-FAQ
        template with the error and raw reply filled in.
    """
    # Bound before the try so the handlers can tell whether a reply exists.
    faq_response = None
    try:
        get_faq_prompt = get_prompt(context, "faq")
        faq_response = get_model_response(get_faq_prompt, project)
        if faq_response.is_blocked:
            print(f"[WARNING]: FAQ Generation "
                  f"Response Blocked by LLM. {faq_response.safety_attributes}")
            response = get_blocked_response_template()
            response["response_error"]["safety_attributes"] = \
                faq_response.safety_attributes
            return response
        # The reply is parsed as a Python literal, so JSON ``null`` is
        # rewritten to ``None`` first.
        return ast.literal_eval(
            faq_response.text.strip().replace('null', 'None'))
    except SyntaxError as e:
        print(f"[ERROR]: SyntaxError during FAQ generation."
              f" The LLM may have returned a malformed JSON! \n{e}\n")
        raw_text = ""
        if faq_response is not None:
            raw_text = faq_response.text.strip()
            print(raw_text)
        # Forward the project so the repair call reaches the same project
        # as the original request.
        return fix_faq_json(e, raw_text, project)
    except Exception as e:
        print(f"[ERROR]: Unknown error during FAQ generation. \n{e}\n")
        raw_text = ""
        if faq_response is not None:
            raw_text = faq_response.text.strip()
            print(raw_text)
        response = get_failed_faq_template()
        response["catalogue_faqs"][0]["response_error"] = f"{e}"
        response["catalogue_faqs"][0]["llm_response"] = \
            f"{raw_text.replace('null', 'None')}"
        return response


def generate_company_details(company_text, project):
    """Extract company details from ``company_text`` via the LLM.

    Args:
        company_text: Text containing the company's contact information.
        project: Project forwarded to ``get_model_response``.

    Returns:
        The parsed company details on success, the blocked-response
        template if the LLM blocked the request, or an empty dict if the
        reply could not be parsed or another error occurred.
    """
    # Bound before the try so the handlers can tell whether a reply exists.
    company_details_response = None
    try:
        company_details_prompt = get_prompt(company_text, "company_details")
        company_details_response = \
            get_model_response(company_details_prompt, project)
        if company_details_response.is_blocked:
            print(
                f"[WARNING]: Company Details Extraction "
                f"Response blocked by LLM. "
                f"{company_details_response.safety_attributes}")
            response = get_blocked_response_template()
            response["response_error"]["safety_attributes"] = \
                company_details_response.safety_attributes
            return response
        response = ast.literal_eval(
            company_details_response.text.strip().replace('null', 'None'))
        print("[INFO]: Company Details Extraction Completed")
        return response
    except SyntaxError as e:
        print(
            f"[ERROR]: SyntaxError during company"
            f" details extraction. The LLM"
            f" may have returned a malformed JSON! \n{e}\n")
        if company_details_response is not None:
            print(company_details_response.text.strip())
        return {}
    except Exception as e:
        print(f"[ERROR]: Unknown error during "
              f"company details extraction. \n{e}\n")
        if company_details_response is not None:
            print(company_details_response.text.strip())
        return {}


def fix_faq_json(error, context, project=None):
    """Ask the LLM to repair a malformed FAQ JSON reply and parse the result.

    Args:
        error: The parse error raised for the malformed reply; interpolated
            into the "fix_faq_json" prompt.
        context: The malformed FAQ JSON text to repair.
        project: Project forwarded to ``get_model_response``. Optional so
            existing two-argument callers keep working; when None the SDK
            is initialised without an explicit project.

    Returns:
        The parsed FAQ structure on success, the blocked-response template
        if the LLM blocked the request, or the failed-FAQ template with the
        error and raw reply filled in otherwise.
    """
    # Bound before the try so the handlers can tell whether a reply exists.
    fix_json_response = None
    try:
        fix_json_prompt = get_prompt(context, "fix_faq_json", error)
        # get_model_response requires the project; the previous
        # one-argument call always raised TypeError.
        fix_json_response = get_model_response(fix_json_prompt, project)
        if fix_json_response.is_blocked:
            print("[WARNING]: Fix JSON Response Blocked by LLM.")
            response = get_blocked_response_template()
            response["response_error"]["safety_attributes"] = \
                fix_json_response.safety_attributes
            return response
        response = ast.literal_eval(
            fix_json_response.text.strip().replace('null', 'None'))
        print("[INFO]: JSON fixed successfully!")
        return response
    except SyntaxError as e:
        print(f"[ERROR]: SyntaxError during fixing FAQ JSON. "
              f"The LLM may have again returned a malformed JSON! \n{e}\n")
        raw_text = ""
        if fix_json_response is not None:
            raw_text = fix_json_response.text.strip()
            print(raw_text)
        response = get_failed_faq_template()
        response["catalogue_faqs"][0]["response_error"] = f"{e}"
        response["catalogue_faqs"][0]["llm_response"] = \
            f"{raw_text.replace('null', 'None')}"
        return response
    except Exception as e:
        print(f"[ERROR]: Unknown error during fixing FAQ JSON. \n{e}\n")
        raw_text = ""
        if fix_json_response is not None:
            raw_text = fix_json_response.text.strip()
            print(raw_text)
        response = get_failed_faq_template()
        response["catalogue_faqs"][0]["response_error"] = f"{e}"
        response["catalogue_faqs"][0]["llm_response"] = \
            f"{raw_text.replace('null', 'None')}"
        return response
def vertex_ai_llm(prompt):
    """Return the "text-bison" completion text for ``prompt``.

    Best-effort wrapper: any exception raised while loading or calling
    the model is logged with its traceback and an empty string is
    returned instead.

    Args:
        prompt: Prompt text passed to the model.

    Returns:
        The ``text`` attribute of the prediction, or ``''`` on failure.
    """
    try:
        text_model = TextGenerationModel.from_pretrained("text-bison")
        prediction = text_model.predict(
            prompt,
            candidate_count=1,
            max_output_tokens=1024,
            temperature=0,
            top_p=0.95,
            top_k=40,
        )
        return prediction.text
    except Exception:
        print(f"[ERROR]: Vertex AI LLM API failed -"
              f" {str(traceback.format_exc())}")
        return ''
def visual_question(image, question):
    """Ask the "imagetext@001" model a question about an image.

    Best-effort wrapper: any exception raised while loading or calling
    the model is logged with its traceback and three empty answers are
    returned instead.

    Args:
        image: Value passed to ``vertexai.vision_models.Image``.
        question: Question text passed to the model.

    Returns:
        The result of ``ask_question`` (up to 3 results are requested),
        or ``['', '', '']`` on failure.
    """
    from vertexai.vision_models import ImageTextModel, Image
    try:
        vqa_model = ImageTextModel.from_pretrained("imagetext@001")
        return vqa_model.ask_question(
            image=Image(image),
            question=question,
            # Optional:
            number_of_results=3,
        )
    except Exception:
        print(f"[ERROR]: Vertex AI VQA "
              f"API failed - {str(traceback.format_exc())}")
        return ['', '', '']
def image_caption(image):
    """Generate English captions for an image with "imagetext@001".

    Best-effort wrapper: any exception raised while loading or calling
    the model is logged with its traceback and three empty captions are
    returned instead.

    Args:
        image: Value passed to ``vertexai.vision_models.Image``.

    Returns:
        The result of ``get_captions`` (up to 3 results are requested),
        or ``['', '', '']`` on failure.
    """
    from vertexai.vision_models import ImageTextModel, Image
    try:
        caption_model = ImageTextModel.from_pretrained("imagetext@001")
        return caption_model.get_captions(
            image=Image(image),
            # Optional:
            number_of_results=3,
            language="en",
        )
    except Exception:
        print(f"[ERROR]: Vertex AI Image caption API failed "
              f"- {str(traceback.format_exc())}")
        return ['', '', '']
def get_options(products, product_descriptions=False):
    """Render product names as a numbered, newline-terminated list.

    Single quotes, double quotes and newlines are removed from each name
    so the list can be embedded safely inside an LLM prompt.

    Args:
        products: Iterable of product name strings.
        product_descriptions: Unused; kept for backward compatibility.

    Returns:
        A string such as ``"1. Foo\\n2. Bar\\n"``, or ``""`` when
        ``products`` is empty.
    """
    numbered_lines = []
    for product_no, product in enumerate(products, start=1):
        cleaned = product.replace("'", "").replace('"', "").replace("\n", "")
        numbered_lines.append(f"{product_no}. {cleaned}\n")
    # join avoids rebuilding the whole string on every iteration.
    return "".join(numbered_lines)
def product_description_from_text_prompt(text, products):
    """Build the prompt asking the LLM for a short caption per product.

    Args:
        text: OCR-extracted page text; single and double quotes are
            removed before it is embedded in the prompt.
        products: Product names, rendered as a numbered list by
            ``get_options``.

    Returns:
        The rendered prompt string.
    """
    options = get_options(products)
    # The example output is the ``str()`` of a Python dict, so the model
    # is shown single-quoted keys and values.
    # The trailing backslash inside the literal joins that line with the
    # next one, so no newline is emitted at that point.
    prompt = f"""
This is the extracted text from pdf page.
As it is extracted using OCR, the order
of the words and spellings might not be completely correct.\
you need to provide short product caption
based on the extracted text in json format
Example Extracted Text
Prodcuct A is a bench .
It is of white color.
I am having a good day.
I need a toilet roll making machine like product B.
Example Input products:
1. Product A
2. Product B
Example Output format:
```
json
{str({"Product A": "it is a white colored bench",
"Product B": "It is a machine which is used to make toilet rolls."})}
```
Extarcted Text:
```
{text.replace("'", "").replace('"', "")}
```
Input products:
{options}
"""
    return prompt
def product_tags_from_text_prompt(text, products):
    """Build the prompt asking the LLM for three tags per product.

    Args:
        text: OCR-extracted page text; single and double quotes are
            removed before it is embedded in the prompt.
        products: Product names, rendered as a numbered list by
            ``get_options``.

    Returns:
        The rendered prompt string.
    """
    options = get_options(products)
    # The example output is the ``str()`` of a Python dict, so the model
    # is shown single-quoted keys and values.
    # The trailing backslash inside the literal joins that line with the
    # next one, so no newline is emitted at that point.
    prompt = f"""
This is the extracted text from pdf page. As it is extracted using OCR,
the order of the words and spellings might not be completely correct.\
you need to provide 3 tags for each of the products based
on the extracted text in json format
Example Extracted Text
Toilet roll making machine can make toilet rolls easily. Its weight is 2kg.
Contly medicine tablets can cure liver diseases. It has no side effects.
Example Input products:
1. Toilet Roll Machine
2. Contly
Example Output format:
```
json
{str({"Toilet Roll Machine": ["machine", "industrial machine", "tool"],
"Contly": ["capsule", "medicine", "tablet"]})}
```
Extarcted Text:
```
{text.replace("'", "").replace('"', "")}
```
Input products:
{options}
"""
    return prompt
def product_category_from_text_prompt(text, products):
    """Build the prompt asking the LLM for a category per product.

    Args:
        text: OCR-extracted page text; single and double quotes are
            removed before it is embedded in the prompt.
        products: Product names, rendered as a numbered list by
            ``get_options``.

    Returns:
        The rendered prompt string.
    """
    options = get_options(products)
    # The example output is the ``str()`` of a Python dict, so the model
    # is shown single-quoted keys and values.
    # NOTE(review): the literal has two consecutive code-fence lines after
    # the example output, unlike the sibling prompt builders — confirm
    # whether the extra fence is intentional.
    prompt = f"""
This is the extracted text from pdf page.
As it is extracted using OCR,
the order of the words and spellings might not be completely correct.
you need to provide product category
for each of the products based on
the extracted text in json format
Example Extracted Text
Toilet roll making machine can make toilet rolls easily.
It's weight is 2kg.
Contly medicine tablets can cure liver diseases. It has no side effects.
Example Input products:
1. Toilet Roll Machine
2. Contly
Example Output format:
```
json
{str({"Toilet Roll Machine": "toilet roll making machine",
"Contly": "liver medicine"})}
```
```
Extarcted Text:
```
{text.replace("'", "").replace('"', "")}
```
Input products:
{options}
"""
    return prompt
def map_product_and_image(images, products, product_description):
    """Build the prompt asking the LLM to pair products with page images.

    Args:
        images: Sequence of dicts with string ``"caption1"`` (main caption)
            and ``"caption2"`` (specific caption guesses) entries; images
            are numbered from 1 in the order given.
        products: Product names, rendered as a numbered list by
            ``get_options``.
        product_description: Mapping of product name to tags/description;
            products missing from it are left out of the tags section.

    Returns:
        The rendered prompt string.
    """
    example_json = {"Product_A": "Image 3",
                    "Product_B": "Image 7",
                    "Product_C": "",
                    "Product_D": "Image 2"}

    # One text section per image; quotes are removed from both captions.
    image_sections = []
    for position, entry in enumerate(images, start=1):
        heading = f"Image {str(position)}\n"
        main_caption = entry["caption1"].replace('"', '').replace("'", "")
        guess_caption = entry["caption2"].replace('"', '').replace("'", "")
        image_sections.append(f"{heading}\nMain Caption -"
                              f" reliable and correct:"
                              f"\n{main_caption}\n\nSpecific Caption "
                              f"Guesses which can be "
                              f"incorrect:\n{guess_caption}\n\n")
    images_str = "".join(image_sections)

    products_str = get_options(products)

    # One "name: tags" line for every product that has an entry.
    tag_lines = []
    for name in products:
        if name not in product_description:
            continue
        details = product_description[name]
        cleaned_name = name.replace("'", "")
        cleaned_name = cleaned_name.replace('"', "")
        cleaned_name = cleaned_name.replace("\n", "")
        tag_lines.append(f"{cleaned_name}: {str(details)}\n")
    product_description_str = "".join(tag_lines)

    return f"""
You need to map products with images.
For images we have generated 2 types of captions using different methods.
Main caption of image is generic but it is accurate.
Specific caption guesses of image might
be very specific but can be sometimes wrong.
Specific caption guesses contain 3 values,
with 1st one has higher chances of being correct
and 3rd one has comparatively lower chances of being correct.
Very Important points to remember:
1. A product can only be mapped to maximum one image only.
2. An image can only be mapped to maximum one product only.
3. There is also possiblity that there will no image for a product.
4. There is also possiblity that an image is not relevant to any product.
Output should be in json format with product as key
and mapped image as value. If no image can be
mapped for a product, then simply keep its value as empty.
Output format:
```
json
{example_json}
```
Products are as follows:
{products_str}
To better understand products, we also have mapped
tags for some of the products, might not be there for all products.
Compare the main caption with product
tags for more acccurate product image mapping.
Check product tags properly, so that
you dont confuse similar name object with actual products.
example: image of toilet rolls are not
to be confused or mapped with toilet roll making machine
Here are product tags:
{product_description_str}
Images are as follows:
{images_str}
Output:
"""
def llm_json_to_dict(llm_json_text):
    """Extract and parse the JSON object embedded in an LLM reply.

    The reply may wrap the object in prose or code fences. Candidates are
    tried in this order, and the first one that parses is returned:

    1. The span from the last ``{`` to the last ``}``, which picks the
       final object when the reply contains several.
    2. The same span with single quotes swapped for double quotes, for
       replies formatted like a Python dict.
    3. and 4. The same two attempts on the span from the first ``{`` to
       the last ``}``, so a single nested object can be parsed.

    Args:
        llm_json_text: Raw text returned by the LLM.

    Returns:
        The parsed JSON value.

    Raises:
        json.JSONDecodeError: If no candidate span parses.
    """
    end = llm_json_text.rfind('}')
    spans = []
    for start in (llm_json_text.rfind('{'), llm_json_text.find('{')):
        span = llm_json_text[start:end + 1]
        if span not in spans:
            spans.append(span)

    last_error = None
    for span in spans:
        for candidate in (span, span.replace("'", '"')):
            try:
                return json.loads(candidate)
            except json.JSONDecodeError as err:
                last_error = err
    raise last_error
def get_specific_caption(pdf_json):
    """Attach "specific captions" to every image entry in ``pdf_json``.

    For each image listed under ``pdf_json["pages"][i]["images"]`` a red
    rectangle is drawn around its bounding box on the rendered page, the
    page is saved to ``img.png``, and the visual-question model is asked
    what the highlighted region contains. The answers are stored on the
    image entry under ``"specific_captions"``.

    Any exception is logged with its traceback and swallowed, so the
    returned structure may be only partially annotated.

    Args:
        pdf_json: Dict with a ``"file_url"`` (``gs://bucket/path``) and a
            ``"pages"`` list whose entries have an ``"images"`` list of
            dicts carrying a ``"bbox"``.
            NOTE(review): bbox is indexed as [0..3] and passed to
            ``draw_rect``; presumably (x0, y0, x1, y1) — confirm.

    Returns:
        The same ``pdf_json`` object, mutated in place.
    """
    try:
        # Split "gs://bucket/path/to/file" into bucket name and object name.
        pdf_gcs_uri = pdf_json["file_url"]
        pdf_gcs_path = pdf_gcs_uri.replace("gs://", "")
        input_gcs_bucket = pdf_gcs_path.split("/")[0]
        filename = pdf_gcs_path.replace(f"{input_gcs_bucket}/", "")
        bucket_object = storage.Client().bucket(input_gcs_bucket)
        blob = bucket_object.blob(filename)
        zoom = 1
        mat = fitz.Matrix(zoom, zoom)
        # k is the per-page image index handled in the current pass.
        k = 0
        all_done = False
        max_images = 0
        while not all_done:
            # print(max_images,k)
            # The PDF is downloaded and opened again on every pass.
            # NOTE(review): presumably so rectangles drawn in earlier
            # passes do not appear in later renders — confirm.
            pdf_file = fitz.open("pdf", blob.download_as_bytes())
            for page_index, page in enumerate(pdf_file):
                images_info = pdf_json["pages"][page_index]["images"]
                no_of_images = len(images_info)
                # Track the largest image count to know when to stop.
                if no_of_images > max_images:
                    max_images = no_of_images
                if no_of_images > k:
                    images_info_left = images_info[k:]
                    for i, image_info in \
                            enumerate(images_info_left, start=k + 1):
                        bbox = image_info["bbox"]
                        # width = bbox[2] - bbox[0]
                        # height = bbox[3] - bbox[1]
                        # print(width,height)
                        # Highlight the image with a slightly enlarged box.
                        page.draw_rect([bbox[0] - 2,
                                        bbox[1] - 2,
                                        bbox[2] + 2,
                                        bbox[3] + 2],
                                       color=(1, 0, 0), width=3)
                        pix = page.get_pixmap(matrix=mat)
                        # Round-trip through a fixed file in the working
                        # directory to obtain PNG bytes.
                        pix.save("img.png")
                        with open("img.png", "rb") as image:
                            img = image.read()
                        # display(Img(img))
                        question = "What is there in the " \
                                   "image which is highlighted " \
                                   "by a red bounding box?"
                        captions = visual_question(img, question)
                        # print(captions)
                        temp = pdf_json["pages"][page_index]
                        temp["images"][k]["specific_captions"] \
                            = captions
                        # Only the k-th image of this page is handled in
                        # this pass.
                        break
            k = k + 1
            # print(max_images,k)
            if k >= max_images:
                all_done = True
    except Exception:
        print(f"[ERROR]: Specific caption "
              f"generation failed - {str(traceback.format_exc())}")
    return pdf_json
def parse_prod_name(products, product_description):
    """Re-key ``product_description`` by the exact names in ``products``.

    Names are matched after removing every non-word character and
    lower-casing, so "Toilet-Roll Machine" and "toilet roll machine" are
    treated as the same product. Products without a match are omitted.

    Args:
        products: Iterable of canonical product names.
        product_description: Mapping keyed by possibly differently
            formatted product names, e.g. as returned by the LLM.

    Returns:
        A dict mapping each matched canonical name to its value.
    """
    def normalise(name):
        return re.sub(r'\W+', '', name).lower()

    # If several keys normalise to the same string, the last one wins.
    by_normalised_name = {
        normalise(key): value for key, value in product_description.items()
    }
    return {
        name: by_normalised_name[normalise(name)]
        for name in products
        if normalise(name) in by_normalised_name
    }
def generate_tags_json(context, products):
    """Return LLM-generated tags for each product found in ``context``.

    Builds the tags prompt, sends it to the LLM, parses the JSON in the
    reply and re-keys the result by the exact names in ``products``.

    Args:
        context: OCR-extracted page text.
        products: Product names to tag.

    Returns:
        A dict mapping matched product names to their tags.
    """
    llm_reply = vertex_ai_llm(product_tags_from_text_prompt(context, products))
    raw_tags = llm_json_to_dict(llm_reply)
    return parse_prod_name(products, raw_tags)
def generate_category_json(context, products):
    """Return an LLM-generated category for each product in ``context``.

    Builds the category prompt, sends it to the LLM, parses the JSON in
    the reply and re-keys the result by the exact names in ``products``.

    Args:
        context: OCR-extracted page text.
        products: Product names to categorise.

    Returns:
        A dict mapping matched product names to their category.
    """
    llm_reply = vertex_ai_llm(
        product_category_from_text_prompt(context, products))
    raw_categories = llm_json_to_dict(llm_reply)
    return parse_prod_name(products, raw_categories)
def generate_images_json(page, products, product_tags, bucket_name):
    """Map each product on a page to one of the page's images via the LLM.

    Every image is downloaded and captioned, the captions are stored on
    the image entry under ``"captions"``, and the LLM is asked to pair
    products with images. The reply is then resolved back to image
    entries.

    Args:
        page: Page dict with an ``"images"`` list. Each image has an
            ``"image_url"`` and optionally ``"specific_captions"``.
        products: Product names found on the page. When empty, no remote
            call is made and an empty dict is returned.
        product_tags: Mapping of product name to tags, used as extra
            context in the mapping prompt.
        bucket_name: Cloud Storage bucket that holds the page images.

    Returns:
        A dict keyed by the product names in the LLM reply. Each value is
        either a dict with ``image_url``, ``generic_caption`` and
        ``specific_caption``, or the string ``"No image found"``.
    """
    products_image_map = {}
    images = page["images"]
    if products:
        # One client/bucket handle serves every download on this page.
        bucket = storage.Client().bucket(bucket_name)
        images_captions = []
        for image in images:
            url = image['image_url']
            filename = url.replace(f"gs://{bucket_name}/", "")
            blob = bucket.get_blob(filename)
            img = blob.download_as_bytes()
            captions = image_caption(img)
            image["captions"] = captions
            try:
                specific_captions = image["specific_captions"]
            except (KeyError, TypeError) as err:
                print(f"[ERROR]: No specific caption generated - {err}")
                specific_captions = ['', '', '']
            try:
                caption1 = captions[0]
            except (IndexError, KeyError, TypeError) as err:
                print(f"[ERROR]: No generic caption generated - {err}")
                caption1 = '\n'
            images_captions.append({"image": image,
                                    "caption1": caption1,
                                    "caption2": str(specific_captions)})
        prompt = map_product_and_image(images_captions, products, product_tags)
        response = vertex_ai_llm(prompt)
        try:
            products_image_map = llm_json_to_dict(response)
        except ValueError as err:
            # vertex_ai_llm returns '' on failure, which cannot be parsed;
            # treat that as "nothing mapped" instead of crashing the page.
            print(f"[ERROR]: Could not parse product-image mapping - {err}")
            products_image_map = {}

    product_images = {}
    for product, mapped_image in products_image_map.items():
        # The prompt asks for values such as "Image 3" (or "" for none),
        # so the number is pulled out rather than converting the string.
        number_match = re.search(r"\d+", str(mapped_image))
        image_no = int(number_match.group()) if number_match else 0
        if not 1 <= image_no <= len(images):
            product_images[product] = "No image found"
            continue
        image = images[image_no - 1]
        try:
            product_images[product] = {
                "image_url": image['image_url'],
                "generic_caption": image["captions"],
                # Same default the captioning loop uses for a missing key.
                "specific_caption": image.get("specific_captions",
                                              ['', '', '']),
            }
        except (KeyError, TypeError, AttributeError):
            product_images[product] = "No image found"
    return product_images