# in document_ai_warehouse/common/src/common/utils/docai_warehouse_helper.py [0:0]
def get_metadata_properties(key_values, schema) -> List[contentwarehouse_v1.Property]:
    """Convert key/value pairs into warehouse properties typed by ``schema``.

    Each key is matched by name against ``schema.property_definitions``. The
    first type-options field found on a matching definition decides how the
    value is converted: text via ``str``, float via ``float``, integer via
    ``int`` and date/time via ``pd.to_datetime``.

    Pairs are skipped (with a warning) rather than failing the whole call when
    the key has no detectable type in the schema or the value cannot be
    converted to that type.

    Args:
        key_values: Iterable of ``(key, value)`` pairs.
        schema: Object exposing ``property_definitions``; every definition
            must have a ``name`` and support ``field_name in definition``
            membership tests for the type-options field names.

    Returns:
        One ``contentwarehouse_v1.Property`` per successfully converted pair,
        in input order, each holding a single value.
    """
    # Probed in this order; the first option present on a definition wins.
    type_option_names = (
        "text_type_options",
        "date_time_type_options",
        "float_type_options",
        "integer_type_options",
    )

    # Resolve the schema once instead of re-scanning every definition for
    # every key. A name is recorded only when a type option is found, so a
    # later definition with the same name can still supply the type if an
    # earlier one had none.
    type_by_name = {}
    for definition in schema.property_definitions:
        if definition.name in type_by_name:
            continue
        for option in type_option_names:
            if option in definition:
                type_by_name[definition.name] = option
                break

    metadata_properties = []
    for key, value in key_values:
        value_type = type_by_name.get(key)
        if value_type is None:
            Logger.warning(
                f"get_metadata_properties key={key}, value={value}, Type not detected"
            )
            continue

        Logger.info(
            f"get_metadata_properties key={key}, value={value}, type={value_type}"
        )
        one_property = contentwarehouse_v1.Property()
        one_property.name = key

        # Best effort by design: a value that cannot be converted (bad number,
        # unparseable date, ...) drops only this property, so the broad catch
        # is intentional.
        try:
            if value_type == "text_type_options":
                one_property.text_values = contentwarehouse_v1.TextArray(
                    values=[str(value)]
                )
            elif value_type == "float_type_options":
                one_property.float_values = contentwarehouse_v1.FloatArray(
                    values=[float(value)]
                )
            elif value_type == "integer_type_options":
                one_property.integer_values = contentwarehouse_v1.IntegerArray(
                    values=[int(value)]
                )
            elif value_type == "date_time_type_options":
                date_time = pd.to_datetime(value)
                # Only whole-second calendar fields are copied and utc_offset
                # is always left empty, so sub-second precision and any
                # timezone offset on the parsed value are not carried over.
                # NOTE(review): confirm inputs are expected to be naive/UTC.
                dt = datetime_pb2.DateTime(
                    year=date_time.year,
                    month=date_time.month,
                    day=date_time.day,
                    hours=date_time.hour,
                    minutes=date_time.minute,
                    seconds=date_time.second,
                    utc_offset={},
                )
                one_property.date_time_values = contentwarehouse_v1.DateTimeArray(
                    values=[dt]
                )
            else:
                # Defensive: only reachable if a name is added to
                # type_option_names without a matching branch above.
                Logger.warning(
                    f"Unsupported property type {value_type} for {key} = {value} Skipping. "
                )
                continue
        except Exception as ex:
            Logger.warning(
                f"Could not load {key} = {value} of type {value_type} as property. Skipping. Exception = {ex}"
            )
            continue

        metadata_properties.append(one_property)

    return metadata_properties