in tools/cloud/vision_system_data_serializer.py [0:0]
def push_string(self, encoding, val, upper_bound):
    """Append a length-prefixed, encoded string to the output buffer.

    A UINT32 count of code units is pushed first via ``push_primitive``,
    followed by the encoded payload:

    * ``"utf-8"``: a ``"\\0"`` terminator is appended to ``val`` before
      encoding, so the count and the payload both include it. Code units
      are 1 byte.
    * ``"utf-16"``: the count is the number of 16-bit code units produced
      by ``val.encode("utf-16")``; each unit is then written as 4 bytes
      (the 2 encoded bytes followed by 2 zero bytes).
    * any other value: encoded with that codec, counted as 2-byte code
      units and written without widening.

    Afterwards ``self.last_data_size`` holds the per-code-unit size that
    was written (1, 2 or 4) and ``self.current_position`` has been advanced
    by the payload length in bytes.

    Args:
        encoding: Codec name passed to ``str.encode``. Only the exact
            spellings ``"utf-8"`` and ``"utf-16"`` get the special
            handling described above.
        val: The string to serialize.
        upper_bound: Maximum allowed ``len(val)`` (measured before the
            terminator is added); values ``<= 0`` disable the check.

    Raises:
        ValueError: If ``upper_bound > 0`` and ``len(val)`` exceeds it.
    """
    if upper_bound > 0 and len(val) > upper_bound:
        raise ValueError(f'length of string "{val}" exceeds upper bound {upper_bound}')

    if encoding == "utf-8":
        code_unit_size = 1
        # utf-8 mode has a null-terminator:
        val += "\0"
    else:
        code_unit_size = 2

    # NOTE(review): Python's "utf-16" codec prepends a byte-order mark, so
    # the BOM is counted and written as a code unit here -- confirm that the
    # reader of this stream expects it.
    raw_bytes = val.encode(encoding)

    # Integer division keeps the count an exact int (no float round-trip).
    self.push_primitive(PRIMITIVE_TYPES["UINT32"], len(raw_bytes) // code_unit_size)

    # utf-16 mode stores the 16-bit utf-16 'code units' in 32-bit words:
    if encoding == "utf-16":
        code_unit_size = 4
        widened = bytearray()
        # Single linear pass; re-slicing the remaining input on every
        # iteration would copy the tail each time and be quadratic.
        for start in range(0, len(raw_bytes), 2):
            widened += raw_bytes[start:start + 2]
            widened += b"\x00\x00"
        raw_bytes = widened

    self.buffer += raw_bytes
    self.last_data_size = code_unit_size
    self.current_position += len(raw_bytes)