in bot/gemini_model.py [0:0]
def _count_tokens(part: Part) -> int:
    """Return an (often estimated) token count for a single ``Part``.

    Text and image/video/audio inline data are estimated locally with rough
    heuristics so that no API round trip is needed. Every other kind of part,
    as well as inline data whose MIME type or payload is missing, falls back
    to the model's ``count_tokens`` endpoint.

    Args:
        part: The content part to measure. Only its ``text`` and
            ``inline_data`` (``mime_type`` / ``data``) attributes are
            inspected locally.

    Returns:
        The estimated or API-reported number of tokens.
    """
    text = getattr(part, "text", None)
    if text is not None:
        # Rough estimate: about 0.3 tokens per character.
        return int(len(text) * 0.3)

    inline_data = getattr(part, "inline_data", None)
    if inline_data is not None:
        # mime_type may be unset; treat that as "unknown" instead of crashing
        # on None.startswith(...).
        mime_type = inline_data.mime_type or ""
        if mime_type.startswith("image"):
            # Flat per-image cost -- presumably the model's fixed image
            # charge; TODO confirm against the current model documentation.
            return 258
        payload = inline_data.data
        if payload is not None:
            # 1000 bytes per token for video, 800 bytes per token for audio
            # -- those are rough estimates.
            if mime_type.startswith("video"):
                return len(payload) // 1000
            if mime_type.startswith("audio"):
                return len(payload) // 800

    # No local heuristic applies: ask the API.
    # Stringify the part once and cap it, so that large binary payloads are
    # never dumped into the log in full.
    part_repr = f"{part}"[:200]
    _LOGGER.debug(f"Counting tokens for {part_repr}"[:200])
    # perf_counter is monotonic, unlike time.time(), so the measured duration
    # cannot be skewed by system clock adjustments.
    start = time.perf_counter()
    count = _client.models.count_tokens(
        model=GEMINI_MODEL_NAME, contents=part
    ).total_tokens
    elapsed = time.perf_counter() - start
    _LOGGER.debug(
        f"Counted tokens for {part_repr} in {elapsed}s with token count: {count}"
    )
    # NOTE(review): total_tokens may be optional in the SDK response type;
    # confirm whether a None result needs handling here.
    return count