mime_types.py (109 lines of code) (raw):

# Copyright 2025 DeepMind Technologies Limited. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== """MIME types commonly used by Processors at the `Part` level. Processors uses MIME types to express part modalities[^1]. If this file doesn't contain an appropriate MIME type you can pick one from https://www.iana.org/assignments/media-types/media-types.xhtml or define a custom */x-*. """ from typing import Any IMAGE_PNG = 'image/png' IMAGE_JPEG = 'image/jpeg' IMAGE_WEBP = 'image/webp' IMAGE_HEIC = 'image/heic' IMAGE_HEIF = 'image/heif' INPUT_IMAGE_TYPES = [ IMAGE_HEIF, IMAGE_HEIC, IMAGE_JPEG, IMAGE_PNG, IMAGE_WEBP, ] AUDIO_AAC = 'audio/aac' AUDIO_FLAC = 'audio/flac' AUDIO_MP3 = 'audio/mp3' AUDIO_M4A = 'audio/m4a' AUDIO_MPEG = 'audio/mpeg' AUDIO_MPGA = 'audio/mpga' AUDIO_MP4 = 'audio/mp4' AUDIO_OPUS = 'audio/opus' AUDIO_PCM = 'audio/pcm' AUDIO_WAV = 'audio/wav' AUDIO_WEBM = 'audio/webm' INPUT_AUDIO_TYPES = [ AUDIO_AAC, AUDIO_FLAC, AUDIO_MP3, AUDIO_M4A, AUDIO_MPEG, AUDIO_MPGA, AUDIO_MP4, AUDIO_OPUS, AUDIO_PCM, AUDIO_WAV, AUDIO_WEBM, ] VIDEO_MOV = 'video/mov' VIDEO_MPEG = 'video/mpeg' VIDEO_MPEGPS = 'video/mpegps' VIDEO_MPG = 'video/mpg' VIDEO_MP4 = 'video/mp4' VIDEO_WEBM = 'video/webm' VIDEO_WMV = 'video/wmv' VIDEO_X_FLV = 'video/x-flv' VIDEO_3GPP = 'video/3gpp' VIDEO_QUICKTIME = 'video/quicktime' INPUT_VIDEO_TYPES = [ VIDEO_MOV, VIDEO_MPEG, VIDEO_MPEGPS, VIDEO_MPG, VIDEO_MP4, VIDEO_WEBM, VIDEO_WMV, VIDEO_X_FLV, VIDEO_3GPP, VIDEO_QUICKTIME, ] TEXT_PDF = 'application/pdf' TEXT_PLAIN = 'text/plain' TEXT_CSV = 'text/csv' TEXT_HTML = 'text/html' TEXT_XML = 'text/xml' TEXT_PYTHON = 'text/x-python' TEXT_SCRIPT_PYTHON = 'text/x-script.python' TEXT_JSON = 'application/json' INPUT_TEXT_TYPES = [ TEXT_PDF, TEXT_PLAIN, TEXT_CSV, TEXT_HTML, TEXT_XML, TEXT_PYTHON, TEXT_SCRIPT_PYTHON, TEXT_JSON, ] ALL_SUPPORTED_INPUT_TYPES = ( INPUT_IMAGE_TYPES + INPUT_AUDIO_TYPES + INPUT_VIDEO_TYPES + INPUT_TEXT_TYPES ) def is_text(mime: str) -> bool: """Returns whether the content is a human-readable text.""" return mime in INPUT_TEXT_TYPES or mime.startswith('text/') def is_json(mime: str) -> bool: """Returns whether the content is a human-readable json.""" return mime == TEXT_JSON or mime.startswith(TEXT_JSON) def is_dataclass(mime: str, json_dataclass: type[Any] | None = None) -> bool: """Returns whether the content is a dataclass.""" type_name = json_dataclass.__name__ if json_dataclass else '' return mime.startswith(f'application/json; type={type_name}') def is_image(mime: str) -> bool: """Returns whether the content is an image.""" return (mime in INPUT_IMAGE_TYPES) or mime.startswith('image/') def is_video(mime: str) -> bool: """Returns whether the content is a video. Args: mime: The mime string. Returns: True of it is a video, False otherwise. """ return (mime in INPUT_VIDEO_TYPES) or mime.startswith('video/') def is_audio(mime: str) -> bool: """Returns whether the content is audio.""" return (mime in INPUT_AUDIO_TYPES) or mime.startswith('audio/') def is_streaming_audio(mime: str) -> bool: """Returns whether the content is streaming audio.""" return mime.startswith('audio/l16') def is_wav(mime: str) -> bool: """Returns whether the content is a wav file.""" return mime == AUDIO_WAV def is_source_code(mime: str) -> bool: """Returns whether the content is a source code in some language.""" # This list is incomplete and will be extended on as-needed basis. return mime in ( 'text/x-python', 'application/x-latex', 'text/x-c', ) def is_pdf(mime: str) -> bool: """Returns whether the content is a PDF.""" return mime == TEXT_PDF def is_csv(mime: str) -> bool: """Returns whether the content is a CSV file.""" return mime == TEXT_CSV def is_python(mime: str) -> bool: """Returns whether the content is python code.""" return mime in [TEXT_PYTHON, TEXT_SCRIPT_PYTHON]