def preprocess_batch()

in data.py [0:0]


def _format_detail(key, value):
    """
    Render one cell of a NONE_KEY_MAP column as display text.

    A truthy list is joined with ", "; its items are stringified first so that
    non-string entries (e.g. numbers) do not make ``str.join`` raise TypeError.
    Any other truthy value is passed through ``str``. A falsy value
    (None, "", [], 0, ...) is replaced by a placeholder derived from the first
    entry of ``NONE_KEY_MAP[key]``: "no_" becomes "unspecified " and the
    remaining underscores become spaces.
    """
    if value:
        if isinstance(value, list):
            return ", ".join(map(str, value))
        return str(value)
    # Looked up lazily: the entry is only required when a placeholder is needed.
    default = NONE_KEY_MAP[key][0]
    return default.replace("no_", "unspecified ").replace("_", " ")


def preprocess_batch(rows):
    """
    Process a batch of examples represented as a dictionary where each key maps to a list of values.

    For each key in NONE_KEY_MAP a new column is added under the name
    ``<KEY>`` (the original key upper-cased, spaces replaced by underscores,
    wrapped in angle brackets). Each cell of that column is the text produced
    by ``_format_detail``. A key that is absent from ``rows`` (or whose column
    is None) is treated as a column of missing values.

    If ``rows`` has an "image" column, every element that exposes a ``mode``
    attribute other than "RGB" is replaced, in place, by ``convert("RGB")``
    (presumably PIL images -- confirm against the caller).

    Args:
        rows: mapping of column name to a list of per-example values. The
            batch size is taken from the first column; an empty mapping is
            treated as a batch of zero examples.

    Returns:
        The same ``rows`` object, updated in place with the new columns.
    """
    # The () default makes an empty batch yield n == 0 instead of letting
    # next() raise StopIteration.
    n = len(next(iter(rows.values()), ()))

    # Build each derived column in one pass so the column name and the source
    # column are resolved once per key rather than once per cell.
    processed_data = {}
    for k in NONE_KEY_MAP:
        new_key = f"<{k.replace(' ', '_').upper()}>"
        column = rows.get(k)
        if column is None:
            processed_data[new_key] = [_format_detail(k, None) for _ in range(n)]
        else:
            # Index by the batch size so a column shorter than the batch still
            # fails loudly rather than silently producing a short column.
            processed_data[new_key] = [_format_detail(k, column[i]) for i in range(n)]

    # Convert non-RGB images; only touch the list when a conversion happened.
    if "image" in rows:
        images = rows["image"]
        for i, image in enumerate(images):
            if image is not None and hasattr(image, "mode") and image.mode != "RGB":
                images[i] = image.convert("RGB")

    # Merge the processed columns into the original batch dictionary.
    rows.update(processed_data)
    return rows