def serialize()

in src/nanotron/config/utils_config.py [0:0]


def serialize(data) -> dict:
    """Recursively serialize a nested dataclass to a dict, converting some types along the way.

    Args:
        data: A dataclass (anything exposing ``__dataclass_fields__``), ``None``,
            or any other object.

    Returns:
        ``None`` when ``data`` is ``None``; ``data`` itself, unchanged, when it is
        not a dataclass; otherwise a dict mapping each dataclass field name to its
        converted value. Despite the ``dict`` annotation, the first two cases do
        not return a dict.

    Field values are converted as follows:
        * nested dataclasses -> dict (recursively)
        * ``Path`` -> ``str``
        * ``PipelineEngine`` -> result of ``cast_pipeline_engine_to_str``
        * ``TensorParallelLinearMode`` / ``RecomputeGranularity`` /
          ``InitScalingMethod`` / ``SamplerType`` -> their ``.name``
        * ``torch.dtype`` -> ``dtype_to_str[value]``
        * list / tuple -> list whose elements get the same conversions
        * empty dict -> ``None``
        * anything else is kept as is
    """
    if data is None:
        return None

    if not hasattr(data, "__dataclass_fields__"):
        return data

    result = {}
    for spec in fields(data):
        value = getattr(data, spec.name)
        if isinstance(value, dict) and not value:
            # So we can serialize empty dicts without issue with `datasets` in particular
            result[spec.name] = None
        else:
            result[spec.name] = _serialize_value(value)

    return result


def _serialize_value(value):
    """Convert a single field value or sequence element to its serialized form."""
    if value is None:
        return None
    if hasattr(value, "__dataclass_fields__"):
        return serialize(value)
    if isinstance(value, Path):
        return str(value)
    if isinstance(value, (list, tuple)):
        # Elements go through the same conversions as direct field values, so a
        # list of Paths / enums / dtypes (or a nested list) is converted too
        # instead of being emitted as raw objects.
        # NOTE(review): checked before the project-specific types below; this
        # assumes none of those types subclass list/tuple - confirm.
        return [_serialize_value(item) for item in value]
    if isinstance(value, PipelineEngine):
        return cast_pipeline_engine_to_str(value)
    if isinstance(
        value,
        (TensorParallelLinearMode, RecomputeGranularity, InitScalingMethod, SamplerType),
    ):
        return value.name
    if isinstance(value, torch.dtype):
        # presumably dtype_to_str is a mapping keyed by torch.dtype - verify
        return dtype_to_str[value]
    return value