def as_annotated_data()

in python-package/lets_plot/plot/util.py [0:0]


def as_annotated_data(data: Any, mapping_spec: FeatureSpec) -> Tuple:
    """Split a mapping spec into plain mappings plus 'data_meta' annotations.

    Every property of ``mapping_spec`` except ``'name'`` is treated as an
    aesthetic mapping. A ``MappingMeta`` value contributes its ``variable`` to
    the mappings and its metadata (levels, label, ordering) to the annotations;
    any other value is used as the mapped variable as is.

    :param data: The data the mappings refer to. It is returned unchanged and is
        only inspected to infer series types, time zones and factor levels.
    :param mapping_spec: Feature spec holding the aesthetic mappings, or None.
    :return: A 3-tuple ``(data, aes(**mappings), {'data_meta': data_meta})``
        where ``data_meta`` may contain the keys ``'series_annotations'`` and
        ``'mapping_annotations'`` (each present only when non-empty).
    """
    data_type_by_var: Dict[str, str] = {}  # VarName to Type
    mapping_meta_by_var: Dict[str, Dict[str, MappingMeta]] = {}  # VarName to Dict[Aes, MappingMeta]
    mappings = {}  # Aes to VarName

    # fill mapping_meta_by_var, mappings and data_type_by_var.
    if mapping_spec is not None:
        for key, spec in mapping_spec.props().items():
            # the key is either an aesthetic name or 'name' (FeatureSpec.name property)
            if key == 'name':  # ignore FeatureSpec.name property
                continue

            if isinstance(spec, MappingMeta):
                mappings[key] = spec.variable
                mapping_meta_by_var.setdefault(spec.variable, {})[key] = spec
                # Placeholder type; overwritten below if the type can be inferred from the data.
                data_type_by_var[spec.variable] = TYPE_UNKNOWN
            else:
                mappings[key] = spec  # spec is a variable name

    data_type_by_var.update(_infer_type(data))

    # The kind of data container does not change between variables - check it once.
    is_pandas = is_pandas_data_frame(data)
    is_polars = is_polars_dataframe(data)

    # fill series annotations
    series_annotations = {}  # var to series_annotation
    for var_name, data_type in data_type_by_var.items():
        series_annotation = {}

        if data_type != TYPE_UNKNOWN:
            series_annotation['type'] = data_type

        # Detect the time zone of each date-time variable.
        if data_type == TYPE_DATE_TIME:
            time_zone = _detect_time_zone(var_name, data)
            if time_zone is not None:
                series_annotation['time_zone'] = time_zone

        # A variable that comes only from the mapping spec (e.g. a stat variable)
        # may not be a column of the data frame - do not index the frame with it.
        column = None
        if (is_pandas or is_polars) and var_name in data.columns:
            column = data[var_name]

        if is_pandas and column is not None and column.dtype.name == 'category' and column.dtype.ordered:
            series_annotation['factor_levels'] = column.cat.categories.to_list()

        elif is_polars:
            # NOTE(review): for polars data the 'as_discrete' levels branch below is never reached;
            # such levels are only passed on via the mapping annotations. Kept as in the original.
            if column is not None:
                import polars

                col_dtype = column.dtype
                if isinstance(col_dtype, polars.datatypes.Enum):
                    series_annotation['factor_levels'] = list(col_dtype.categories)
                # polars.datatypes.Categorical is intentionally skipped: it does not seem possible
                # to get the categories in the correct order from the Categorical dtype.

        elif var_name in mapping_meta_by_var:
            levels = last_not_none([mm.levels for mm in mapping_meta_by_var[var_name].values()])
            if levels is not None:
                series_annotation['factor_levels'] = levels

        if 'factor_levels' in series_annotation and var_name in mapping_meta_by_var:
            # Use .get(): 'order' is read the same way when building the mapping annotations.
            order = last_not_none([mm.parameters.get('order') for mm in mapping_meta_by_var[var_name].values()])
            if order is not None:
                series_annotation['order'] = order

        if series_annotation:
            series_annotation['column'] = var_name
            series_annotations[var_name] = series_annotation

    # fill mapping annotations
    mapping_annotations = []
    for var_name, meta_data in mapping_meta_by_var.items():
        for aesthetic, mapping_meta in meta_data.items():
            if mapping_meta.annotation != 'as_discrete':
                continue

            if 'factor_levels' in series_annotations.get(var_name, {}):
                # Known issue: if a label is set then the levels are not applied,
                # so no mapping annotation is emitted when the series annotation already has levels.
                continue

            mapping_annotation = {}

            # Note that the label is expected to be always set;
            # otherwise, the scale title will appear as 'color.cyl'
            label = mapping_meta.parameters.get('label')
            if label is not None:
                mapping_annotation.setdefault('parameters', {})['label'] = label

            if mapping_meta.levels is not None:
                mapping_annotation['levels'] = mapping_meta.levels

            order_by = mapping_meta.parameters.get('order_by')
            if order_by is not None:
                mapping_annotation.setdefault('parameters', {})['order_by'] = order_by

            order = mapping_meta.parameters.get('order')
            if order is not None:
                mapping_annotation.setdefault('parameters', {})['order'] = order

            # Add the mapping annotation only if it carries something (label, levels or ordering);
            # an empty one would be redundant.
            if mapping_annotation:
                mapping_annotation['aes'] = aesthetic
                mapping_annotation['annotation'] = 'as_discrete'
                mapping_annotations.append(mapping_annotation)

    data_meta = {}

    if series_annotations:
        data_meta['series_annotations'] = list(series_annotations.values())

    if mapping_annotations:
        data_meta['mapping_annotations'] = mapping_annotations

    return data, aes(**mappings), {'data_meta': data_meta}