in python-package/lets_plot/plot/util.py [0:0]
def as_annotated_data(data: Any, mapping_spec: FeatureSpec) -> Tuple:
    """
    Split a mapping spec into plain aesthetic mappings and 'data_meta' annotations.

    Parameters
    ----------
    data : Any
        The dataset the mappings refer to. It is returned unchanged.
        pandas and polars data frames are additionally inspected for
        ordered 'category' / Enum columns to obtain factor levels.
    mapping_spec : FeatureSpec
        Aesthetic mappings, may be None. Every property except 'name' is treated
        as an aesthetic whose value is either a variable name or a `MappingMeta`.

    Returns
    -------
    tuple
        `(data, aes(**mappings), {'data_meta': data_meta})`, where `mappings` maps
        aesthetics to variable names and `data_meta` may contain the keys
        'series_annotations' (per-variable type, time zone, factor levels, order)
        and 'mapping_annotations' (per-aesthetic 'as_discrete' options).
        Either key is omitted when there is nothing to report.

    """
    data_type_by_var: Dict[str, str] = {}  # VarName to Type
    mapping_meta_by_var: Dict[str, Dict[str, MappingMeta]] = {}  # VarName to Dict[Aes, MappingMeta]
    mappings = {}  # Aes to VarName

    # Fill mapping_meta_by_var, mappings and data_type_by_var.
    if mapping_spec is not None:
        for key, spec in mapping_spec.props().items():
            # The key is either an aesthetic name or 'name' (FeatureSpec.name property).
            if key == 'name':  # ignore FeatureSpec.name property
                continue

            if isinstance(spec, MappingMeta):
                mappings[key] = spec.variable
                mapping_meta_by_var.setdefault(spec.variable, {})[key] = spec
                # Placeholder: replaced below when the type can be inferred from the data.
                data_type_by_var[spec.variable] = TYPE_UNKNOWN
            else:
                mappings[key] = spec  # spec is a variable name

    data_type_by_var.update(_infer_type(data))

    # Detect the time zone of each date-time variable.
    time_zone_by_var_name = {}
    for var_name, data_type in data_type_by_var.items():
        if data_type == TYPE_DATE_TIME:
            time_zone = _detect_time_zone(var_name, data)
            if time_zone is not None:
                time_zone_by_var_name[var_name] = time_zone

    # Fill series annotations.
    series_annotations = {}  # var to series_annotation
    for var_name, data_type in data_type_by_var.items():
        series_annotation = {}
        var_meta = mapping_meta_by_var.get(var_name)  # Dict[Aes, MappingMeta] or None

        if data_type != TYPE_UNKNOWN:
            series_annotation['type'] = data_type

        if var_name in time_zone_by_var_name:
            series_annotation['time_zone'] = time_zone_by_var_name[var_name]

        # A variable taken from a MappingMeta is not necessarily a column of `data`,
        # so the column dtype is inspected only when the column actually exists.
        if is_pandas_data_frame(data) and var_name in data.columns \
                and data[var_name].dtype.name == 'category' and data[var_name].dtype.ordered:
            series_annotation['factor_levels'] = data[var_name].cat.categories.to_list()
        elif is_polars_dataframe(data):
            if var_name in data.columns:
                import polars
                col_dtype = data[var_name].dtype
                if isinstance(col_dtype, polars.datatypes.Enum):
                    series_annotation['factor_levels'] = list(col_dtype.categories)
                # polars Categorical columns are skipped on purpose: it does not seem possible
                # to get categories in correct order from the Categorical dtype.
        elif var_meta is not None:
            levels = last_not_none([mm.levels for mm in var_meta.values()])
            if levels is not None:
                series_annotation['factor_levels'] = levels

        if 'factor_levels' in series_annotation and var_meta is not None:
            # `.get` keeps this lookup consistent with the mapping annotations below
            # and tolerates a MappingMeta without the 'order' parameter.
            order = last_not_none([mm.parameters.get('order') for mm in var_meta.values()])
            if order is not None:
                series_annotation['order'] = order

        if series_annotation:
            series_annotation['column'] = var_name
            series_annotations[var_name] = series_annotation

    # Fill mapping annotations.
    mapping_annotations = []
    for var_name, meta_data in mapping_meta_by_var.items():
        for aesthetic, mapping_meta in meta_data.items():
            if mapping_meta.annotation != 'as_discrete':
                continue

            if 'factor_levels' in series_annotations.get(var_name, {}):
                # there is a bug - if label is set then levels are not applied
                continue

            mapping_annotation = {}

            # Note that the label is always set; otherwise, the scale title will appear as 'color.cyl'
            label = mapping_meta.parameters.get('label')
            if label is not None:
                mapping_annotation.setdefault('parameters', {})['label'] = label

            if mapping_meta.levels is not None:
                mapping_annotation['levels'] = mapping_meta.levels

            for option in ('order_by', 'order'):
                value = mapping_meta.parameters.get(option)
                if value is not None:
                    mapping_annotation.setdefault('parameters', {})[option] = value

            # add mapping meta if a custom label is set or if series annotation for var doesn't contain order options
            # otherwise don't add mapping meta - it's redundant, nothing unique compared to series annotation
            if mapping_annotation:
                mapping_annotation['aes'] = aesthetic
                mapping_annotation['annotation'] = 'as_discrete'
                mapping_annotations.append(mapping_annotation)

    data_meta = {}
    if series_annotations:
        data_meta['series_annotations'] = list(series_annotations.values())
    if mapping_annotations:
        data_meta['mapping_annotations'] = mapping_annotations

    return data, aes(**mappings), {'data_meta': data_meta}