def _infer_type_pandas_dataframe()

in python-package/lets_plot/plot/series_meta.py [0:0]


def _infer_type_pandas_dataframe(var_name: str, var_content) -> str:
    """Map the content of one pandas column to a lets-plot type tag.

    The column type is detected with ``pandas.api.types.infer_dtype`` (NA
    values skipped).  Categorical columns are classified by the dtype of
    their categories rather than by the categorical wrapper itself.

    :param var_name: name of the variable; not used by the inference itself.
    :param var_content: the column to inspect.  It must provide ``empty``,
        ``isna()``, ``values`` and, for categorical data, the ``cat``
        accessor (i.e. it is expected to be a ``pandas.Series``).
    :return: one of the ``TYPE_*`` tags; ``TYPE_UNKNOWN`` for empty / all-NA
        columns and for categoricals whose categories are not integers,
        floats or strings; ``'unknown(pandas:<kind>)'`` for any other kind
        reported by pandas that has no lets-plot counterpart.
    """
    if var_content.empty or var_content.isna().all():
        return TYPE_UNKNOWN

    pandas_dtype = pandas.api.types.infer_dtype(var_content.values, skipna=True)

    if pandas_dtype == "categorical":
        categories = var_content.cat.categories
        cat_dtype = categories.dtype

        if not isinstance(cat_dtype, numpy.dtype):
            # Categories backed by a pandas extension dtype (e.g. nullable
            # Int64/Float64 or StringDtype).  numpy.issubdtype() cannot
            # interpret such dtypes and raises TypeError, so classify them
            # with the pandas predicates instead.
            if pandas.api.types.is_integer_dtype(cat_dtype):
                return TYPE_INTEGER
            if pandas.api.types.is_float_dtype(cat_dtype):
                return TYPE_FLOATING
            if isinstance(cat_dtype, pandas.StringDtype):
                return TYPE_STRING
            return TYPE_UNKNOWN

        if numpy.issubdtype(cat_dtype, numpy.integer):
            return TYPE_INTEGER
        if numpy.issubdtype(cat_dtype, numpy.floating):
            return TYPE_FLOATING
        if numpy.issubdtype(cat_dtype, numpy.object_):
            # An object dtype says nothing about the elements: report a
            # string type only when every category really is a str.
            if all(isinstance(category, str) for category in categories):
                return TYPE_STRING
        return TYPE_UNKNOWN

    # see https://pandas.pydata.org/docs/reference/api/pandas.api.types.infer_dtype.html
    known_types = {
        'string': TYPE_STRING,
        'floating': TYPE_FLOATING,
        'integer': TYPE_INTEGER,
        'boolean': TYPE_BOOLEAN,
        'datetime64': TYPE_DATE_TIME,
        'datetime': TYPE_DATE_TIME,
        'date': TYPE_DATE,
        'time': TYPE_TIME,
        'empty': TYPE_UNKNOWN,  # for columns with all None values
    }
    if pandas_dtype in known_types:
        return known_types[pandas_dtype]

    # Keep the pandas kind in the tag so unsupported content can be diagnosed.
    return 'unknown(pandas:' + pandas_dtype + ')'