in source/predictive_maintenance/pandas/core/groupby/grouper.py [0:0]
def _get_grouper(obj, key=None, axis=0, level=None, sort=True,
observed=False, mutated=False, validate=True):
"""
create and return a BaseGrouper, which is an internal
mapping of how to create the grouper indexers.
This may be composed of multiple Grouping objects, indicating
multiple groupers
Groupers are ultimately index mappings. They can originate as:
index mappings, keys to columns, functions, or Groupers
Groupers enable local references to axis,level,sort, while
the passed in axis, level, and sort are 'global'.
This routine tries to figure out what the passing in references
are and then creates a Grouping for each one, combined into
a BaseGrouper.
If observed & we have a categorical grouper, only show the observed
values
If validate, then check for key/level overlaps
"""
group_axis = obj._get_axis(axis)
# validate that the passed single level is compatible with the passed
# axis of the object
if level is not None:
# TODO: These if-block and else-block are almost same.
# MultiIndex instance check is removable, but it seems that there are
# some processes only for non-MultiIndex in else-block,
# eg. `obj.index.name != level`. We have to consider carefully whether
# these are applicable for MultiIndex. Even if these are applicable,
# we need to check if it makes no side effect to subsequent processes
# on the outside of this condition.
# (GH 17621)
if isinstance(group_axis, MultiIndex):
if is_list_like(level) and len(level) == 1:
level = level[0]
if key is None and is_scalar(level):
# Get the level values from group_axis
key = group_axis.get_level_values(level)
level = None
else:
# allow level to be a length-one list-like object
# (e.g., level=[0])
# GH 13901
if is_list_like(level):
nlevels = len(level)
if nlevels == 1:
level = level[0]
elif nlevels == 0:
raise ValueError('No group keys passed!')
else:
raise ValueError('multiple levels only valid with '
'MultiIndex')
if isinstance(level, compat.string_types):
if obj.index.name != level:
raise ValueError('level name {} is not the name of the '
'index'.format(level))
elif level > 0 or level < -1:
raise ValueError(
'level > 0 or level < -1 only valid with MultiIndex')
# NOTE: `group_axis` and `group_axis.get_level_values(level)`
# are same in this section.
level = None
key = group_axis
# a passed-in Grouper, directly convert
if isinstance(key, Grouper):
binner, grouper, obj = key._get_grouper(obj, validate=False)
if key.key is None:
return grouper, [], obj
else:
return grouper, {key.key}, obj
# already have a BaseGrouper, just return it
elif isinstance(key, BaseGrouper):
return key, [], obj
# In the future, a tuple key will always mean an actual key,
# not an iterable of keys. In the meantime, we attempt to provide
# a warning. We can assume that the user wanted a list of keys when
# the key is not in the index. We just have to be careful with
# unhashble elements of `key`. Any unhashable elements implies that
# they wanted a list of keys.
# https://github.com/pandas-dev/pandas/issues/18314
is_tuple = isinstance(key, tuple)
all_hashable = is_tuple and is_hashable(key)
if is_tuple:
if ((all_hashable and key not in obj and set(key).issubset(obj))
or not all_hashable):
# column names ('a', 'b') -> ['a', 'b']
# arrays like (a, b) -> [a, b]
msg = ("Interpreting tuple 'by' as a list of keys, rather than "
"a single key. Use 'by=[...]' instead of 'by=(...)'. In "
"the future, a tuple will always mean a single key.")
warnings.warn(msg, FutureWarning, stacklevel=5)
key = list(key)
if not isinstance(key, list):
keys = [key]
match_axis_length = False
else:
keys = key
match_axis_length = len(keys) == len(group_axis)
# what are we after, exactly?
any_callable = any(callable(g) or isinstance(g, dict) for g in keys)
any_groupers = any(isinstance(g, Grouper) for g in keys)
any_arraylike = any(isinstance(g, (list, tuple, Series, Index, np.ndarray))
for g in keys)
try:
if isinstance(obj, DataFrame):
all_in_columns_index = all(g in obj.columns or g in obj.index.names
for g in keys)
else:
all_in_columns_index = False
except Exception:
all_in_columns_index = False
if (not any_callable and not all_in_columns_index and
not any_arraylike and not any_groupers and
match_axis_length and level is None):
keys = [com.asarray_tuplesafe(keys)]
if isinstance(level, (tuple, list)):
if key is None:
keys = [None] * len(level)
levels = level
else:
levels = [level] * len(keys)
groupings = []
exclusions = []
# if the actual grouper should be obj[key]
def is_in_axis(key):
if not _is_label_like(key):
try:
obj._data.items.get_loc(key)
except Exception:
return False
return True
# if the grouper is obj[name]
def is_in_obj(gpr):
try:
return id(gpr) == id(obj[gpr.name])
except Exception:
return False
for i, (gpr, level) in enumerate(zip(keys, levels)):
if is_in_obj(gpr): # df.groupby(df['name'])
in_axis, name = True, gpr.name
exclusions.append(name)
elif is_in_axis(gpr): # df.groupby('name')
if gpr in obj:
if validate:
obj._check_label_or_level_ambiguity(gpr)
in_axis, name, gpr = True, gpr, obj[gpr]
exclusions.append(name)
elif obj._is_level_reference(gpr):
in_axis, name, level, gpr = False, None, gpr, None
else:
raise KeyError(gpr)
elif isinstance(gpr, Grouper) and gpr.key is not None:
# Add key to exclusions
exclusions.append(gpr.key)
in_axis, name = False, None
else:
in_axis, name = False, None
if is_categorical_dtype(gpr) and len(gpr) != obj.shape[axis]:
raise ValueError(
("Length of grouper ({len_gpr}) and axis ({len_axis})"
" must be same length"
.format(len_gpr=len(gpr), len_axis=obj.shape[axis])))
# create the Grouping
# allow us to passing the actual Grouping as the gpr
ping = (Grouping(group_axis,
gpr,
obj=obj,
name=name,
level=level,
sort=sort,
observed=observed,
in_axis=in_axis)
if not isinstance(gpr, Grouping) else gpr)
groupings.append(ping)
if len(groupings) == 0:
raise ValueError('No group keys passed!')
# create the internals grouper
grouper = BaseGrouper(group_axis, groupings, sort=sort, mutated=mutated)
return grouper, exclusions, obj