in c3dm/tools/tabulate.py [0:0]
def _normalize_tabular_data(tabular_data, headers, showindex="default"):
    """Transform a supported data type to a list of lists, and a list of headers.

    Supported tabular data types:

    * list-of-lists or another iterable of iterables
    * list of named tuples (usually used with headers="keys")
    * list of dicts (usually used with headers="keys")
    * list of OrderedDicts (usually used with headers="keys")
    * 2D NumPy arrays
    * NumPy record arrays (usually used with headers="keys")
    * dict of iterables (usually used with headers="keys")
    * pandas.DataFrame (usually used with headers="keys")

    The first row can be used as headers if headers="firstrow",
    column indices can be used as headers if headers="keys".
    With headers="firstrow" and no rows at all, the headers are empty.

    If showindex="default", show row indices of the pandas.DataFrame.
    If showindex="always", show row indices for all types of data.
    If showindex="never", don't show row indices for all types of data.
    If showindex is an iterable, show its values as row indices.

    Args:
        tabular_data: the table, in one of the supported forms listed above.
        headers: "keys", "firstrow", an iterable of header values, or (for a
            list of dicts) a dict mapping row keys to header values.
        showindex: "default", "always", "never", a non-string iterable of
            index values, or a value whose truthiness enables the index.

    Returns:
        A ``(rows, headers)`` tuple: ``rows`` is a list of lists and
        ``headers`` is a list of strings, left-padded with "" when there are
        fewer headers than columns in the first row.

    Raises:
        ValueError: if ``tabular_data`` has ``keys`` and ``values`` but is
            neither dict-like nor has an ``index``, or if ``headers`` given
            for a list of dicts is neither a dict nor a keyword.
    """
    try:
        bool(headers)
    except ValueError:  # numpy.ndarray, pandas.core.index.Index, ...
        # Truth-testing these containers is ambiguous; a plain list is not.
        headers = list(headers)

    index = None
    if hasattr(tabular_data, "keys") and hasattr(tabular_data, "values"):
        # dict-like or pandas.DataFrame
        if callable(tabular_data.values):
            # likely a conventional dict
            keys = tabular_data.keys()
            # columns have to be transposed into rows
            rows = list(izip_longest(*tabular_data.values()))
        elif hasattr(tabular_data, "index"):
            # values is a property and there is an .index
            # => it's likely a pandas.DataFrame (pandas 0.11.0)
            keys = list(tabular_data)
            index_name = tabular_data.index.name
            if index_name is not None:
                if isinstance(index_name, list):
                    keys[:0] = index_name
                else:
                    keys[:0] = [index_name]
            # for DataFrames add an index per default
            index = list(tabular_data.index)
            # the values matrix doesn't need to be transposed
            rows = [list(row) for row in tabular_data.values]
        else:
            raise ValueError("tabular data doesn't appear to be a dict or a DataFrame")

        if headers == "keys":
            headers = list(map(_text_type, keys))  # headers should be strings

    else:
        # it's a usual iterable of iterables, or a NumPy array
        rows = list(tabular_data)

        if headers == "keys" and not rows:
            # an empty table (issue #81)
            headers = []
        elif (headers == "keys"
              and hasattr(tabular_data, "dtype")
              and getattr(tabular_data.dtype, "names", None)):
            # numpy record array; the None default lets dtype objects that
            # have no ``names`` attribute fall through to the later branches
            headers = tabular_data.dtype.names
        elif (headers == "keys"
              and isinstance(rows[0], tuple)
              and hasattr(rows[0], "_fields")):
            # namedtuple (rows is non-empty: the empty case is handled above)
            headers = list(map(_text_type, rows[0]._fields))
        elif rows and isinstance(rows[0], dict):
            # dict or OrderedDict
            seen_keys = set()  # hashed lookup for the ordered list below
            keys = []  # unique keys in first-seen order
            if headers == "firstrow":
                firstdict = rows[0]
                keys.extend(firstdict.keys())
                seen_keys.update(keys)
                rows = rows[1:]
            for row in rows:
                for k in row.keys():
                    if k not in seen_keys:
                        keys.append(k)
                        seen_keys.add(k)
            if headers == "keys":
                headers = keys
            elif isinstance(headers, dict):
                # a dict of headers for a list of dicts
                headers = [headers.get(k, k) for k in keys]
                headers = list(map(_text_type, headers))
            elif headers == "firstrow":
                if rows:
                    headers = [firstdict.get(k, k) for k in keys]
                    headers = list(map(_text_type, headers))
                else:
                    headers = []
            elif headers:
                raise ValueError('headers for a list of dicts is not a dict or a keyword')
            rows = [[row.get(k) for k in keys] for row in rows]
        elif (headers == "keys"
              and hasattr(tabular_data, "description")
              and hasattr(tabular_data, "fetchone")
              and hasattr(tabular_data, "rowcount")):
            # Python Database API cursor object (PEP 0249)
            # print tabulate(cursor, headers='keys')
            headers = [column[0] for column in tabular_data.description]
        elif headers == "keys":
            # keys are column indices (rows is non-empty here)
            headers = list(map(_text_type, range(len(rows[0]))))

    # take headers from the first row if necessary
    if headers == "firstrow":
        if rows:
            if index is not None:
                headers = [index[0]] + list(rows[0])
                index = index[1:]
            else:
                headers = rows[0]
            rows = rows[1:]
        else:
            # No first row to promote. Without this the keyword itself would
            # be split into one header per character by the map() below.
            headers = []

    headers = list(map(_text_type, headers))  # headers should be strings
    rows = list(map(list, rows))

    # add an index column if requested
    showindex_is_a_str = isinstance(showindex, (_text_type, _binary_type))
    if showindex_is_a_str and showindex == "default" and index is not None:
        rows = _prepend_row_index(rows, index)
    elif isinstance(showindex, Iterable) and not showindex_is_a_str:
        rows = _prepend_row_index(rows, list(showindex))
    elif showindex == "always" or (not showindex_is_a_str and _bool(showindex)):
        if index is None:
            index = list(range(len(rows)))
        rows = _prepend_row_index(rows, index)
    # Anything else ("never", a falsy non-string, or any other string)
    # leaves the rows without an index column.

    # pad with empty headers for initial columns if necessary
    if headers and rows:
        missing = len(rows[0]) - len(headers)
        if missing > 0:
            headers = [""] * missing + headers

    return rows, headers