def _normalize_tabular_data()

in c3dm/tools/tabulate.py [0:0]


def _normalize_tabular_data(tabular_data, headers, showindex="default"):
	"""Transform a supported data type to a list of lists, and a list of headers.

	Supported tabular data types:

	* list-of-lists or another iterable of iterables

	* list of named tuples (usually used with headers="keys")

	* list of dicts (usually used with headers="keys")

	* list of OrderedDicts (usually used with headers="keys")

	* 2D NumPy arrays

	* NumPy record arrays (usually used with headers="keys")

	* dict of iterables (usually used with headers="keys")

	* pandas.DataFrame (usually used with headers="keys")

	The first row can be used as headers if headers="firstrow",
	column indices can be used as headers if headers="keys".
	If headers="firstrow" and there are no rows, the headers are empty.

	If showindex="default", show row indices of the pandas.DataFrame.
	If showindex="always", show row indices for all types of data.
	If showindex="never", don't show row indices for all types of data.
	If showindex is an iterable, show its values as row indices.

	Returns a ``(rows, headers)`` tuple: ``rows`` is a list of lists and
	``headers`` is a list of headers converted with ``_text_type``. When
	there are fewer headers than columns in the first row, the headers are
	left-padded with empty strings.

	Raises ValueError if tabular_data has ``keys`` and ``values`` attributes
	but looks like neither a dict nor a DataFrame, or if the data is a list
	of dicts and headers is neither a dict nor a keyword.

	"""

	try:
		bool(headers)
	except ValueError:  # numpy.ndarray, pandas.core.index.Index, ...
		# truth-testing these objects raises, so continue with a plain list
		headers = list(headers)

	index = None
	if hasattr(tabular_data, "keys") and hasattr(tabular_data, "values"):
		# dict-like and pandas.DataFrame?
		if hasattr(tabular_data.values, "__call__"):
			# likely a conventional dict
			keys = tabular_data.keys()
			# columns have to be transposed
			rows = list(izip_longest(*tabular_data.values()))
		elif hasattr(tabular_data, "index"):
			# values is a property, has .index => it's likely a
			# pandas.DataFrame (pandas 0.11.0)
			keys = list(tabular_data)
			# NOTE(review): the index name is prepended regardless of
			# showindex; when the index column is hidden this gives one
			# more header than columns -- confirm against the caller.
			if tabular_data.index.name is not None:
				if isinstance(tabular_data.index.name, list):
					keys[:0] = tabular_data.index.name
				else:
					keys[:0] = [tabular_data.index.name]
			# values matrix doesn't need to be transposed
			vals = tabular_data.values
			# for DataFrames add an index per default
			index = list(tabular_data.index)
			rows = [list(row) for row in vals]
		else:
			raise ValueError("tabular data doesn't appear to be a dict or a DataFrame")

		if headers == "keys":
			headers = list(map(_text_type, keys))  # headers should be strings

	else:  # it's usually an iterable of iterables, or a NumPy array
		rows = list(tabular_data)

		if headers == "keys" and not rows:
			# an empty table (issue #81)
			headers = []
		elif (
			headers == "keys"
			and hasattr(tabular_data, "dtype")
			and tabular_data.dtype.names
		):
			# numpy record array
			headers = tabular_data.dtype.names
		elif (
			headers == "keys"
			and len(rows) > 0
			and isinstance(rows[0], tuple)
			and hasattr(rows[0], "_fields")
		):
			# namedtuple
			headers = list(map(_text_type, rows[0]._fields))
		elif len(rows) > 0 and isinstance(rows[0], dict):
			# dict or OrderedDict
			uniq_keys = set()  # implements hashed lookup
			keys = []  # keeps the keys in first-seen order
			if headers == "firstrow":
				# the first dict maps keys to header labels and is not data
				firstdict = rows[0]
				keys.extend(firstdict.keys())
				uniq_keys.update(keys)
				rows = rows[1:]
			for row in rows:
				for k in row.keys():
					# save unique items in input order
					if k not in uniq_keys:
						keys.append(k)
						uniq_keys.add(k)
			if headers == "keys":
				headers = keys
			elif isinstance(headers, dict):
				# a dict of headers for a list of dicts
				headers = [headers.get(k, k) for k in keys]
				headers = list(map(_text_type, headers))
			elif headers == "firstrow":
				# checked after the header dict was removed: with no data
				# rows left the headers are dropped as well
				if len(rows) > 0:
					headers = [firstdict.get(k, k) for k in keys]
					headers = list(map(_text_type, headers))
				else:
					headers = []
			elif headers:
				raise ValueError('headers for a list of dicts is not a dict or a keyword')
			# missing keys become None cells
			rows = [[row.get(k) for k in keys] for row in rows]

		elif (
			headers == "keys"
			and hasattr(tabular_data, "description")
			and hasattr(tabular_data, "fetchone")
			and hasattr(tabular_data, "rowcount")
		):
			# Python Database API cursor object (PEP 0249)
			# print tabulate(cursor, headers='keys')
			headers = [column[0] for column in tabular_data.description]

		elif headers == "keys" and len(rows) > 0:
			# keys are column indices
			headers = list(map(_text_type, range(len(rows[0]))))

	# take headers from the first row if necessary
	if headers == "firstrow":
		if len(rows) > 0:
			if index is not None:
				headers = [index[0]] + list(rows[0])
				index = index[1:]
			else:
				headers = rows[0]
			rows = rows[1:]
		else:
			# no first row to take headers from; without this the keyword
			# itself would be split into one header per character below
			headers = []

	headers = list(map(_text_type, headers))  # headers should be strings
	rows = list(map(list, rows))

	# add an index column if requested
	# isinstance (not an exact type match) so that string subclasses are
	# treated as keywords rather than as iterables of index values
	showindex_is_a_str = isinstance(showindex, (_text_type, _binary_type))
	if showindex == "default" and index is not None:
		rows = _prepend_row_index(rows, index)
	elif isinstance(showindex, Iterable) and not showindex_is_a_str:
		rows = _prepend_row_index(rows, list(showindex))
	elif showindex == "always" or (_bool(showindex) and not showindex_is_a_str):
		if index is None:
			index = list(range(len(rows)))
		rows = _prepend_row_index(rows, index)
	# anything else ("never", a falsy non-string value, ...) adds no index

	# pad with empty headers for initial columns if necessary
	if headers and len(rows) > 0:
		nhs = len(headers)
		ncols = len(rows[0])
		if nhs < ncols:
			headers = [""] * (ncols - nhs) + headers

	return rows, headers