in src/sagemaker_training/recordio.py [0:0]
def _write_spmatrix_to_sparse_tensor(file, array, labels=None):
"""Writes a scipy sparse matrix to a sparse tensor.
Args:
file (file-like object): File-like object where the
records will be written.
array (array-like): A sparse matrix containing features.
labels (numpy array): Numpy array containing the labels.
"""
if not issparse(array):
raise TypeError("Array must be sparse")
# Validate shape of array and labels, resolve array and label types
if not len(array.shape) == 2:
raise ValueError("Array must be a Matrix")
if labels is not None:
if not len(labels.shape) == 1:
raise ValueError("Labels must be a Vector")
if labels.shape[0] not in array.shape:
raise ValueError(
"Label shape {} not compatible with array shape {}".format(
labels.shape, array.shape
)
)
resolved_label_type = _resolve_type(labels.dtype)
resolved_type = _resolve_type(array.dtype)
csr_array = array.tocsr()
n_rows, n_cols = csr_array.shape
record = Record()
for row_idx in range(n_rows):
record.Clear()
row = csr_array.getrow(row_idx)
# Write values
_write_feature_tensor(resolved_type, record, row.data)
# Write keys
_write_keys_tensor(resolved_type, record, row.indices.astype(np.uint64))
# Write labels
if labels is not None:
_write_label_tensor(resolved_label_type, record, labels[row_idx])
# Write shape
_write_shape(resolved_type, record, n_cols)
_write_recordio(file, record.SerializeToString())