in container/neo_template_xgboost.py [0:0]
def preprocess(self, batch_data):
    # Assumes module-level imports: csv, numpy as np,
    # scipy.sparse.csr_matrix, and the _sparse_to_dense helper.
    assert self._batch_size == len(batch_data), \
        'Invalid input batch size: expected {} but got {}'.format(self._batch_size,
                                                                  len(batch_data))
    processed_batch_data = []
    for k in range(len(batch_data)):
        req_body = batch_data[k]
        # The content-type header may arrive with either capitalization.
        content_type = self._context.get_request_header(k, 'Content-type')
        if content_type is None:
            content_type = self._context.get_request_header(k, 'Content-Type')
        if content_type is None:
            raise Exception('Content type could not be deduced')
        # The request payload is stored under either 'data' or 'body'.
        payload = batch_data[k].get('data')
        if payload is None:
            payload = batch_data[k].get('body')
        if payload is None:
            raise Exception('Nonexistent payload')
        if content_type == 'text/libsvm' or content_type == 'text/x-libsvm':
            # LibSVM: keep only "index:value" tokens (a leading label has no
            # colon and is skipped), build a sparse matrix, then densify it.
            row = []
            col = []
            entries = []
            payload = payload.rstrip().split('\n')
            colon = ':'
            for row_idx, line in enumerate(payload):
                for entry in line.split(' '):
                    if colon in entry:
                        token = entry.split(colon)
                        col_idx, val = token[0], token[1]
                        row.append(row_idx)
                        col.append(col_idx)
                        entries.append(val)
            row = np.array(row)
            col = np.array(col).astype(int)
            entries = np.array(entries).astype(float)
            mat = csr_matrix((entries, (row, col)))
            processed_batch_data.append(_sparse_to_dense(mat))
        elif content_type == 'text/csv':
            # CSV: detect the delimiter from the first line, then split each
            # row and cast the values to floats.
            payload = payload.rstrip().split('\n')
            delimiter = csv.Sniffer().sniff(payload[0]).delimiter
            batch = list(map(lambda x: x.split(delimiter), payload))
            processed_batch_data.append(np.array(batch).astype(float))
        else:
            raise Exception('ClientError: Invalid content type. Accepted content types are ' +
                            '"text/libsvm" and "text/csv". Received {}'.format(content_type))
    return processed_batch_data
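
For reference, the sketch below walks through the same two parsing paths outside the handler class. It is not taken from the repository: the sample payloads are made up for illustration, and csr_matrix.toarray() stands in for the module's _sparse_to_dense helper.

import csv
import numpy as np
from scipy.sparse import csr_matrix

# LibSVM path: keep only "index:value" tokens (a leading label, if present,
# has no colon and is skipped), then build a sparse matrix and densify it.
libsvm_payload = "1 0:0.5 2:1.25\n0 1:2.0 3:-0.75"
row, col, entries = [], [], []
for row_idx, line in enumerate(libsvm_payload.rstrip().split('\n')):
    for entry in line.split(' '):
        if ':' in entry:
            col_idx, val = entry.split(':')
            row.append(row_idx)
            col.append(int(col_idx))
            entries.append(float(val))
mat = csr_matrix((np.array(entries), (np.array(row), np.array(col))))
dense = mat.toarray()            # stand-in for _sparse_to_dense(mat)
print(dense.shape)               # (2, 4)

# CSV path: sniff the delimiter from the first line, then split and cast.
csv_payload = "0.5,0.0,1.25,0.0\n0.0,2.0,0.0,-0.75"
lines = csv_payload.rstrip().split('\n')
delimiter = csv.Sniffer().sniff(lines[0]).delimiter
batch = np.array([line.split(delimiter) for line in lines]).astype(float)
print(batch.shape)               # (2, 4)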