in container/neo_template_xgboost.py [0:0]
def preprocess(self, batch_data):
    # Assumes module-level imports: csv, numpy as np,
    # scipy.sparse.csr_matrix, and the _sparse_to_dense helper.
    assert self._batch_size == len(batch_data), \
        'Invalid input batch size: expected {} but got {}'.format(self._batch_size,
                                                                  len(batch_data))
    processed_batch_data = []
    for k in range(len(batch_data)):
        req_body = batch_data[k]
        # The content-type header may arrive with either capitalization.
        content_type = self._context.get_request_header(k, 'Content-type')
        if content_type is None:
            content_type = self._context.get_request_header(k, 'Content-Type')
        if content_type is None:
            raise Exception('Content type could not be deduced')
        # The request payload is stored under either 'data' or 'body'.
        payload = batch_data[k].get('data')
        if payload is None:
            payload = batch_data[k].get('body')
        if payload is None:
            raise Exception('Nonexistent payload')
        if content_type == 'text/libsvm' or content_type == 'text/x-libsvm':
            # LibSVM: keep only "index:value" tokens (a leading label has no
            # colon and is skipped), build a sparse matrix, then densify it.
            row = []
            col = []
            entries = []
            payload = payload.rstrip().split('\n')
            colon = ':'
            for row_idx, line in enumerate(payload):
                for entry in line.split(' '):
                    if colon in entry:
                        token = entry.split(colon)
                        col_idx, val = token[0], token[1]
                        row.append(row_idx)
                        col.append(col_idx)
                        entries.append(val)
            row = np.array(row)
            col = np.array(col).astype(int)
            entries = np.array(entries).astype(float)
            mat = csr_matrix((entries, (row, col)))
            processed_batch_data.append(_sparse_to_dense(mat))
        elif content_type == 'text/csv':
            # CSV: detect the delimiter from the first line, then split each
            # row and cast the values to floats.
            payload = payload.rstrip().split('\n')
            delimiter = csv.Sniffer().sniff(payload[0]).delimiter
            batch = list(map(lambda x: x.split(delimiter), payload))
            processed_batch_data.append(np.array(batch).astype(float))
        else:
            raise Exception('ClientError: Invalid content type. Accepted content types are ' +
                            '"text/libsvm" and "text/csv". Received {}'.format(content_type))
    return processed_batch_data
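
For reference, the sketch below walks through the same two parsing paths outside the handler class. It is not taken from the repository: the sample payloads are made up for illustration, and csr_matrix.toarray() stands in for the module's _sparse_to_dense helper.

import csv
import numpy as np
from scipy.sparse import csr_matrix

# LibSVM path: keep only "index:value" tokens (a leading label, if present,
# has no colon and is skipped), then build a sparse matrix and densify it.
libsvm_payload = "1 0:0.5 2:1.25\n0 1:2.0 3:-0.75"
row, col, entries = [], [], []
for row_idx, line in enumerate(libsvm_payload.rstrip().split('\n')):
    for entry in line.split(' '):
        if ':' in entry:
            col_idx, val = entry.split(':')
            row.append(row_idx)
            col.append(int(col_idx))
            entries.append(float(val))
mat = csr_matrix((np.array(entries), (np.array(row), np.array(col))))
dense = mat.toarray()            # stand-in for _sparse_to_dense(mat)
print(dense.shape)               # (2, 4)

# CSV path: sniff the delimiter from the first line, then split and cast.
csv_payload = "0.5,0.0,1.25,0.0\n0.0,2.0,0.0,-0.75"
lines = csv_payload.rstrip().split('\n')
delimiter = csv.Sniffer().sniff(lines[0]).delimiter
batch = np.array([line.split(delimiter) for line in lines]).astype(float)
print(batch.shape)               # (2, 4)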