in simulation/decai/simulation/contract/classification/scikit_classifier.py [0:0]
def export(self,
           path: str,
           classifications: List[str] = None,
           model_type: str = None,
           feature_index_mapping: FeatureIndexMapping = None):
    """
    Write the trained model to `path` as compact JSON.

    The layout of the JSON object depends on the wrapped model:

    * `SGDClassifier` with `loss == 'perceptron'`: `type`, `classifications`,
      `intercept` and either `weights` (dense list) or, when
      `feature_index_mapping` is given, `sparseWeights` (a dict from
      stringified feature index to non-zero weight).
    * `MultinomialNB`: `type`, `classifications`, `classCounts`,
      `featureCounts` (per class, a list of `(feature index, count)` pairs for
      the non-zero counts), `totalNumFeatures` and `smoothingFactor`.
    * `NearestCentroidClassifier`: `type` and `centroids`, a dict from
      classification name to `{centroid, dataCount}`.

    :param path: Destination file. It is opened in write mode, so an existing
        file is overwritten.
    :param classifications: Names of the classes, in the model's class order.
        When omitted, defaults to `["0", "1"]` for the perceptron and to
        `"0", "1", ...` (one per class) for the other models.
    :param model_type: Overrides the `type` value written to the file.
    :param feature_index_mapping: When given, position `k` of the model's
        feature vector is exported under index `feature_index_mapping[k]`
        and zero-valued entries are dropped.
    :raises AssertionError: If no model has been set on this instance.
    :raises Exception: If the wrapped model is none of the supported types
        (this includes an `SGDClassifier` whose loss is not `'perceptron'`).
    """
    assert self._model is not None, "The model has not been initialized yet."

    if isinstance(self._model, SGDClassifier) and self._model.loss == 'perceptron':
        if classifications is None:
            classifications = ["0", "1"]
        # NOTE(review): the default type is 'sparse perceptron' even when dense
        # weights are written - confirm this is what the consumer expects.
        model = {
            'type': model_type or 'sparse perceptron',
            'classifications': classifications,
            'weights': self._model.coef_[0].tolist(),
            'intercept': self._model.intercept_[0],
        }
        if feature_index_mapping is not None:
            # Swap the dense list for a sparse dict; JSON object keys must be
            # strings, hence str(index).
            dense_weights = model.pop('weights')
            model['sparseWeights'] = {
                str(index): weight
                for index, weight in zip(feature_index_mapping, dense_weights)
                if weight != 0
            }
    elif isinstance(self._model, MultinomialNB):
        if classifications is None:
            # feature_count_ has one row per class (it is iterated per class
            # below), so the number of default labels is the row count.
            num_classes = self._model.feature_count_.shape[0]
            classifications = list(map(str, range(num_classes)))
        feature_counts = []
        for class_features in self._model.feature_count_:
            class_feature_counts = []
            for index, count in enumerate(class_features):
                if count == 0:
                    continue
                if feature_index_mapping is not None:
                    index = feature_index_mapping[index]
                # Counts should already be integers.
                class_feature_counts.append((index, int(count)))
            feature_counts.append(class_feature_counts)
        model = {
            'type': model_type or 'naive bayes',
            'classifications': classifications,
            'classCounts': self._model.class_count_.astype(dtype=np.int64).tolist(),
            'featureCounts': feature_counts,
            'totalNumFeatures': self._model.feature_count_.shape[1],
            'smoothingFactor': self._model.alpha,
        }
    elif isinstance(self._model, NearestCentroidClassifier):
        if model_type is None and feature_index_mapping is not None:
            model_type = 'sparse nearest centroid classifier'
        if classifications is None:
            # The centroids live on the wrapped model, not on this wrapper.
            classifications = list(map(str, range(len(self._model.centroids_))))
        centroids = dict()
        for i, classification in enumerate(classifications):
            centroid = self._model.centroids_[i].tolist()
            if feature_index_mapping is not None:
                centroid = {
                    str(index): value
                    for index, value in zip(feature_index_mapping, centroid)
                    if value != 0
                }
            # NOTE(review): if _num_samples_per_centroid holds NumPy integers,
            # json.dump will reject them - confirm they are plain ints.
            centroids[classification] = dict(
                centroid=centroid,
                dataCount=self._model._num_samples_per_centroid[i])
        model = {
            'type': model_type or 'nearest centroid classifier',
            'centroids': centroids,
        }
    else:
        raise Exception("Unrecognized model type.")

    with open(path, 'w') as f:
        json.dump(model, f, separators=(',', ':'))