docker_images/fasttext/app/pipelines/text_classification.py (26 lines of code) (raw):
from typing import Dict, List
from app.pipelines import Pipeline
from huggingface_hub import HfApi
# Number of characters in the "__label__" prefix; used below to slice the
# prefix off every predicted label before it is returned to the caller.
FASTTEXT_PREFIX_LENGTH = 9 # fasttext labels are formatted like "__label__eng_Latn"
class TextClassificationPipeline(Pipeline):
    """Pipeline that returns the top-5 labels (or nearest words) for a text.

    The behaviour of ``__call__`` depends on the Hub tags of the model repo:
    repos tagged ``language-identification`` are queried with ``predict``;
    every other repo is queried with ``get_nearest_neighbors`` on one word.
    """

    def __init__(
        self,
        model_id: str,
    ):
        """
        Args:
            model_id (:obj:`str`):
                Hub repository id of the model, forwarded to the base
                ``Pipeline`` initializer (which is expected to provide
                ``self.model`` and ``self.model_id`` -- defined outside
                this file).
        """
        super().__init__(model_id)
        # Hub metadata for the repo; its tags select the code path in __call__.
        self.info = HfApi().model_info(repo_id=self.model_id)

    def __call__(self, inputs: str) -> List[List[Dict[str, float]]]:
        """
        Args:
            inputs (:obj:`str`):
                a string containing some text
        Return:
            A :obj:`list`:. The object returned should be a list of one list like [[{"label": "eng_Latn", "score": 0.9939950108528137}]] containing:
                - "label": A string representing what the label/class is. There can be multiple labels.
                - "score": A score between 0 and 1 describing how confident the model is for this label/class.
        Raises:
            :obj:`ValueError`: if the model is not a language-identification
                model and `inputs` contains more than one whitespace-separated word.
        """
        # The Hub may return no tags at all; treat that as "no tags" rather
        # than failing with a TypeError on the membership test.
        tags = self.info.tags or []
        if "language-identification" in tags:
            # fastText's predict() works on a single line and raises
            # ValueError when the text contains "\n", so flatten the input.
            single_line = inputs.replace("\n", " ")
            labels, probabilities = self.model.predict(single_line, k=5)[:2]
            result = [
                {"label": label[FASTTEXT_PREFIX_LENGTH:], "score": prob}
                for label, prob in zip(labels, probabilities)
            ]
            return [result]

        if len(inputs.split()) > 1:
            raise ValueError("Expected input is a single word")

        # Each neighbour is a (score, word) pair; the score comes first.
        neighbours = self.model.get_nearest_neighbors(inputs, k=5)
        result = [
            {"label": word, "score": similarity}
            for similarity, word in neighbours
        ]
        return [result]