in src/models/readers/sst_reader.py [0:0]
def text_to_instance(self, tokens: List[str], sentiment: str = None) -> Optional[Instance]:
    """
    Build an `Instance` from an already-tokenized sentence and, optionally, its
    sentiment label.

    The input is `pre-tokenized` because this reader may have no tokenizer of its own.
    When `self._tokenizer` is set, the tokens are joined back together with
    `join_tokens` and re-tokenized with it; otherwise each element is used directly
    (`str` elements are wrapped in `Token`, `Token` elements are kept as they are).

    # Parameters

    tokens : `List[str]`, required.
        The tokens in a given sentence. Must be a `list`.
    sentiment : `str`, optional, (default = `None`).
        The sentiment for this sentence. It is parsed with `int()` when the reader's
        granularity is `"3-class"` or `"2-class"`, and used unchanged otherwise.

    # Returns

    An `Instance` containing the following fields:
        tokens : `TextField`
            The tokens in the sentence or phrase.
        label : `LabelField`
            The sentiment label of the sentence or phrase. Only present when
            `sentiment` is given.

    `None` is returned instead when the granularity is `"2-class"` and the sentiment
    is neutral (`2`), since that example has no label in the binary setting.

    # Raises

    ValueError
        If no tokenizer is set and an element of `tokens` is neither `str` nor `Token`.
    """
    assert isinstance(tokens, list)

    if self._tokenizer is not None:
        # A tokenizer is configured: rebuild the text and let it split the sentence.
        token_list = self._tokenizer.tokenize(join_tokens(tokens))
    else:

        def _as_token(item: Union[str, Token]):
            # Accept raw strings and ready-made Token objects, nothing else.
            if isinstance(item, str):
                return Token(item)
            if isinstance(item, Token):
                return item
            raise ValueError("Tokens must be either str or Token.")

        token_list = [_as_token(item) for item in tokens]

    fields: Dict[str, Field] = {
        "tokens": TextField(token_list, token_indexers=self._token_indexers)
    }

    # Unlabeled input (e.g. at prediction time): nothing more to add.
    if sentiment is None:
        return Instance(fields)

    # Raw labels: 0 and 1 are negative, 2 is neutral, 3 and 4 are positive.
    #   5-class: the raw label is used as is.
    #   3-class: collapsed to negative ("0"), neutral ("1"), positive ("2").
    #   2-class: collapsed to negative ("0") or positive ("1"); neutral is dropped.
    label = sentiment
    if self._granularity == "3-class":
        score = int(sentiment)
        if score == 2:
            label = "1"
        elif score < 2:
            label = "0"
        else:
            label = "2"
    elif self._granularity == "2-class":
        score = int(sentiment)
        if score == 2:
            # Neutral sentences have no place in the binary task.
            return None
        label = "0" if score < 2 else "1"

    fields["label"] = LabelField(label)
    return Instance(fields)