in src/autotrain/preprocessor/text.py [0:0]
def __post_init__(self):
    """Validate the configured columns and parse the answer column.

    Checks run in this order:

    1. The text, question and answer columns must exist in ``train_data``
       and, when ``valid_data`` is not None, in ``valid_data`` as well.
    2. No name from ``RESERVED_COLUMNS`` may appear in either frame.
    3. Every value of the answer column is passed through
       ``ast.literal_eval`` and the result is written back into the frame.

    Raises:
        ValueError: If a required column is missing from a frame, or a
            reserved column name is present in one.
    """
    # Pair each frame with the label used in its messages; the validation
    # frame is optional and is simply left out when it is None.
    datasets = [("train", self.train_data)]
    if self.valid_data is not None:
        datasets.append(("valid", self.valid_data))

    # Required columns are checked frame by frame, train first.
    required = (self.text_column, self.question_column, self.answer_column)
    for label, frame in datasets:
        for name in required:
            if name not in frame.columns:
                raise ValueError(f"{name} not in {label} data")

    # Reserved names are checked name by name, train before valid.
    for reserved in RESERVED_COLUMNS:
        for _, frame in datasets:
            if reserved in frame.columns:
                raise ValueError(f"{reserved} is a reserved column name")

    # Convert the answer column from its string form. Only ValueError is
    # caught here; it is treated as a sign the values are already parsed,
    # so the column is left as it was and a warning is logged.
    for label, frame in datasets:
        try:
            parsed = frame[self.answer_column].apply(ast.literal_eval)
            frame.loc[:, self.answer_column] = parsed
        except ValueError:
            logger.warning(f"Unable to do ast.literal_eval on {label}_data[answer_column]")
            logger.warning("assuming answer_column is already a dict")