in src/jobs/TuneGenTopicModel.py
def start(self):
    # Distill the fine-tuned teacher model into a small student.
    DISTILLATION_CONFIG = {
        "learning_rate": 3e-4,
        "batch_size": 32,
        "model_name": "google/t5-efficient-tiny",
        "label_column": "output",
        "use_keywords": True,
        "single_tab_handling": False,
        "learning_rate_decay": False,
        "shorten_training_label_boost": 0.06,
        "shrink_decoder_index_remove": "2",
        "shrink_encoder_index_remove": "2",
        # Teacher from the revived-dust fine-tuning run. Other run
        # artifacts tested: sage-mountain-341, azure-frost-334, and
        # noble-yogurt-330.
        "teacher_model_artifact": "moso/tab_grouping/model-v40xoz3q:v0",
    }
    # Initial fine-tuning of a large model; the resulting artifact is then
    # distilled with DISTILLATION_CONFIG. The leading underscore marks this
    # config as currently unused; it is kept for reference.
    _FINE_TUNING_CONFIG = {
        "learning_rate": 3e-4,
        "batch_size": 4,
        "model_name": "google/flan-t5-base",
        "label_column": "output",
        "use_keywords": True,
        "single_tab_handling": False,
        "learning_rate_decay": False,
        "shorten_training_label_boost": 0.05,
    }
    self.configs = [DISTILLATION_CONFIG]
    self._skip_configs = []
    self.next(self.train, foreach="configs")
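
# --- Hedged sketch, not from the original file ---------------------------
# In Metaflow, `self.next(self.train, foreach="configs")` fans out one
# `train` task per entry in `self.configs`, and each branch receives its
# config dict as `self.input`. The step below is an illustration under
# that assumption; `fine_tune` is a hypothetical helper, and the real
# `train` step lives elsewhere in this file.
def train(self):
    config = self.input  # one dict from self.configs
    if config in self._skip_configs:
        # Assumed use of `_skip_configs`: branches whose config appears
        # in the skip list do no work.
        self.results = None
    else:
        self.results = fine_tune(**config)  # hypothetical helper
    self.next(self.join)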
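
# --- Hedged sketch of the shrink_* keys ----------------------------------
# An assumption about what "shrink_encoder_index_remove" /
# "shrink_decoder_index_remove" could mean: dropping the transformer block
# at the given index from each stack of the student before distillation.
# This helper is illustrative and not code from this repo.
import torch.nn as nn
from transformers import T5ForConditionalGeneration

def shrink_t5(model: T5ForConditionalGeneration,
              enc_idx: int, dec_idx: int) -> T5ForConditionalGeneration:
    # T5 keeps its layers in nn.ModuleLists at encoder.block / decoder.block.
    enc = [b for i, b in enumerate(model.encoder.block) if i != enc_idx]
    dec = [b for i, b in enumerate(model.decoder.block) if i != dec_idx]
    model.encoder.block = nn.ModuleList(enc)
    model.decoder.block = nn.ModuleList(dec)
    # Keep the config consistent with the new depth.
    model.config.num_layers = len(enc)
    model.config.num_decoder_layers = len(dec)
    return model

# Usage: student = shrink_t5(
#     T5ForConditionalGeneration.from_pretrained("google/t5-efficient-tiny"),
#     enc_idx=2, dec_idx=2)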