text/data/smoltalk/constraints/pipeline/ifeval_tasks.py (54 lines of code) (raw):
import json
from typing import TYPE_CHECKING, Any
from distilabel.steps.tasks import Task
from system_prompts import (
IFEVAL_INSTRUCTION_ID_LIST_ASSIGNATOR_SYSTEM_PROMPT,
IFEVAL_KWARGS_ASSIGNATOR_SYSTEM_PROMPT,
)
if TYPE_CHECKING:
from distilabel.typing import ChatType, StepColumns
class IFEvalInstructionIdListAssignator(Task):
    """Task that sends an instruction to the LLM and parses its JSON reply.

    The chat consists of ``IFEVAL_INSTRUCTION_ID_LIST_ASSIGNATOR_SYSTEM_PROMPT``
    as the system message and the row's ``instruction`` as the user message.
    The generation is expected to be a JSON object; its content is returned
    as the task output, with ``instruction_id_list`` always present.
    """

    @property
    def inputs(self) -> "StepColumns":
        """Columns read from each input row."""
        return ["instruction"]

    def format_input(self, input: dict[str, Any]) -> "ChatType":
        """Build the chat messages for one input row.

        Args:
            input: Row containing at least the ``instruction`` key.

        Returns:
            A two-message chat: the system prompt followed by the instruction
            as the user turn.
        """
        instruction = input["instruction"]
        return [
            {
                "role": "system",
                "content": IFEVAL_INSTRUCTION_ID_LIST_ASSIGNATOR_SYSTEM_PROMPT,
            },
            {"role": "user", "content": instruction},
        ]

    @property
    def outputs(self) -> "StepColumns":
        """Columns produced by this task."""
        return ["instruction_id_list"]

    def format_output(
        self, output: str | None, input: dict[str, Any] | None = None
    ) -> dict[str, Any]:
        """Parse the raw generation into the task's output columns.

        Args:
            output: Raw text produced by the LLM, or ``None`` when there was
                no generation.
            input: The originating row; unused here.

        Returns:
            The decoded JSON object, guaranteed to contain the
            ``instruction_id_list`` key. When ``output`` is ``None``, is not
            valid JSON, or does not decode to a JSON object,
            ``{"instruction_id_list": None}`` is returned instead.
        """
        failure: dict[str, Any] = {"instruction_id_list": None}
        if output is None:
            return failure

        try:
            parsed = json.loads(output)
        except json.JSONDecodeError:
            # Malformed generation: report a missing value rather than raising.
            return failure

        # Valid JSON that is not an object (list, string, number, ...) cannot
        # be used as a column mapping.
        if not isinstance(parsed, dict):
            return failure

        # Make sure the declared output column is always present.
        parsed.setdefault("instruction_id_list", None)
        return parsed
class IFEvalKwargsAssignator(Task):
    """Task that sends an instruction plus its instruction ids to the LLM.

    The chat consists of ``IFEVAL_KWARGS_ASSIGNATOR_SYSTEM_PROMPT`` as the
    system message and a Markdown-formatted user message with two sections:
    the instruction and the newline-separated instruction id list. The raw
    generation is returned unchanged in the ``kwargs`` column.
    """

    @property
    def inputs(self) -> "StepColumns":
        """Columns read from each input row."""
        return ["instruction", "instruction_id_list"]

    def format_input(self, input: dict[str, Any]) -> "ChatType":
        """Build the chat messages for one input row.

        Args:
            input: Row containing ``instruction`` and ``instruction_id_list``
                (an iterable of strings, or ``None``).

        Returns:
            A two-message chat: the system prompt followed by a user message
            with an ``## Instruction`` and an ``## Instruction ID List``
            section.
        """
        instruction = input["instruction"]
        # A missing list (None) is rendered as an empty section instead of
        # making ``str.join`` raise ``TypeError``.
        instruction_id_list = "\n".join(input["instruction_id_list"] or [])
        # Blank line before the second header so it is not glued to the end
        # of the instruction text.
        user_content = (
            f"## Instruction\n\n{instruction}\n\n"
            f"## Instruction ID List\n\n{instruction_id_list}"
        )
        return [
            {"role": "system", "content": IFEVAL_KWARGS_ASSIGNATOR_SYSTEM_PROMPT},
            {"role": "user", "content": user_content},
        ]

    @property
    def outputs(self) -> "StepColumns":
        """Columns produced by this task."""
        return ["kwargs"]

    def format_output(
        self, output: str | None, input: dict[str, Any] | None = None
    ) -> dict[str, Any]:
        """Wrap the raw generation in the ``kwargs`` column.

        Args:
            output: Raw text produced by the LLM, or ``None`` when there was
                no generation.
            input: The originating row; unused here.

        Returns:
            ``{"kwargs": output}``; ``None`` is passed through as-is.
        """
        return {"kwargs": output}