def _()

in course/en/chapter13/grpo_length.py [0:0]


def _(mo, slider):
    import plotly.express as px

    # Toy dataset with 5 samples of different lengths
    completions = [
        "Short",  # 5 chars
        "Medium length text",  # 18 chars
        "This is about twenty chars",  # 25 chars
        "This is a slightly longer completion",  # 36 chars
        "This is a much longer completion with more words",  # 45 chars
    ]

    maximum_length = max(len(completion) for completion in completions)
    minimum_length = min(len(completion) for completion in completions)

    def length_reward(completions, ideal_length):
        """
        Calculate rewards based on the length of completions.

        Args:
            completions: List of text completions
            ideal_length: Target length in characters

        Returns:
            List of reward scores for each completion
        """
        rewards = []

        for completion in completions:
            length = len(completion)
            # Simple reward function: negative absolute difference
            reward = maximum_length - abs(length - ideal_length)
            reward = max(0, reward)
            reward = min(1, reward / (maximum_length - minimum_length))
            rewards.append(reward)

        return rewards

    # Calculate rewards for the examples
    rewards = length_reward(completions=completions, ideal_length=slider.value)

    # Display the examples and their rewards
    results = []
    for completion, reward in zip(completions, rewards):
        results.append(
            {"Completion": completion, "Length": len(completion), "Reward": reward}
        )

    fig = px.bar(results, x="Completion", y="Reward", color="Length")
    mo.ui.plotly(fig)