in course/en/chapter13/grpo_length.py [0:0]
def _(mo, slider):
    # Notebook cell: score a toy set of completions by how close their
    # character length is to the target read from `slider.value`, then show
    # the scores as a bar chart.
    # NOTE(review): `mo` is presumably the marimo module and `slider` a marimo
    # UI element -- only `mo.ui.plotly` and `slider.value` are used here.
    import plotly.express as px

    # Toy dataset with 5 samples of different lengths
    completions = [
        "Short",  # 5 chars
        "Medium length text",  # 18 chars
        "This is about twenty chars",  # 26 chars
        "This is a slightly longer completion",  # 36 chars
        "This is a much longer completion with more words",  # 48 chars
    ]
    lengths = [len(completion) for completion in completions]
    maximum_length = max(lengths)
    minimum_length = min(lengths)

    def length_reward(completions, ideal_length):
        """
        Calculate rewards based on the length of completions.

        Each reward is
        ``clamp((maximum_length - |len - ideal_length|) / span, 0, 1)``
        where ``span = maximum_length - minimum_length`` and both bounds come
        from the toy dataset defined in the enclosing cell (not from the
        ``completions`` argument).

        Because the numerator is offset by ``maximum_length`` rather than by
        ``span``, every completion within ``minimum_length`` characters of the
        target receives the full reward of 1.

        Args:
            completions: List of text completions
            ideal_length: Target length in characters

        Returns:
            List of reward scores in [0, 1], one per completion
        """
        # Fall back to a span of 1 so that a dataset whose completions all
        # share one length does not raise ZeroDivisionError.
        length_span = (maximum_length - minimum_length) or 1

        distances = [
            abs(len(completion) - ideal_length) for completion in completions
        ]
        # Lower clamp: targets further away than maximum_length score 0.
        # Upper clamp: near-target completions saturate at 1.
        return [
            min(1, max(0, maximum_length - distance) / length_span)
            for distance in distances
        ]

    # Calculate rewards for the examples
    rewards = length_reward(completions=completions, ideal_length=slider.value)

    # Display the examples and their rewards
    results = [
        {"Completion": completion, "Length": length, "Reward": reward}
        for completion, length, reward in zip(completions, lengths, rewards)
    ]

    fig = px.bar(results, x="Completion", y="Reward", color="Length")
    mo.ui.plotly(fig)