in pyrit/score/human_in_the_loop_scorer.py
# Module-level imports assumed by this method: `Optional` from `typing`, and
# `PromptRequestPiece` and `Score` from `pyrit.models`.
def score_prompt_manually(self, request_response: PromptRequestPiece, *, task: Optional[str] = None) -> list[Score]:
    """
    Manually score the prompt.

    Args:
        request_response (PromptRequestPiece): The prompt request piece to score.
        task (str): The task based on which the text should be scored (the original attacker model's objective).

    Returns:
        list of scores
    """
    self.validate(request_response, task=task)

    score_value = ""
    score_category = ""
    # Keep prompting until the operator has supplied both a category and a value.
    while not score_value or not score_category:
        if not score_category:
            score_category = self._get_user_input("Please enter score category (e.g., 'hate' or 'violence').")
        if not score_value:
            message = f"""This prompt has not been scored yet, please manually score the prompt.
            The prompt is: {request_response.converted_value}\n
            Please enter a score value
            (e.g., 'True' for true_false or a value between '0.0' and '1.0' for float_scale): """
            score_value = self._get_user_input(message)

    # Infer the score type ('true_false' or 'float_scale') from the entered value.
    score_type = self._get_score_type(score_value)
    score_value_description = self._get_user_input(
        "Enter score value description (optional, press 'Enter' to skip): "
    )
    score_rationale = self._get_user_input("Enter score rationale (optional, press 'Enter' to skip): ")
    score_metadata = self._get_user_input("Enter score metadata (optional, press 'Enter' to skip): ")
    score = Score(
        score_value=score_value,
        score_value_description=score_value_description,
        score_type=score_type,  # type: ignore
        score_category=score_category,
        score_rationale=score_rationale,
        score_metadata=score_metadata,
        scorer_class_identifier=self.get_identifier(),
        prompt_request_response_id=request_response.id,
        task=task,
    )
    return [score]
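
# The `_get_score_type` helper is not part of this excerpt. Below is a minimal
# sketch of what it plausibly does, inferred from the input prompt above
# ('True'/'False' maps to true_false; a number between 0.0 and 1.0 maps to
# float_scale). This is a hypothetical reconstruction; PyRIT's actual helper
# may behave differently.
def _get_score_type(self, score_value: str) -> str:
    # Hypothetical: map the raw user input to one of the two score types
    # named in the interactive prompt above.
    if score_value.strip().lower() in ("true", "false"):
        return "true_false"
    value = float(score_value)  # raises ValueError for non-numeric input
    if 0.0 <= value <= 1.0:
        return "float_scale"
    raise ValueError(f"Expected 'True'/'False' or a value in [0.0, 1.0], got {score_value!r}")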
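
# A minimal usage sketch. Assumptions (not confirmed by this excerpt):
# `HumanInTheLoopScorer` is exported from `pyrit.score`, and `PromptRequestPiece`
# accepts `role` and `original_value` keyword arguments; the constructor details
# are illustrative rather than PyRIT's exact API.
from pyrit.models import PromptRequestPiece
from pyrit.score import HumanInTheLoopScorer

piece = PromptRequestPiece(role="assistant", original_value="some model output")
scorer = HumanInTheLoopScorer()

# Blocks on stdin until the operator enters both a category and a score value.
scores = scorer.score_prompt_manually(piece, task="original attacker objective")
print(scores[0].score_value, scores[0].score_category)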