in genai-on-vertex-ai/gemini/needle_in_a_haystack/needlehaystack/llm_needle_haystack_tester.py [0:0]
def __init__(self,
             model_to_test: ModelProvider = None,
             evaluator: Evaluator = None,
             dynamic_needle = True,
             needle = None,
             haystack_dir = "PaulGrahamEssays",
             retrieval_question = None,
             results_version = 1,
             context_lengths_min = 1000,
             context_lengths_max = 16000,
             context_lengths_num_intervals = 35,
             context_lengths = None,
             document_depth_percent_min = 0,
             document_depth_percent_max = 100,
             document_depth_percent_intervals = 35,
             document_depth_percents = None,
             document_depth_percent_interval_type = "linear",
             num_concurrent_requests = 1,
             save_results = True,
             save_contexts = True,
             final_context_length_buffer = 200,
             seconds_to_sleep_between_completions = None,
             print_ongoing_status = True,
             **kwargs):
    """
    Validate the test configuration and store it on the instance.

    :param model_to_test: The model to test. Required; a falsy value raises ValueError. Its ``model_name`` attribute is copied to ``self.model_name``.
    :param evaluator: An evaluator to evaluate the model's response. Stored as ``self.evaluation_model``. Default is None.
    :param dynamic_needle: Stored as ``self.dynamic_needle``; how it is used is not visible in this method. Default is True.
    :param needle: The needle to be found in the haystack. Required; a falsy value raises ValueError.
    :param haystack_dir: The directory of text files to use as background context (or a haystack) in which the needle is to be found. Default is "PaulGrahamEssays".
    :param retrieval_question: The question with which to prompt the model to do the retrieval. Required; a falsy value raises ValueError.
    :param results_version: In case you would like to try the same combination of model, context length, and depth % multiple times, change the results version to something other than 1.
    :param context_lengths_min: The minimum length of the context. Default is 1000.
    :param context_lengths_max: The maximum length of the context. Default is 16000.
    :param context_lengths_num_intervals: The number of context lengths to generate between the min and max (both ends included). Default is 35.
    :param context_lengths: Explicit context lengths. When given, it is used as-is and the min/max/num_intervals values are ignored. Default is None.
    :param document_depth_percent_min: The minimum depth percent of the document. Default is 0.
    :param document_depth_percent_max: The maximum depth percent of the document. Default is 100.
    :param document_depth_percent_intervals: The number of depth percents to generate between the min and max (both ends included). Default is 35.
    :param document_depth_percents: Explicit depth percentages. When given, it is used as-is and the min/max/intervals values are ignored. Default is None.
    :param document_depth_percent_interval_type: The type of interval for the document depth percent. Must be 'linear' or 'sigmoid' when document_depth_percents is None; None is only accepted when document_depth_percents is supplied. Default is 'linear'.
    :param num_concurrent_requests: The number of concurrent requests, default = 1. Be careful of rate limits.
    :param save_results: Whether or not you would like to save your results to file. Default is True.
    :param save_contexts: Whether or not you would like to save your contexts to file. Warning: These will get long! Default is True.
    :param final_context_length_buffer: The amount of cushion you'd like to leave off the input context to allow for the output context. Default 200 tokens.
    :param seconds_to_sleep_between_completions: The number of seconds to sleep between completions. Default is None.
    :param print_ongoing_status: Whether or not to print the ongoing status. Default is True.
    :param kwargs: Additional arguments. Accepted but not used by this method.
    :raises ValueError: If model_to_test, needle, haystack_dir or retrieval_question is missing/falsy; if context_lengths is None and any of its min/max/num_intervals is None; if document_depth_percent_interval_type is not None, 'linear' or 'sigmoid'; or if document_depth_percents is None and its min/max/intervals or interval type cannot produce a list.
    """
    if not model_to_test:
        raise ValueError("A language model must be provided to test.")
    if not needle or not haystack_dir or not retrieval_question:
        raise ValueError("Needle, haystack, and retrieval_question must be provided.")

    self.dynamic_needle = dynamic_needle
    self.needle = needle
    self.haystack_dir = haystack_dir
    self.retrieval_question = retrieval_question
    self.results_version = results_version
    self.num_concurrent_requests = num_concurrent_requests
    self.save_results = save_results
    self.final_context_length_buffer = final_context_length_buffer
    self.save_contexts = save_contexts
    self.seconds_to_sleep_between_completions = seconds_to_sleep_between_completions
    self.print_ongoing_status = print_ongoing_status
    self.testing_results = []

    if context_lengths is None:
        if context_lengths_min is None or context_lengths_max is None or context_lengths_num_intervals is None:
            # The message names the real parameters so the caller can act on it.
            raise ValueError("Either context_lengths_min, context_lengths_max, context_lengths_num_intervals need to be filled out OR context_lengths needs to be supplied.")
        # Evenly spaced lengths including both endpoints, rounded to whole numbers.
        self.context_lengths = np.round(np.linspace(context_lengths_min, context_lengths_max, num=context_lengths_num_intervals, endpoint=True)).astype(int)
    else:
        self.context_lengths = context_lengths

    # The interval type is validated even when an explicit list is supplied.
    if document_depth_percent_interval_type not in [None, "linear", "sigmoid"]:
        # A custom distribution is passed as the list itself, not as the interval count.
        raise ValueError("document_depth_percent_interval_type must be either None, 'linear' or 'sigmoid'. If you'd like your own distribution give a list of ints in via document_depth_percents")

    if document_depth_percents is None:
        if document_depth_percent_min is None or document_depth_percent_max is None or document_depth_percent_intervals is None:
            raise ValueError("Either document_depth_percent_min, document_depth_percent_max, document_depth_percent_intervals need to be filled out OR the document_depth_percents needs to be supplied.")

        if document_depth_percent_interval_type == 'linear':
            # Evenly spaced depths including both endpoints, rounded to whole numbers.
            self.document_depth_percents = np.round(np.linspace(document_depth_percent_min, document_depth_percent_max, num=document_depth_percent_intervals, endpoint=True)).astype(int)
        elif document_depth_percent_interval_type == 'sigmoid':
            # Each evenly spaced point is remapped through self.logistic
            # (defined elsewhere on the class; its exact curve is not visible here).
            self.document_depth_percents = [self.logistic(x) for x in np.linspace(document_depth_percent_min, document_depth_percent_max, document_depth_percent_intervals)]
        else:
            # Reached only when the interval type is None and no explicit list was given.
            raise ValueError("document_depth_percent_interval_type must be either 'sigmoid' or 'linear' if document_depth_percents is None.")
    else:
        self.document_depth_percents = document_depth_percents

    self.model_to_test = model_to_test
    self.model_name = self.model_to_test.model_name
    self.evaluation_model = evaluator