in utils/generate_vertical_tabert_training_data.py [0:0]
def sample_context(example: Example, max_context_length: int, context_sample_strategy: str = 'nearest') -> Iterator:
if context_sample_strategy == 'nearest':
selected_context = []
context_before, context_after = example.context[0], example.context[1]
context_src = 'before'
if not context_before:
context = context_after
context_src = 'after'
elif not context_after:
context = context_before
elif random() < 0.5:
context = context_after
context_src = 'after'
else:
context = context_before
if context_src == 'before':
for i in reversed(range(0, len(context))):
sent = context[i]
selected_context = sent + selected_context
if len(selected_context) > max_context_length:
selected_context = selected_context[-max_context_length:] # only keep context close to the table
break
elif context_src == 'after':
for i in range(0, len(context)):
sent = context[i]
selected_context = selected_context + sent
if len(selected_context) > max_context_length:
selected_context = selected_context[:max_context_length] # only keep context close to the table
break
if selected_context:
yield selected_context
elif context_sample_strategy == 'concate_and_enumerate':
# concatenate the context before and after, select a random chunk of text
all_context = example.context[0] + example.context[1]
selected_context = []
for i in range(len(all_context)):
sent = all_context[i]
selected_context.extend(sent)
if len(selected_context) > max_context_length:
selected_context = selected_context[:max_context_length]
if selected_context:
yield selected_context
selected_context = []
if selected_context:
yield selected_context
else:
raise RuntimeError('Unknown context sample strategy')