in deepseek_vl2/models/processing_deepseek_vl_v2.py [0:0]
def format_messages_v2(self, messages, pil_images, systems=None):
    """Tokenize a multi-turn conversation and gather its image inputs.

    Plays the role of ``format_messages_v2`` and ``get_images_info`` in the
    last version.

    Each message is handled according to its ``'role'``:

    * user (``conv.roles[0]`` or ``"user"``): the content is rendered through
      the conversation template selected by ``self.sft_format`` and tokenized
      with ``self.tokenize_with_images``. One entry of ``pil_images`` is
      consumed, in order, per occurrence of ``self.image_token`` in the
      rendered prompt.
    * assistant (``conv.roles[1]`` or ``"assistant"``): the stripped content
      is tokenized with ``eos=True`` and must not contain
      ``self.image_token``. Its tokens are never masked in the labels.
    * system (``'system'`` or ``'deepseekapi-sys'``): only allowed as the
      first message; the stripped content is encoded with ``self.encode``.

    ``self.bos_id`` is prepended once, before the first message, and is not
    masked in the labels.

    Args:
        messages: Iterable of mappings, each with a ``'role'`` and a
            ``'content'`` entry.
        pil_images: Sequence of images referenced by the user turns. Image
            cropping is requested from ``tokenize_with_images`` only when it
            holds at most two images.
        systems: Unused; kept for interface compatibility.

    Returns:
        A 6-tuple ``(tokenized_data, masked_tokenized_data, images_list,
        images_seq_mask, images_spatial_crop, num_image_tokens)``.
        ``tokenized_data`` is the full token sequence;
        ``masked_tokenized_data`` is the matching label sequence, in which
        user and system tokens are replaced by ``self.ignore_id`` when
        ``self.mask_prompt`` is set; ``images_seq_mask`` has one entry per
        token of ``tokenized_data``. The three image-related lists are the
        concatenated per-turn outputs of ``tokenize_with_images``.

    Raises:
        AssertionError: If an assistant reply contains ``self.image_token``,
            a system message is not the first message, a role is unknown, or
            the accumulated outputs have inconsistent lengths.
    """
    tokenized_data = []
    masked_tokenized_data = []  # labels
    images_list = []
    images_seq_mask = []
    images_spatial_crop = []
    num_image_tokens = []

    image_index = 0  # index of the next unconsumed entry in pil_images

    conv = get_conv_template(self.sft_format)
    conv_system_message = conv.system_message

    for idx, message in enumerate(messages):
        if idx == 0:
            tokenized_data += [self.bos_id]
            masked_tokenized_data += [self.bos_id]
            images_seq_mask += [False]
            conv.system_message = conv_system_message
        else:
            # Only the first turn is rendered with the template's own system
            # message; later turns get an empty one.
            conv.system_message = ''

        role = message['role']

        if role in (conv.roles[0], "user"):
            conv.reset_message()
            conv.append_message(conv.roles[0], str(message['content']).strip())
            conv.append_message(conv.roles[1], '')
            formatted_question = conv.get_prompt()

            # Each image placeholder in the prompt consumes the next image.
            n_placeholders = formatted_question.count(self.image_token)
            tokenized_str, images, seq_mask, spatial_crop, n_image_tokens = self.tokenize_with_images(
                formatted_question,
                pil_images[image_index: image_index + n_placeholders],
                bos=False,
                eos=False,
                cropping=len(pil_images) <= 2
            )
            image_index += n_placeholders

            tokenized_data += tokenized_str
            if self.mask_prompt:
                masked_tokenized_data += [self.ignore_id] * len(tokenized_str)
            else:
                masked_tokenized_data += tokenized_str
            images_list += images
            images_seq_mask += seq_mask
            images_spatial_crop += spatial_crop
            num_image_tokens += n_image_tokens

        elif role in (conv.roles[1], "assistant"):
            formatted_answer = message['content'].strip()
            assert formatted_answer.count(
                self.image_token) == 0, f"there should be no {self.image_token} in the assistant's reply, but got {messages}"
            # No images are passed for a reply, so only the tokens and their
            # mask are kept from the result.
            tokenized_str, _, seq_mask, _, _ = self.tokenize_with_images(
                formatted_answer,
                [],
                bos=False,
                eos=True,
                cropping=len(pil_images) <= 2)

            tokenized_data += tokenized_str
            masked_tokenized_data += tokenized_str
            images_seq_mask += seq_mask

        elif role in ('system', 'deepseekapi-sys'):
            # A system message inside `messages` may only appear as the very
            # first entry; when present, the template's own system message
            # is not used.
            assert idx == 0, 'system information should only exist in the begining of the conversation'
            formatted_system = message['content'].strip()
            tokenized_str = self.encode(formatted_system, bos=False, eos=False)

            tokenized_data += tokenized_str
            if self.mask_prompt:
                masked_tokenized_data += [self.ignore_id] * len(tokenized_str)
            else:
                masked_tokenized_data += tokenized_str
            images_seq_mask += [False] * len(tokenized_str)

        else:
            assert False, f"Unknown role: {message['role']}"

    # Report the length that is actually compared (the accumulated sequence),
    # not the length of the last message's tokens.
    assert len(tokenized_data) == len(
        images_seq_mask), f"format_messages_v2: tokenized_data's length {len(tokenized_data)} is not equal to imags_seq_mask's length {len(images_seq_mask)}"
    assert len(images_spatial_crop) == len(num_image_tokens), "image number should be compatible"

    return tokenized_data, masked_tokenized_data, images_list, images_seq_mask, images_spatial_crop, num_image_tokens