in optimum/intel/openvino/modeling_visual_language.py [0:0]
def forward(
self,
input_ids,
pixel_values=None,
past_key_values=None,
inputs_embeds=None,
image_sizes=None,
attention_mask=None,
position_ids=None,
image_bound=None,
tgt_sizes=None,
pixel_values_videos=None,
image_grid_thw=None,
video_grid_thw=None,
rope_deltas=None,
images=None,
second_per_grid_ts=None,
token_type_ids=None,
pixel_attention_mask=None,
input_image_embeds: Optional[torch.FloatTensor] = None,
image_pixel_values: Optional[torch.FloatTensor] = None,
image_attention_mask=None,
audio_input_features: Optional[torch.FloatTensor] = None,
input_audio_embeds: Optional[torch.FloatTensor] = None,
audio_embed_sizes=None,
audio_attention_mask=None,
input_mode=None,
**kwargs,