in src/optimum/nvidia/export/config.py [0:0]
    def validate(self) -> "ExportConfig":
        """Check the export settings and fill in ``max_num_tokens`` when unset.

        A ``max_num_tokens`` equal to ``-1`` is treated as "not provided" and
        is replaced in place:

        * with ``8192`` when ``enabled_chunked_context`` is truthy (a warning
          is emitted, since this hardcoded value might not be optimal);
        * with ``max_batch_size * max_input_len // 2`` otherwise.

        Any other ``max_num_tokens`` value is left untouched.

        Returns:
            The same instance (``self``), possibly with ``max_num_tokens``
            updated.

        Raises:
            ValueError: If ``optimization_level`` is negative.
        """
        if self.optimization_level < 0:
            raise ValueError(
                f"optimization_level should be >= 0, got {self.optimization_level}"
            )

        # An explicit value was supplied: there is nothing to infer.
        if self.max_num_tokens != -1:
            return self

        if not self.enabled_chunked_context:
            self.max_num_tokens = self.max_batch_size * self.max_input_len // 2
        else:
            # Should be N * tokens_per_block (8192 is the default)
            self.max_num_tokens = 8192  # hardcode for now
            warn(
                f"max_num_tokens set to {self.max_num_tokens} with chunked context enabled might not be optimal."
            )

        LOGGER.debug(f"Inferred max_num_tokens={self.max_num_tokens}")
        return self