in habanalabs/common/command_submission.c [1268:1457]
static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks,
u32 num_chunks, u64 *cs_seq, u32 flags,
u32 encaps_signals_handle, u32 timeout,
u16 *signal_initial_sob_count)
{
bool staged_mid, int_queues_only = true;
struct hl_device *hdev = hpriv->hdev;
struct hl_cs_chunk *cs_chunk_array;
struct hl_cs_counters_atomic *cntr;
struct hl_ctx *ctx = hpriv->ctx;
struct hl_cs_job *job;
struct hl_cs *cs;
struct hl_cb *cb;
u64 user_sequence;
u8 stream_master_qid_map = 0;
int rc, i;
cntr = &hdev->aggregated_cs_counters;
user_sequence = *cs_seq;
*cs_seq = ULLONG_MAX;
rc = hl_cs_copy_chunk_array(hdev, &cs_chunk_array, chunks, num_chunks,
hpriv->ctx);
if (rc)
goto out;
if ((flags & HL_CS_FLAGS_STAGED_SUBMISSION) &&
!(flags & HL_CS_FLAGS_STAGED_SUBMISSION_FIRST))
staged_mid = true;
else
staged_mid = false;
rc = allocate_cs(hdev, hpriv->ctx, CS_TYPE_DEFAULT,
staged_mid ? user_sequence : ULLONG_MAX, &cs, flags,
timeout);
if (rc)
goto free_cs_chunk_array;
*cs_seq = cs->sequence;
hl_debugfs_add_cs(cs);
rc = cs_staged_submission(hdev, cs, user_sequence, flags,
encaps_signals_handle);
if (rc)
goto free_cs_object;
/* If this is a staged submission we must return the staged sequence
* rather than the internal CS sequence
*/
if (cs->staged_cs)
*cs_seq = cs->staged_sequence;
/* Validate ALL the CS chunks before submitting the CS */
for (i = 0 ; i < num_chunks ; i++) {
struct hl_cs_chunk *chunk = &cs_chunk_array[i];
enum hl_queue_type queue_type;
bool is_kernel_allocated_cb;
rc = validate_queue_index(hdev, chunk, &queue_type,
&is_kernel_allocated_cb);
if (rc) {
atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
atomic64_inc(&cntr->validation_drop_cnt);
goto free_cs_object;
}
if (is_kernel_allocated_cb) {
cb = get_cb_from_cs_chunk(hdev, &hpriv->cb_mgr, chunk);
if (!cb) {
atomic64_inc(
&ctx->cs_counters.validation_drop_cnt);
atomic64_inc(&cntr->validation_drop_cnt);
rc = -EINVAL;
goto free_cs_object;
}
} else {
cb = (struct hl_cb *) (uintptr_t) chunk->cb_handle;
}
if (queue_type == QUEUE_TYPE_EXT ||
queue_type == QUEUE_TYPE_HW) {
int_queues_only = false;
/*
* store which stream are being used for external/HW
* queues of this CS
*/
if (hdev->supports_wait_for_multi_cs)
stream_master_qid_map |=
get_stream_master_qid_mask(hdev,
chunk->queue_index);
}
job = hl_cs_allocate_job(hdev, queue_type,
is_kernel_allocated_cb);
if (!job) {
atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
atomic64_inc(&cntr->out_of_mem_drop_cnt);
dev_err(hdev->dev, "Failed to allocate a new job\n");
rc = -ENOMEM;
if (is_kernel_allocated_cb)
goto release_cb;
goto free_cs_object;
}
job->id = i + 1;
job->cs = cs;
job->user_cb = cb;
job->user_cb_size = chunk->cb_size;
job->hw_queue_id = chunk->queue_index;
cs->jobs_in_queue_cnt[job->hw_queue_id]++;
list_add_tail(&job->cs_node, &cs->job_list);
/*
* Increment CS reference. When CS reference is 0, CS is
* done and can be signaled to user and free all its resources
* Only increment for JOB on external or H/W queues, because
* only for those JOBs we get completion
*/
if (cs_needs_completion(cs) &&
(job->queue_type == QUEUE_TYPE_EXT ||
job->queue_type == QUEUE_TYPE_HW))
cs_get(cs);
hl_debugfs_add_job(hdev, job);
rc = cs_parser(hpriv, job);
if (rc) {
atomic64_inc(&ctx->cs_counters.parsing_drop_cnt);
atomic64_inc(&cntr->parsing_drop_cnt);
dev_err(hdev->dev,
"Failed to parse JOB %d.%llu.%d, err %d, rejecting the CS\n",
cs->ctx->asid, cs->sequence, job->id, rc);
goto free_cs_object;
}
}
/* We allow a CS with any queue type combination as long as it does
* not get a completion
*/
if (int_queues_only && cs_needs_completion(cs)) {
atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
atomic64_inc(&cntr->validation_drop_cnt);
dev_err(hdev->dev,
"Reject CS %d.%llu since it contains only internal queues jobs and needs completion\n",
cs->ctx->asid, cs->sequence);
rc = -EINVAL;
goto free_cs_object;
}
/*
* store the (external/HW queues) streams used by the CS in the
* fence object for multi-CS completion
*/
if (hdev->supports_wait_for_multi_cs)
cs->fence->stream_master_qid_map = stream_master_qid_map;
rc = hl_hw_queue_schedule_cs(cs);
if (rc) {
if (rc != -EAGAIN)
dev_err(hdev->dev,
"Failed to submit CS %d.%llu to H/W queues, error %d\n",
cs->ctx->asid, cs->sequence, rc);
goto free_cs_object;
}
*signal_initial_sob_count = cs->initial_sob_count;
rc = HL_CS_STATUS_SUCCESS;
goto put_cs;
release_cb:
atomic_dec(&cb->cs_cnt);
hl_cb_put(cb);
free_cs_object:
cs_rollback(hdev, cs);
*cs_seq = ULLONG_MAX;
/* The path below is both for good and erroneous exits */
put_cs:
/* We finished with the CS in this function, so put the ref */
cs_put(cs);
free_cs_chunk_array:
kfree(cs_chunk_array);
out:
return rc;
}