in habanalabs/common/command_submission.c [1965:2223]
/**
 * cs_ioctl_signal_wait() - submit a signal, wait or collective-wait CS.
 * @hpriv: file private data; supplies the device (hpriv->hdev) and the
 *         context (hpriv->ctx) the CS is submitted on.
 * @cs_type: CS_TYPE_SIGNAL, CS_TYPE_WAIT or CS_TYPE_COLLECTIVE_WAIT. Any other
 *           type is rejected with -EINVAL after the CS object was allocated.
 * @chunks: user-space pointer to the chunk array; copied in by
 *          hl_cs_copy_chunk_array().
 * @num_chunks: number of entries in @chunks. Only the first entry is used.
 * @cs_seq: output. Set to ULLONG_MAX on entry and whenever no CS ends up
 *          submitted; set to the new CS sequence number on success.
 * @flags: CS flags. HL_CS_FLAGS_ENCAP_SIGNALS selects the encapsulated-signals
 *         flow; the value is also forwarded to allocate_cs().
 * @timeout: forwarded to allocate_cs().
 * @signal_sob_addr_offset: output, written only after the CS was scheduled
 *                          successfully (copied from cs->sob_addr_offset).
 * @signal_initial_sob_count: output, written only after the CS was scheduled
 *                            successfully (copied from cs->initial_sob_count).
 *
 * For a wait CS the signal CS to wait on is resolved first. If that signal CS
 * (or its encapsulated-signals handle) cannot be found or has already
 * completed, there is nothing to wait for: the function returns 0 and leaves
 * *@cs_seq at ULLONG_MAX.
 *
 * Return: HL_CS_STATUS_SUCCESS when the CS was scheduled, 0 when a wait CS
 * had nothing to wait on or failed scheduling, -EINVAL on validation failure,
 * otherwise the error code returned by the failing helper.
 */
static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,
				void __user *chunks, u32 num_chunks,
				u64 *cs_seq, u32 flags, u32 timeout,
				u32 *signal_sob_addr_offset, u16 *signal_initial_sob_count)
{
	struct hl_cs_encaps_sig_handle *encaps_sig_hdl = NULL;
	bool handle_found = false, is_wait_cs = false,
			wait_cs_submitted = false,
			cs_encaps_signals = false;
	struct hl_cs_chunk *cs_chunk_array, *chunk;
	bool staged_cs_with_encaps_signals = false;
	struct hw_queue_properties *hw_queue_prop;
	struct hl_device *hdev = hpriv->hdev;
	struct hl_cs_compl *sig_waitcs_cmpl;
	u32 q_idx, collective_engine_id = 0;
	struct hl_cs_counters_atomic *cntr;
	struct hl_fence *sig_fence = NULL;
	struct hl_ctx *ctx = hpriv->ctx;
	enum hl_queue_type q_type;
	struct hl_cs *cs;
	u64 signal_seq;
	int rc;

	cntr = &hdev->aggregated_cs_counters;
	*cs_seq = ULLONG_MAX;

	rc = hl_cs_copy_chunk_array(hdev, &cs_chunk_array, chunks, num_chunks,
			ctx);
	if (rc)
		goto out;

	/* currently it is guaranteed to have only one chunk */
	chunk = &cs_chunk_array[0];

	if (chunk->queue_index >= hdev->asic_prop.max_queues) {
		atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
		atomic64_inc(&cntr->validation_drop_cnt);
		dev_err(hdev->dev, "Queue index %d is invalid\n",
			chunk->queue_index);
		rc = -EINVAL;
		goto free_cs_chunk_array;
	}

	q_idx = chunk->queue_index;
	hw_queue_prop = &hdev->asic_prop.hw_queues_props[q_idx];
	q_type = hw_queue_prop->type;

	if (!hw_queue_prop->supports_sync_stream) {
		atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
		atomic64_inc(&cntr->validation_drop_cnt);
		dev_err(hdev->dev,
			"Queue index %d does not support sync stream operations\n",
			q_idx);
		rc = -EINVAL;
		goto free_cs_chunk_array;
	}

	if (cs_type == CS_TYPE_COLLECTIVE_WAIT) {
		/* collective wait is accepted only on a collective master queue */
		if (!(hw_queue_prop->collective_mode == HL_COLLECTIVE_MASTER)) {
			atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
			atomic64_inc(&cntr->validation_drop_cnt);
			dev_err(hdev->dev,
				"Queue index %d is invalid\n", q_idx);
			rc = -EINVAL;
			goto free_cs_chunk_array;
		}

		if (!hdev->nic_ports_mask) {
			atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
			atomic64_inc(&cntr->validation_drop_cnt);
			dev_err(hdev->dev,
				"Collective operations not supported when NIC ports are disabled\n");
			rc = -EINVAL;
			goto free_cs_chunk_array;
		}

		collective_engine_id = chunk->collective_engine_id;
	}

	is_wait_cs = !!(cs_type == CS_TYPE_WAIT ||
			cs_type == CS_TYPE_COLLECTIVE_WAIT);

	cs_encaps_signals = !!(flags & HL_CS_FLAGS_ENCAP_SIGNALS);

	if (is_wait_cs) {
		rc = cs_ioctl_extract_signal_seq(hdev, chunk, &signal_seq,
				ctx, cs_encaps_signals);
		if (rc)
			goto free_cs_chunk_array;

		if (cs_encaps_signals) {
			/* check if cs sequence has encapsulated
			 * signals handle
			 */
			struct idr *idp;
			u32 id;

			spin_lock(&ctx->sig_mgr.lock);
			idp = &ctx->sig_mgr.handles;
			idr_for_each_entry(idp, encaps_sig_hdl, id) {
				if (encaps_sig_hdl->cs_seq == signal_seq) {
					/* get refcount to protect removing
					 * this handle from idr, needed when
					 * multiple wait cs are used with offset
					 * to wait on reserved encaps signals.
					 * The matching kref_put() in this
					 * function runs without sig_mgr.lock
					 * held, so a handle may still be
					 * visible here after its refcount
					 * already dropped to zero. Never revive
					 * such a handle; treat it as not found.
					 */
					if (kref_get_unless_zero(&encaps_sig_hdl->refcount))
						handle_found = true;
					break;
				}
			}
			spin_unlock(&ctx->sig_mgr.lock);

			if (!handle_found) {
				/* treat as signal CS already finished */
				dev_dbg(hdev->dev, "Cannot find encapsulated signals handle for seq 0x%llx\n",
						signal_seq);
				rc = 0;
				goto free_cs_chunk_array;
			}

			/* validate also the signal offset value */
			if (chunk->encaps_signal_offset >
					encaps_sig_hdl->count) {
				dev_err(hdev->dev, "offset(%u) value exceed max reserved signals count(%u)!\n",
						chunk->encaps_signal_offset,
						encaps_sig_hdl->count);
				rc = -EINVAL;
				goto free_cs_chunk_array;
			}
		}

		sig_fence = hl_ctx_get_fence(ctx, signal_seq);
		if (IS_ERR(sig_fence)) {
			atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
			atomic64_inc(&cntr->validation_drop_cnt);
			dev_err(hdev->dev,
				"Failed to get signal CS with seq 0x%llx\n",
				signal_seq);
			rc = PTR_ERR(sig_fence);
			goto free_cs_chunk_array;
		}

		if (!sig_fence) {
			/* signal CS already finished */
			rc = 0;
			goto free_cs_chunk_array;
		}

		sig_waitcs_cmpl =
			container_of(sig_fence, struct hl_cs_compl, base_fence);

		/* a CS_TYPE_DEFAULT CS is an acceptable signal source only in
		 * the encapsulated-signals flow
		 */
		staged_cs_with_encaps_signals = !!
				(sig_waitcs_cmpl->type == CS_TYPE_DEFAULT &&
				(flags & HL_CS_FLAGS_ENCAP_SIGNALS));

		if (sig_waitcs_cmpl->type != CS_TYPE_SIGNAL &&
				!staged_cs_with_encaps_signals) {
			atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
			atomic64_inc(&cntr->validation_drop_cnt);
			dev_err(hdev->dev,
				"CS seq 0x%llx is not of a signal/encaps-signal CS\n",
				signal_seq);
			hl_fence_put(sig_fence);
			rc = -EINVAL;
			goto free_cs_chunk_array;
		}

		if (completion_done(&sig_fence->completion)) {
			/* signal CS already finished */
			hl_fence_put(sig_fence);
			rc = 0;
			goto free_cs_chunk_array;
		}
	}

	rc = allocate_cs(hdev, ctx, cs_type, ULLONG_MAX, &cs, flags, timeout);
	if (rc) {
		/* the fence reference was not handed over to a CS; drop it */
		if (is_wait_cs)
			hl_fence_put(sig_fence);

		goto free_cs_chunk_array;
	}

	/*
	 * Save the signal CS fence for later initialization right before
	 * hanging the wait CS on the queue.
	 * for encaps signals case, we save the cs sequence and handle pointer
	 * for later initialization.
	 */
	if (is_wait_cs) {
		cs->signal_fence = sig_fence;
		/* store the handle pointer, so we don't have to
		 * look for it again, later on the flow
		 * when we need to set SOB info in hw_queue.
		 */
		if (cs->encaps_signals)
			cs->encaps_sig_hdl = encaps_sig_hdl;
	}

	hl_debugfs_add_cs(cs);

	*cs_seq = cs->sequence;

	if (cs_type == CS_TYPE_WAIT || cs_type == CS_TYPE_SIGNAL)
		rc = cs_ioctl_signal_wait_create_jobs(hdev, ctx, cs, q_type,
				q_idx, chunk->encaps_signal_offset);
	else if (cs_type == CS_TYPE_COLLECTIVE_WAIT)
		rc = hdev->asic_funcs->collective_wait_create_jobs(hdev, ctx,
				cs, q_idx, collective_engine_id,
				chunk->encaps_signal_offset);
	else {
		atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
		atomic64_inc(&cntr->validation_drop_cnt);
		rc = -EINVAL;
	}

	if (rc)
		goto free_cs_object;

	rc = hl_hw_queue_schedule_cs(cs);
	if (rc) {
		/* In case wait cs failed here, it means the signal cs
		 * already completed. we want to free all it's related objects
		 * but we don't want to fail the ioctl.
		 */
		if (is_wait_cs)
			rc = 0;
		else if (rc != -EAGAIN)
			dev_err(hdev->dev,
				"Failed to submit CS %d.%llu to H/W queues, error %d\n",
				ctx->asid, cs->sequence, rc);
		goto free_cs_object;
	}

	*signal_sob_addr_offset = cs->sob_addr_offset;
	*signal_initial_sob_count = cs->initial_sob_count;

	rc = HL_CS_STATUS_SUCCESS;
	/* a submitted wait CS keeps the handle reference taken above */
	if (is_wait_cs)
		wait_cs_submitted = true;
	goto put_cs;

free_cs_object:
	cs_rollback(hdev, cs);
	*cs_seq = ULLONG_MAX;
	/* The path below is both for good and erroneous exits */
put_cs:
	/* We finished with the CS in this function, so put the ref */
	cs_put(cs);
free_cs_chunk_array:
	/* drop the handle reference taken during lookup unless a wait CS was
	 * actually submitted
	 */
	if (!wait_cs_submitted && cs_encaps_signals && handle_found &&
			is_wait_cs)
		kref_put(&encaps_sig_hdl->refcount,
				hl_encaps_handle_do_release);
	kfree(cs_chunk_array);
out:
	return rc;
}