static int cs_ioctl_signal_wait()

in habanalabs/common/command_submission.c [1965:2223]
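
Handles the SIGNAL, WAIT and COLLECTIVE_WAIT command-submission types of the CS ioctl: it validates the target sync-stream queue, resolves the signal fence (or encapsulated-signals handle) that a wait CS depends on, allocates the CS, creates its jobs and schedules it on the H/W queues.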


static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,
				void __user *chunks, u32 num_chunks,
				u64 *cs_seq, u32 flags, u32 timeout,
				u32 *signal_sob_addr_offset, u16 *signal_initial_sob_count)
{
	struct hl_cs_encaps_sig_handle *encaps_sig_hdl = NULL;
	bool handle_found = false, is_wait_cs = false,
			wait_cs_submitted = false,
			cs_encaps_signals = false;
	struct hl_cs_chunk *cs_chunk_array, *chunk;
	bool staged_cs_with_encaps_signals = false;
	struct hw_queue_properties *hw_queue_prop;
	struct hl_device *hdev = hpriv->hdev;
	struct hl_cs_compl *sig_waitcs_cmpl;
	u32 q_idx, collective_engine_id = 0;
	struct hl_cs_counters_atomic *cntr;
	struct hl_fence *sig_fence = NULL;
	struct hl_ctx *ctx = hpriv->ctx;
	enum hl_queue_type q_type;
	struct hl_cs *cs;
	u64 signal_seq;
	int rc;

	cntr = &hdev->aggregated_cs_counters;
	*cs_seq = ULLONG_MAX;

	rc = hl_cs_copy_chunk_array(hdev, &cs_chunk_array, chunks, num_chunks,
			ctx);
	if (rc)
		goto out;

	/* currently it is guaranteed to have only one chunk */
	chunk = &cs_chunk_array[0];

	if (chunk->queue_index >= hdev->asic_prop.max_queues) {
		atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
		atomic64_inc(&cntr->validation_drop_cnt);
		dev_err(hdev->dev, "Queue index %d is invalid\n",
			chunk->queue_index);
		rc = -EINVAL;
		goto free_cs_chunk_array;
	}

	q_idx = chunk->queue_index;
	hw_queue_prop = &hdev->asic_prop.hw_queues_props[q_idx];
	q_type = hw_queue_prop->type;

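	/* Signal/wait CS is allowed only on queues that support sync stream operations */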
	if (!hw_queue_prop->supports_sync_stream) {
		atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
		atomic64_inc(&cntr->validation_drop_cnt);
		dev_err(hdev->dev,
			"Queue index %d does not support sync stream operations\n",
			q_idx);
		rc = -EINVAL;
		goto free_cs_chunk_array;
	}

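	/*
	 * Collective wait is allowed only on a collective-master queue and
	 * requires the NIC ports to be enabled.
	 */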
	if (cs_type == CS_TYPE_COLLECTIVE_WAIT) {
		if (hw_queue_prop->collective_mode != HL_COLLECTIVE_MASTER) {
			atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
			atomic64_inc(&cntr->validation_drop_cnt);
			dev_err(hdev->dev,
				"Queue index %d is invalid\n", q_idx);
			rc = -EINVAL;
			goto free_cs_chunk_array;
		}

		if (!hdev->nic_ports_mask) {
			atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
			atomic64_inc(&cntr->validation_drop_cnt);
			dev_err(hdev->dev,
				"Collective operations not supported when NIC ports are disabled");
			rc = -EINVAL;
			goto free_cs_chunk_array;
		}

		collective_engine_id = chunk->collective_engine_id;
	}

	is_wait_cs = !!(cs_type == CS_TYPE_WAIT ||
			cs_type == CS_TYPE_COLLECTIVE_WAIT);

	cs_encaps_signals = !!(flags & HL_CS_FLAGS_ENCAP_SIGNALS);

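	/*
	 * A wait CS depends on a previously submitted signal CS. Extract the
	 * signal CS sequence number and, for encapsulated signals, look up
	 * the matching reservation handle in the context's signal manager.
	 */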
	if (is_wait_cs) {
		rc = cs_ioctl_extract_signal_seq(hdev, chunk, &signal_seq,
				ctx, cs_encaps_signals);
		if (rc)
			goto free_cs_chunk_array;

		if (cs_encaps_signals) {
			/* Check whether this CS sequence has an encapsulated
			 * signals handle
			 */
			struct idr *idp;
			u32 id;

			spin_lock(&ctx->sig_mgr.lock);
			idp = &ctx->sig_mgr.handles;
			idr_for_each_entry(idp, encaps_sig_hdl, id) {
				if (encaps_sig_hdl->cs_seq == signal_seq) {
					handle_found = true;
					/* Get a refcount to protect this
					 * handle from being removed from the
					 * idr; needed when multiple wait CSs
					 * are used with an offset to wait on
					 * reserved encaps signals.
					 */
					kref_get(&encaps_sig_hdl->refcount);
					break;
				}
			}
			spin_unlock(&ctx->sig_mgr.lock);

			if (!handle_found) {
				/* treat as signal CS already finished */
				dev_dbg(hdev->dev, "Cannot find encapsulated signals handle for seq 0x%llx\n",
						signal_seq);
				rc = 0;
				goto free_cs_chunk_array;
			}

			/* Also validate the signal offset value */
			if (chunk->encaps_signal_offset >
					encaps_sig_hdl->count) {
				dev_err(hdev->dev, "offset(%u) value exceeds max reserved signals count(%u)!\n",
						chunk->encaps_signal_offset,
						encaps_sig_hdl->count);
				rc = -EINVAL;
				goto free_cs_chunk_array;
			}
		}

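		/*
		 * Get the fence of the signal CS. A NULL fence means the
		 * signal CS has already finished, so there is nothing to
		 * wait on.
		 */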
		sig_fence = hl_ctx_get_fence(ctx, signal_seq);
		if (IS_ERR(sig_fence)) {
			atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
			atomic64_inc(&cntr->validation_drop_cnt);
			dev_err(hdev->dev,
				"Failed to get signal CS with seq 0x%llx\n",
				signal_seq);
			rc = PTR_ERR(sig_fence);
			goto free_cs_chunk_array;
		}

		if (!sig_fence) {
			/* signal CS already finished */
			rc = 0;
			goto free_cs_chunk_array;
		}

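		/*
		 * The signal CS must be either a SIGNAL CS or a staged CS
		 * carrying encapsulated signals; any other type is rejected.
		 */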
		sig_waitcs_cmpl =
			container_of(sig_fence, struct hl_cs_compl, base_fence);

		staged_cs_with_encaps_signals = !!
				(sig_waitcs_cmpl->type == CS_TYPE_DEFAULT &&
				(flags & HL_CS_FLAGS_ENCAP_SIGNALS));

		if (sig_waitcs_cmpl->type != CS_TYPE_SIGNAL &&
				!staged_cs_with_encaps_signals) {
			atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
			atomic64_inc(&cntr->validation_drop_cnt);
			dev_err(hdev->dev,
				"CS seq 0x%llx is not of a signal/encaps-signal CS\n",
				signal_seq);
			hl_fence_put(sig_fence);
			rc = -EINVAL;
			goto free_cs_chunk_array;
		}

		if (completion_done(&sig_fence->completion)) {
			/* signal CS already finished */
			hl_fence_put(sig_fence);
			rc = 0;
			goto free_cs_chunk_array;
		}
	}

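	/* Allocate a new CS object; if that fails, drop the reference taken on the signal fence */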
	rc = allocate_cs(hdev, ctx, cs_type, ULLONG_MAX, &cs, flags, timeout);
	if (rc) {
		if (is_wait_cs)
			hl_fence_put(sig_fence);

		goto free_cs_chunk_array;
	}

	/*
	 * Save the signal CS fence for later initialization right before
	 * hanging the wait CS on the queue.
	 * For the encaps signals case, we save the CS sequence and handle
	 * pointer for later initialization.
	 */
	if (is_wait_cs) {
		cs->signal_fence = sig_fence;
		/* Store the handle pointer so we don't have to look for it
		 * again later in the flow, when we need to set the SOB info
		 * in the hw_queue.
		 */
		if (cs->encaps_signals)
			cs->encaps_sig_hdl = encaps_sig_hdl;
	}

	hl_debugfs_add_cs(cs);

	*cs_seq = cs->sequence;

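	/*
	 * Create the CS jobs: signal/wait jobs on the chosen queue, or
	 * collective-wait jobs via the ASIC-specific callback.
	 */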
	if (cs_type == CS_TYPE_WAIT || cs_type == CS_TYPE_SIGNAL)
		rc = cs_ioctl_signal_wait_create_jobs(hdev, ctx, cs, q_type,
				q_idx, chunk->encaps_signal_offset);
	else if (cs_type == CS_TYPE_COLLECTIVE_WAIT)
		rc = hdev->asic_funcs->collective_wait_create_jobs(hdev, ctx,
				cs, q_idx, collective_engine_id,
				chunk->encaps_signal_offset);
	else {
		atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
		atomic64_inc(&cntr->validation_drop_cnt);
		rc = -EINVAL;
	}

	if (rc)
		goto free_cs_object;

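	/* Submit the CS to the H/W queues */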
	rc = hl_hw_queue_schedule_cs(cs);
	if (rc) {
		/* If a wait CS failed here, it means the signal CS has
		 * already completed. We want to free all of its related
		 * objects, but we don't want to fail the ioctl.
		 */
		if (is_wait_cs)
			rc = 0;
		else if (rc != -EAGAIN)
			dev_err(hdev->dev,
				"Failed to submit CS %d.%llu to H/W queues, error %d\n",
				ctx->asid, cs->sequence, rc);
		goto free_cs_object;
	}

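	/* Return the signal SOB address offset and initial SOB count to the caller */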
	*signal_sob_addr_offset = cs->sob_addr_offset;
	*signal_initial_sob_count = cs->initial_sob_count;

	rc = HL_CS_STATUS_SUCCESS;
	if (is_wait_cs)
		wait_cs_submitted = true;
	goto put_cs;

free_cs_object:
	cs_rollback(hdev, cs);
	*cs_seq = ULLONG_MAX;
	/* The path below is both for good and erroneous exits */
put_cs:
	/* We finished with the CS in this function, so put the ref */
	cs_put(cs);
free_cs_chunk_array:
	if (!wait_cs_submitted && cs_encaps_signals && handle_found &&
							is_wait_cs)
		kref_put(&encaps_sig_hdl->refcount,
				hl_encaps_handle_do_release);
	kfree(cs_chunk_array);
out:
	return rc;
}