// SPDX-License-Identifier: GPL-2.0

/*
 * Copyright 2016-2021 HabanaLabs, Ltd.
 * All Rights Reserved.
 */

#include <uapi/misc/habanalabs.h>
#include "habanalabs.h"

#include <linux/uaccess.h>
#include <linux/slab.h>

#define HL_CS_FLAGS_TYPE_MASK	(HL_CS_FLAGS_SIGNAL | HL_CS_FLAGS_WAIT | \
				HL_CS_FLAGS_COLLECTIVE_WAIT)

#define MAX_TS_ITER_NUM 10
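/*
 * Note: assumption based on usage elsewhere in this file - MAX_TS_ITER_NUM
 * is taken here to bound the number of attempts made when searching for a
 * free timestamp registration record before giving up.
 */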
/**
 * enum hl_cs_wait_status - cs wait status
 * @CS_WAIT_STATUS_BUSY: cs was not completed yet
 * @CS_WAIT_STATUS_COMPLETED: cs completed
 * @CS_WAIT_STATUS_GONE: cs completed but fence is already gone
 */
enum hl_cs_wait_status {
	CS_WAIT_STATUS_BUSY,
	CS_WAIT_STATUS_COMPLETED,
	CS_WAIT_STATUS_GONE
};
static void job_wq_completion(struct work_struct *work);
static int _hl_cs_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
				u64 timeout_us, u64 seq,
				enum hl_cs_wait_status *status, s64 *timestamp);
static void cs_do_release(struct kref *ref);
static void hl_sob_reset(struct kref *ref)
{
	struct hl_hw_sob *hw_sob = container_of(ref, struct hl_hw_sob,
							kref);
	struct hl_device *hdev = hw_sob->hdev;

	dev_dbg(hdev->dev, "reset sob id %u\n", hw_sob->sob_id);

	hdev->asic_funcs->reset_sob(hdev, hw_sob);

	hw_sob->need_reset = false;
}
void hl_sob_reset_error(struct kref *ref)
{
	struct hl_hw_sob *hw_sob = container_of(ref, struct hl_hw_sob,
							kref);
	struct hl_device *hdev = hw_sob->hdev;

	dev_crit(hdev->dev,
		"SOB release shouldn't be called here, q_idx: %d, sob_id: %d\n",
		hw_sob->q_idx, hw_sob->sob_id);
}
void hw_sob_put(struct hl_hw_sob *hw_sob)
{
	if (hw_sob)
		kref_put(&hw_sob->kref, hl_sob_reset);
}

static void hw_sob_put_err(struct hl_hw_sob *hw_sob)
{
	if (hw_sob)
		kref_put(&hw_sob->kref, hl_sob_reset_error);
}

void hw_sob_get(struct hl_hw_sob *hw_sob)
{
	if (hw_sob)
		kref_get(&hw_sob->kref);
}
/**
 * hl_gen_sob_mask() - Generates a sob mask to be used in a monitor arm packet
 * @sob_base: sob base id
 * @sob_mask: sob user mask, each bit represents a sob offset from sob base
 * @mask: generated mask
 *
 * Return: 0 if given parameters are valid
 */
int hl_gen_sob_mask(u16 sob_base, u8 sob_mask, u8 *mask)
{
	int i;

	if (sob_mask == 0)
		return -EINVAL;

	if (sob_mask == 0x1) {
		*mask = ~(1 << (sob_base & 0x7));
	} else {
		/* find msb in order to verify sob range is valid */
		for (i = BITS_PER_BYTE - 1 ; i >= 0 ; i--)
			if (BIT(i) & sob_mask)
				break;

		if (i > (HL_MAX_SOBS_PER_MONITOR - (sob_base & 0x7) - 1))
			return -EINVAL;

		*mask = ~sob_mask;
	}

	return 0;
}
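/*
 * Illustrative example (not part of the driver flow): for sob_base = 8 and
 * sob_mask = 0x3 (two consecutive SOBs starting at the base), the msb of the
 * mask is bit 1, which fits inside the monitor window relative to
 * (sob_base & 0x7) = 0, so the generated monitor mask is ~0x3 = 0xfc.
 * A mask whose msb falls outside that window is rejected with -EINVAL.
 */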
static void hl_fence_release(struct kref *kref)
{
	struct hl_fence *fence =
		container_of(kref, struct hl_fence, refcount);
	struct hl_cs_compl *hl_cs_cmpl =
		container_of(fence, struct hl_cs_compl, base_fence);

	kfree(hl_cs_cmpl);
}
void hl_fence_put(struct hl_fence *fence)
{
	if (IS_ERR_OR_NULL(fence))
		return;
	kref_put(&fence->refcount, hl_fence_release);
}

void hl_fences_put(struct hl_fence **fence, int len)
{
	int i;

	for (i = 0; i < len; i++, fence++)
		hl_fence_put(*fence);
}

void hl_fence_get(struct hl_fence *fence)
{
	if (fence)
		kref_get(&fence->refcount);
}
static void hl_fence_init(struct hl_fence *fence, u64 sequence)
{
	kref_init(&fence->refcount);
	fence->cs_sequence = sequence;
	fence->error = 0;
	fence->timestamp = ktime_set(0, 0);
	fence->mcs_handling_done = false;
	init_completion(&fence->completion);
}
void cs_get(struct hl_cs *cs)
{
	kref_get(&cs->refcount);
}

static int cs_get_unless_zero(struct hl_cs *cs)
{
	return kref_get_unless_zero(&cs->refcount);
}

static void cs_put(struct hl_cs *cs)
{
	kref_put(&cs->refcount, cs_do_release);
}

static void cs_job_do_release(struct kref *ref)
{
	struct hl_cs_job *job = container_of(ref, struct hl_cs_job, refcount);

	kfree(job);
}

static void cs_job_put(struct hl_cs_job *job)
{
	kref_put(&job->refcount, cs_job_do_release);
}
bool cs_needs_completion(struct hl_cs *cs)
{
	/* In case this is a staged CS, only the last CS in sequence should
	 * get a completion, any non staged CS will always get a completion
	 */
	if (cs->staged_cs && !cs->staged_last)
		return false;

	return true;
}

bool cs_needs_timeout(struct hl_cs *cs)
{
	/* In case this is a staged CS, only the first CS in sequence should
	 * get a timeout, any non staged CS will always get a timeout
	 */
	if (cs->staged_cs && !cs->staged_first)
		return false;

	return true;
}
static bool is_cb_patched(struct hl_device *hdev, struct hl_cs_job *job)
{
	/*
	 * Patched CB is created for external queues jobs, and for H/W queues
	 * jobs if the user CB was allocated by driver and MMU is disabled.
	 */
	return (job->queue_type == QUEUE_TYPE_EXT ||
			(job->queue_type == QUEUE_TYPE_HW &&
					job->is_kernel_allocated_cb &&
					!hdev->mmu_enable));
}
/*
 * cs_parser - parse the user command submission
 *
 * @hpriv	: pointer to the private data of the fd
 * @job		: pointer to the job that holds the command submission info
 *
 * The function parses the command submission of the user. It calls the
 * ASIC specific parser, which returns a list of memory blocks to send
 * to the device as different command buffers
 *
 */
static int cs_parser(struct hl_fpriv *hpriv, struct hl_cs_job *job)
{
	struct hl_device *hdev = hpriv->hdev;
	struct hl_cs_parser parser;
	int rc;

	parser.ctx_id = job->cs->ctx->asid;
	parser.cs_sequence = job->cs->sequence;
	parser.job_id = job->id;

	parser.hw_queue_id = job->hw_queue_id;
	parser.job_userptr_list = &job->userptr_list;
	parser.patched_cb = NULL;
	parser.user_cb = job->user_cb;
	parser.user_cb_size = job->user_cb_size;
	parser.queue_type = job->queue_type;
	parser.is_kernel_allocated_cb = job->is_kernel_allocated_cb;
	job->patched_cb = NULL;
	parser.completion = cs_needs_completion(job->cs);

	rc = hdev->asic_funcs->cs_parser(hdev, &parser);

	if (is_cb_patched(hdev, job)) {
		if (!rc) {
			job->patched_cb = parser.patched_cb;
			job->job_cb_size = parser.patched_cb_size;
			job->contains_dma_pkt = parser.contains_dma_pkt;
			atomic_inc(&job->patched_cb->cs_cnt);
		}

		/*
		 * Whether the parsing worked or not, we don't need the
		 * original CB anymore because it was already parsed and
		 * won't be accessed again for this CS
		 */
		atomic_dec(&job->user_cb->cs_cnt);
		hl_cb_put(job->user_cb);
		job->user_cb = NULL;
	} else if (!rc) {
		job->job_cb_size = job->user_cb_size;
	}

	return rc;
}
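/*
 * complete_job - release a single job once it is done.
 *
 * Releases the patched/user CBs that are no longer needed, removes the job
 * from the CS job list and, for jobs on external/H/W queues of a CS that
 * gets completion, drops the CS reference that was taken for this job.
 */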
static void complete_job(struct hl_device *hdev, struct hl_cs_job *job)
{
	struct hl_cs *cs = job->cs;

	if (is_cb_patched(hdev, job)) {
		hl_userptr_delete_list(hdev, &job->userptr_list);

		/*
		 * We might arrive here from rollback and patched CB wasn't
		 * created, so we need to check it's not NULL
		 */
		if (job->patched_cb) {
			atomic_dec(&job->patched_cb->cs_cnt);
			hl_cb_put(job->patched_cb);
		}
	}

	/* For H/W queue jobs, if a user CB was allocated by driver and MMU is
	 * enabled, the user CB isn't released in cs_parser() and thus should be
	 * released here.
	 * This is also true for INT queues jobs which were allocated by driver
	 */
	if (job->is_kernel_allocated_cb &&
			((job->queue_type == QUEUE_TYPE_HW && hdev->mmu_enable) ||
				job->queue_type == QUEUE_TYPE_INT)) {
		atomic_dec(&job->user_cb->cs_cnt);
		hl_cb_put(job->user_cb);
	}

	/*
	 * This is the only place where there can be multiple threads
	 * modifying the list at the same time
	 */
	spin_lock(&cs->job_lock);
	list_del(&job->cs_node);
	spin_unlock(&cs->job_lock);

	hl_debugfs_remove_job(hdev, job);

	/* We decrement reference only for a CS that gets completion
	 * because the reference was incremented only for this kind of CS
	 * right before it was scheduled.
	 *
	 * In staged submission, only the last CS marked as 'staged_last'
	 * gets completion, hence its release function will be called from here.
	 * As for all the rest CS's in the staged submission which do not get
	 * completion, their CS reference will be decremented by the
	 * 'staged_last' CS during the CS release flow.
	 * All relevant PQ CI counters will be incremented during the CS release
	 * flow by calling 'hl_hw_queue_update_ci'.
	 */
	if (cs_needs_completion(cs) &&
		(job->queue_type == QUEUE_TYPE_EXT ||
			job->queue_type == QUEUE_TYPE_HW))
		cs_put(cs);

	cs_job_put(job);
}
/**
 * hl_staged_cs_find_first - locate the first CS in this staged submission
 *
 * @hdev: pointer to device structure
 * @cs_seq: staged submission sequence number
 *
 * @note: This function must be called under 'hdev->cs_mirror_lock'
 *
 * Find and return a CS pointer with the given sequence
 */
struct hl_cs *hl_staged_cs_find_first(struct hl_device *hdev, u64 cs_seq)
{
	struct hl_cs *cs;

	list_for_each_entry_reverse(cs, &hdev->cs_mirror_list, mirror_node)
		if (cs->staged_cs && cs->staged_first &&
				cs->sequence == cs_seq)
			return cs;

	return NULL;
}
/**
 * is_staged_cs_last_exists - returns true if the last CS in sequence exists
 *
 * @hdev: pointer to device structure
 * @cs: staged submission member
 *
 */
bool is_staged_cs_last_exists(struct hl_device *hdev, struct hl_cs *cs)
{
	struct hl_cs *last_entry;

	last_entry = list_last_entry(&cs->staged_cs_node, struct hl_cs,
								staged_cs_node);

	if (last_entry->staged_last)
		return true;

	return false;
}
/**
 * staged_cs_get - get CS reference if this CS is a part of a staged CS
 *
 * @hdev: pointer to device structure
 * @cs: current CS
 *
 * Increment CS reference for every CS in this staged submission except for
 * the CS which gets completion.
 */
static void staged_cs_get(struct hl_device *hdev, struct hl_cs *cs)
{
	/* Only the last CS in this staged submission will get a completion.
	 * We must increment the reference for all other CS's in this
	 * staged submission.
	 * Once we get a completion we will release the whole staged submission.
	 */
	if (!cs->staged_last)
		cs_get(cs);
}
/**
 * staged_cs_put - put a CS in case it is part of staged submission
 *
 * @hdev: pointer to device structure
 * @cs: current CS
 *
 * This function decrements a CS reference (for a non completion CS)
 */
static void staged_cs_put(struct hl_device *hdev, struct hl_cs *cs)
{
	/* We release all CS's in a staged submission except the last
	 * CS, whose reference we never incremented.
	 */
	if (!cs_needs_completion(cs))
		cs_put(cs);
}
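/*
 * Illustrative refcount flow (hypothetical three-part staged submission):
 * CS0 (staged_first) and CS1 (mid) each take an extra reference in
 * staged_cs_get(), while CS2 (staged_last) does not. When CS2 gets its
 * completion, the release flow walks the staged list and drops the extra
 * references via staged_cs_put(), so the whole submission is freed together.
 */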
static void cs_handle_tdr(struct hl_device *hdev, struct hl_cs *cs)
{
	bool next_entry_found = false;
	struct hl_cs *next, *first_cs;

	if (!cs_needs_timeout(cs))
		return;

	spin_lock(&hdev->cs_mirror_lock);

	/* We need to handle tdr only once for the complete staged submission.
	 * Hence, we choose the CS that reaches this function first which is
	 * the CS marked as 'staged_last'.
	 * In case a single staged cs was submitted which has both first and
	 * last indications, then "cs_find_first" below will return NULL, since
	 * we removed the cs node from the list before getting here,
	 * in such cases just continue with the cs to cancel its TDR work.
	 */
	if (cs->staged_cs && cs->staged_last) {
		first_cs = hl_staged_cs_find_first(hdev, cs->staged_sequence);
		if (first_cs)
			cs = first_cs;
	}

	spin_unlock(&hdev->cs_mirror_lock);

	/* Don't cancel TDR in case this CS timed out because we might be
	 * running from the TDR context
	 */
	if (cs->timedout || hdev->timeout_jiffies == MAX_SCHEDULE_TIMEOUT)
		return;

	if (cs->tdr_active)
		cancel_delayed_work_sync(&cs->work_tdr);

	spin_lock(&hdev->cs_mirror_lock);

	/* queue TDR for next CS */
	list_for_each_entry(next, &hdev->cs_mirror_list, mirror_node)
		if (cs_needs_timeout(next)) {
			next_entry_found = true;
			break;
		}

	if (next_entry_found && !next->tdr_active) {
		next->tdr_active = true;
		schedule_delayed_work(&next->work_tdr, next->timeout_jiffies);
	}

	spin_unlock(&hdev->cs_mirror_lock);
}
/*
 * force_complete_multi_cs - complete all contexts that wait on multi-CS
 *
 * @hdev: pointer to habanalabs device structure
 */
static void force_complete_multi_cs(struct hl_device *hdev)
{
	int i;

	for (i = 0; i < MULTI_CS_MAX_USER_CTX; i++) {
		struct multi_cs_completion *mcs_compl;

		mcs_compl = &hdev->multi_cs_completion[i];

		spin_lock(&mcs_compl->lock);

		if (!mcs_compl->used) {
			spin_unlock(&mcs_compl->lock);
			continue;
		}

		/* when calling force complete no context should be waiting on
		 * multi-CS.
		 * We are calling the function as a protection for such case
		 * to free any pending context and print error message
		 */
		dev_err(hdev->dev,
			"multi-CS completion context %d still waiting when calling force completion\n",
			i);
		complete_all(&mcs_compl->completion);
		spin_unlock(&mcs_compl->lock);
	}
}
/*
 * complete_multi_cs - complete all waiting entities on multi-CS
 *
 * @hdev: pointer to habanalabs device structure
 * @cs: CS structure
 * The function signals a waiting entity that has an overlapping stream masters
 * with the completed CS.
 * For example:
 * - a completed CS worked on stream master QID 4, multi CS completion
 *   is actively waiting on stream master QIDs 3, 5. don't send signal as no
 *   common stream master QID
 * - a completed CS worked on stream master QID 4, multi CS completion
 *   is actively waiting on stream master QIDs 3, 4. send signal as stream
 *   master QID 4 is common
 */
static void complete_multi_cs(struct hl_device *hdev, struct hl_cs *cs)
{
	struct hl_fence *fence = cs->fence;
	int i;

	/* in case of multi CS check for completion only for the first CS */
	if (cs->staged_cs && !cs->staged_first)
		return;

	for (i = 0; i < MULTI_CS_MAX_USER_CTX; i++) {
		struct multi_cs_completion *mcs_compl;

		mcs_compl = &hdev->multi_cs_completion[i];
		if (!mcs_compl->used)
			continue;

		spin_lock(&mcs_compl->lock);

		/*
		 * complete if:
		 * 1. still waiting for completion
		 * 2. the completed CS has at least one overlapping stream
		 *    master with the stream masters in the completion
		 */
		if (mcs_compl->used &&
				(fence->stream_master_qid_map &
					mcs_compl->stream_master_qid_map)) {
			/* extract the timestamp only of first completed CS */
			if (!mcs_compl->timestamp)
				mcs_compl->timestamp = ktime_to_ns(fence->timestamp);

			complete_all(&mcs_compl->completion);

			/*
			 * Setting mcs_handling_done inside the lock ensures
			 * at least one fence has mcs_handling_done set to
			 * true before wait for mcs finish. This ensures at
			 * least one CS will be set as completed when polling
			 * mcs fences.
			 */
			fence->mcs_handling_done = true;
		}

		spin_unlock(&mcs_compl->lock);
	}
	/* In case CS completed without mcs completion initialized */
	fence->mcs_handling_done = true;
}
static inline void cs_release_sob_reset_handler(struct hl_device *hdev,
						struct hl_cs *cs,
						struct hl_cs_compl *hl_cs_cmpl)
{
	/* Skip this handler if the cs wasn't submitted, to avoid putting
	 * the hw_sob twice, since this case was already handled at this point,
	 * also skip if the hw_sob pointer wasn't set.
	 */
	if (!hl_cs_cmpl->hw_sob || !cs->submitted)
		return;

	spin_lock(&hl_cs_cmpl->lock);

	/*
	 * we get refcount upon reservation of signals or signal/wait cs for the
	 * hw_sob object, and need to put it when the first staged cs
	 * (which contains the encaps signals) or cs signal/wait is completed.
	 */
	if ((hl_cs_cmpl->type == CS_TYPE_SIGNAL) ||
			(hl_cs_cmpl->type == CS_TYPE_WAIT) ||
			(hl_cs_cmpl->type == CS_TYPE_COLLECTIVE_WAIT) ||
			(!!hl_cs_cmpl->encaps_signals)) {
		dev_dbg(hdev->dev,
				"CS 0x%llx type %d finished, sob_id: %d, sob_val: %u\n",
				hl_cs_cmpl->cs_seq,
				hl_cs_cmpl->type,
				hl_cs_cmpl->hw_sob->sob_id,
				hl_cs_cmpl->sob_val);

		hw_sob_put(hl_cs_cmpl->hw_sob);

		if (hl_cs_cmpl->type == CS_TYPE_COLLECTIVE_WAIT)
			hdev->asic_funcs->reset_sob_group(hdev,
					hl_cs_cmpl->sob_group);
	}

	spin_unlock(&hl_cs_cmpl->lock);
}
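/*
 * cs_do_release - final CS release, called when the CS refcount drops to zero.
 *
 * Completes any remaining (internal queue) jobs, updates queue CI counters,
 * removes the CS from the mirror list, re-arms the TDR for the next CS,
 * releases staged-submission and encapsulated-signals references, sets the
 * fence error/timestamp and signals all waiters, including multi-CS ones.
 */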
static void cs_do_release(struct kref *ref)
{
	struct hl_cs *cs = container_of(ref, struct hl_cs, refcount);
	struct hl_device *hdev = cs->ctx->hdev;
	struct hl_cs_job *job, *tmp;
	struct hl_cs_compl *hl_cs_cmpl =
			container_of(cs->fence, struct hl_cs_compl, base_fence);

	cs->completed = true;

	/*
	 * Although reaching here means that all external jobs have finished
	 * (because each one of them took a refcount on the CS), we still
	 * need to go over the internal jobs and complete them. Otherwise, we
	 * will have leaked memory and what's worse, the CS object (and
	 * potentially the CTX object) could be released, while the JOB
	 * still holds a pointer to them (but no reference).
	 */
	list_for_each_entry_safe(job, tmp, &cs->job_list, cs_node)
		complete_job(hdev, job);

	if (!cs->submitted) {
		/*
		 * In case the wait for signal CS was submitted, the fence put
		 * occurs in init_signal_wait_cs() or collective_wait_init_cs()
		 * right before hanging on the PQ.
		 */
		if (cs->type == CS_TYPE_WAIT ||
				cs->type == CS_TYPE_COLLECTIVE_WAIT)
			hl_fence_put(cs->signal_fence);

		goto out;
	}

	/* Need to update CI for all queue jobs that do not get completion */
	hl_hw_queue_update_ci(cs);

	/* remove CS from CS mirror list */
	spin_lock(&hdev->cs_mirror_lock);
	list_del_init(&cs->mirror_node);
	spin_unlock(&hdev->cs_mirror_lock);

	cs_handle_tdr(hdev, cs);

	if (cs->staged_cs) {
		/* the completion CS decrements reference for the entire
		 * staged submission
		 */
		if (cs->staged_last) {
			struct hl_cs *staged_cs, *tmp;

			list_for_each_entry_safe(staged_cs, tmp,
					&cs->staged_cs_node, staged_cs_node)
				staged_cs_put(hdev, staged_cs);
		}

		/* A staged CS will be a member in the list only after it
		 * was submitted. We used 'cs_mirror_lock' when inserting
		 * it to the list so we will use it again when removing it
		 */
		if (cs->submitted) {
			spin_lock(&hdev->cs_mirror_lock);
			list_del(&cs->staged_cs_node);
			spin_unlock(&hdev->cs_mirror_lock);
		}

		/* decrement refcount to handle when first staged cs
		 * with encaps signals is completed.
		 */
		if (hl_cs_cmpl->encaps_signals)
			kref_put(&hl_cs_cmpl->encaps_sig_hdl->refcount,
					hl_encaps_handle_do_release);
	}

	if ((cs->type == CS_TYPE_WAIT || cs->type == CS_TYPE_COLLECTIVE_WAIT)
			&& cs->encaps_signals)
		kref_put(&cs->encaps_sig_hdl->refcount,
					hl_encaps_handle_do_release);

out:
	/* Must be called before hl_ctx_put because inside we use ctx to get
	 * the device
	 */
	hl_debugfs_remove_cs(cs);

	hl_ctx_put(cs->ctx);

	/* We need to mark an error for not submitted because in that case
	 * the hl fence release flow is different. Mainly, we don't need
	 * to handle hw_sob for signal/wait
	 */
	if (cs->timedout)
		cs->fence->error = -ETIMEDOUT;
	else if (cs->aborted)
		cs->fence->error = -EIO;
	else if (!cs->submitted)
		cs->fence->error = -EBUSY;

	if (unlikely(cs->skip_reset_on_timeout)) {
		dev_err(hdev->dev,
			"Command submission %llu completed after %llu (s)\n",
			cs->sequence,
			div_u64(jiffies - cs->submission_time_jiffies, HZ));
	}

	if (cs->timestamp)
		cs->fence->timestamp = ktime_get();
	complete_all(&cs->fence->completion);
	complete_multi_cs(hdev, cs);

	cs_release_sob_reset_handler(hdev, cs, hl_cs_cmpl);

	hl_fence_put(cs->fence);

	kfree(cs->jobs_in_queue_cnt);
	kfree(cs);
}
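/*
 * cs_timedout - TDR (timeout detection and recovery) work handler.
 *
 * Runs from the delayed work armed when the CS was scheduled; reports which
 * type of CS did not finish in time and, unless reset-on-timeout is skipped,
 * triggers a device reset.
 */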
static void cs_timedout(struct work_struct *work)
{
	struct hl_device *hdev;
	int rc;
	struct hl_cs *cs = container_of(work, struct hl_cs,
						work_tdr.work);
	bool skip_reset_on_timeout = cs->skip_reset_on_timeout;

	rc = cs_get_unless_zero(cs);
	if (!rc)
		return;

	if ((!cs->submitted) || (cs->completed)) {
		cs_put(cs);
		return;
	}

	/* Mark the CS as timed out so we won't try to cancel its TDR */
	if (likely(!skip_reset_on_timeout))
		cs->timedout = true;

	hdev = cs->ctx->hdev;

	/* Save only the first CS timeout parameters */
	rc = atomic_cmpxchg(&hdev->last_error.cs_write_disable, 0, 1);
	if (!rc) {
		hdev->last_error.open_dev_timestamp = hdev->last_successful_open_ktime;
		hdev->last_error.cs_timeout_timestamp = ktime_get();
		hdev->last_error.cs_timeout_seq = cs->sequence;
	}

	switch (cs->type) {
	case CS_TYPE_SIGNAL:
		dev_err(hdev->dev,
			"Signal command submission %llu has not finished in time!\n",
			cs->sequence);
		break;

	case CS_TYPE_WAIT:
		dev_err(hdev->dev,
			"Wait command submission %llu has not finished in time!\n",
			cs->sequence);
		break;

	case CS_TYPE_COLLECTIVE_WAIT:
		dev_err(hdev->dev,
			"Collective Wait command submission %llu has not finished in time!\n",
			cs->sequence);
		break;

	default:
		dev_err(hdev->dev,
			"Command submission %llu has not finished in time!\n",
			cs->sequence);
		break;
	}

	rc = hl_state_dump(hdev);
	if (rc)
		dev_err(hdev->dev, "Error during system state dump %d\n", rc);

	cs_put(cs);

	if (likely(!skip_reset_on_timeout)) {
		if (hdev->reset_on_lockup)
			hl_device_reset(hdev, HL_DRV_RESET_TDR);
		else
			hdev->reset_info.needs_reset = true;
	}
}
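/*
 * allocate_cs - allocate a new CS object and its completion fence.
 *
 * Takes a context reference, initializes the TDR work and job bookkeeping,
 * and assigns the next sequence number from the per-context 'cs_pending'
 * ring; submission is rejected with -EAGAIN when the ring slot is still
 * occupied by an uncompleted fence (too many CSs in flight).
 */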
static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx,
			enum hl_cs_type cs_type, u64 user_sequence,
			struct hl_cs **cs_new, u32 flags, u32 timeout)
{
	struct hl_cs_counters_atomic *cntr;
	struct hl_fence *other = NULL;
	struct hl_cs_compl *cs_cmpl;
	struct hl_cs *cs;
	int rc;

	cntr = &hdev->aggregated_cs_counters;

	cs = kzalloc(sizeof(*cs), GFP_ATOMIC);
	if (!cs)
		cs = kzalloc(sizeof(*cs), GFP_KERNEL);

	if (!cs) {
		atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
		atomic64_inc(&cntr->out_of_mem_drop_cnt);
		return -ENOMEM;
	}

	/* increment refcnt for context */
	hl_ctx_get(hdev, ctx);

	cs->ctx = ctx;
	cs->submitted = false;
	cs->completed = false;
	cs->type = cs_type;
	cs->timestamp = !!(flags & HL_CS_FLAGS_TIMESTAMP);
	cs->encaps_signals = !!(flags & HL_CS_FLAGS_ENCAP_SIGNALS);
	cs->timeout_jiffies = timeout;
	cs->skip_reset_on_timeout =
		hdev->reset_info.skip_reset_on_timeout ||
		!!(flags & HL_CS_FLAGS_SKIP_RESET_ON_TIMEOUT);
	cs->submission_time_jiffies = jiffies;
	INIT_LIST_HEAD(&cs->job_list);
	INIT_DELAYED_WORK(&cs->work_tdr, cs_timedout);
	kref_init(&cs->refcount);
	spin_lock_init(&cs->job_lock);

	cs_cmpl = kzalloc(sizeof(*cs_cmpl), GFP_ATOMIC);
	if (!cs_cmpl)
		cs_cmpl = kzalloc(sizeof(*cs_cmpl), GFP_KERNEL);

	if (!cs_cmpl) {
		atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
		atomic64_inc(&cntr->out_of_mem_drop_cnt);
		rc = -ENOMEM;
		goto free_cs;
	}

	cs->jobs_in_queue_cnt = kcalloc(hdev->asic_prop.max_queues,
			sizeof(*cs->jobs_in_queue_cnt), GFP_ATOMIC);
	if (!cs->jobs_in_queue_cnt)
		cs->jobs_in_queue_cnt = kcalloc(hdev->asic_prop.max_queues,
				sizeof(*cs->jobs_in_queue_cnt), GFP_KERNEL);

	if (!cs->jobs_in_queue_cnt) {
		atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
		atomic64_inc(&cntr->out_of_mem_drop_cnt);
		rc = -ENOMEM;
		goto free_cs_cmpl;
	}

	cs_cmpl->hdev = hdev;
	cs_cmpl->type = cs->type;
	spin_lock_init(&cs_cmpl->lock);
	cs->fence = &cs_cmpl->base_fence;

	spin_lock(&ctx->cs_lock);

	cs_cmpl->cs_seq = ctx->cs_sequence;
	other = ctx->cs_pending[cs_cmpl->cs_seq &
				(hdev->asic_prop.max_pending_cs - 1)];

	if (other && !completion_done(&other->completion)) {
		/* If the following statement is true, it means we have reached
		 * a point in which only part of the staged submission was
		 * submitted and we don't have enough room in the 'cs_pending'
		 * array for the rest of the submission.
		 * This causes a deadlock because this CS will never be
		 * completed as it depends on future CS's for completion.
		 */
		if (other->cs_sequence == user_sequence)
			dev_crit_ratelimited(hdev->dev,
				"Staged CS %llu deadlock due to lack of resources",
				user_sequence);

		dev_dbg_ratelimited(hdev->dev,
			"Rejecting CS because of too many in-flight CSs\n");
		atomic64_inc(&ctx->cs_counters.max_cs_in_flight_drop_cnt);
		atomic64_inc(&cntr->max_cs_in_flight_drop_cnt);
		rc = -EAGAIN;
		goto free_fence;
	}

	/* init hl_fence */
	hl_fence_init(&cs_cmpl->base_fence, cs_cmpl->cs_seq);

	cs->sequence = cs_cmpl->cs_seq;

	ctx->cs_pending[cs_cmpl->cs_seq &
			(hdev->asic_prop.max_pending_cs - 1)] =
							&cs_cmpl->base_fence;
	ctx->cs_sequence++;

	hl_fence_get(&cs_cmpl->base_fence);

	hl_fence_put(other);

	spin_unlock(&ctx->cs_lock);

	*cs_new = cs;

	return 0;

free_fence:
	spin_unlock(&ctx->cs_lock);
	kfree(cs->jobs_in_queue_cnt);
free_cs_cmpl:
	kfree(cs_cmpl);
free_cs:
	kfree(cs);
	hl_ctx_put(ctx);
	return rc;
}
static void cs_rollback(struct hl_device *hdev, struct hl_cs *cs)
{
	struct hl_cs_job *job, *tmp;

	staged_cs_put(hdev, cs);

	list_for_each_entry_safe(job, tmp, &cs->job_list, cs_node)
		complete_job(hdev, job);
}
void hl_cs_rollback_all(struct hl_device *hdev, bool skip_wq_flush)
{
	int i;
	struct hl_cs *cs, *tmp;

	if (!skip_wq_flush) {
		flush_workqueue(hdev->ts_free_obj_wq);

		/* flush all completions before iterating over the CS mirror list in
		 * order to avoid a race with the release functions
		 */
		for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
			flush_workqueue(hdev->cq_wq[i]);
	}

	/* Make sure we don't have leftovers in the CS mirror list */
	list_for_each_entry_safe(cs, tmp, &hdev->cs_mirror_list, mirror_node) {
		cs_get(cs);
		cs->aborted = true;
		dev_warn_ratelimited(hdev->dev, "Killing CS %d.%llu\n",
					cs->ctx->asid, cs->sequence);
		cs_rollback(hdev, cs);
		cs_put(cs);
	}

	force_complete_multi_cs(hdev);
}
static void
wake_pending_user_interrupt_threads(struct hl_user_interrupt *interrupt)
{
	struct hl_user_pending_interrupt *pend, *temp;
	unsigned long flags;

	spin_lock_irqsave(&interrupt->wait_list_lock, flags);
	list_for_each_entry_safe(pend, temp, &interrupt->wait_list_head, wait_list_node) {
		if (pend->ts_reg_info.ts_buff) {
			list_del(&pend->wait_list_node);
			hl_ts_put(pend->ts_reg_info.ts_buff);
			hl_cb_put(pend->ts_reg_info.cq_cb);
		} else {
			pend->fence.error = -EIO;
			complete_all(&pend->fence.completion);
		}
	}
	spin_unlock_irqrestore(&interrupt->wait_list_lock, flags);
}
void hl_release_pending_user_interrupts(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct hl_user_interrupt *interrupt;
	int i;

	if (!prop->user_interrupt_count)
		return;

	/* We iterate through the user interrupt requests and wake up all
	 * user threads waiting for interrupt completion. We iterate the
	 * list under a lock, this is why all user threads, once awake,
	 * will wait on the same lock and will release the waiting object upon
	 * unlock.
	 */

	for (i = 0 ; i < prop->user_interrupt_count ; i++) {
		interrupt = &hdev->user_interrupt[i];
		wake_pending_user_interrupt_threads(interrupt);
	}

	interrupt = &hdev->common_user_interrupt;
	wake_pending_user_interrupt_threads(interrupt);
}
static void job_wq_completion(struct work_struct *work)
{
	struct hl_cs_job *job = container_of(work, struct hl_cs_job,
						finish_work);
	struct hl_cs *cs = job->cs;
	struct hl_device *hdev = cs->ctx->hdev;

	/* job is no longer needed */
	complete_job(hdev, job);
}
static int validate_queue_index(struct hl_device *hdev,
				struct hl_cs_chunk *chunk,
				enum hl_queue_type *queue_type,
				bool *is_kernel_allocated_cb)
{
	struct asic_fixed_properties *asic = &hdev->asic_prop;
	struct hw_queue_properties *hw_queue_prop;

	/* This must be checked here to prevent out-of-bounds access to
	 * hw_queues_props array
	 */
	if (chunk->queue_index >= asic->max_queues) {
		dev_err(hdev->dev, "Queue index %d is invalid\n",
			chunk->queue_index);
		return -EINVAL;
	}

	hw_queue_prop = &asic->hw_queues_props[chunk->queue_index];

	if (hw_queue_prop->type == QUEUE_TYPE_NA) {
		dev_err(hdev->dev, "Queue index %d is invalid\n",
			chunk->queue_index);
		return -EINVAL;
	}

	if (hw_queue_prop->driver_only) {
		dev_err(hdev->dev,
			"Queue index %d is restricted for the kernel driver\n",
			chunk->queue_index);
		return -EINVAL;
	}

	/* When hw queue type isn't QUEUE_TYPE_HW,
	 * USER_ALLOC_CB flag shall be referred to as "don't care".
	 */
	if (hw_queue_prop->type == QUEUE_TYPE_HW) {
		if (chunk->cs_chunk_flags & HL_CS_CHUNK_FLAGS_USER_ALLOC_CB) {
			if (!(hw_queue_prop->cb_alloc_flags & CB_ALLOC_USER)) {
				dev_err(hdev->dev,
					"Queue index %d doesn't support user CB\n",
					chunk->queue_index);
				return -EINVAL;
			}

			*is_kernel_allocated_cb = false;
		} else {
			if (!(hw_queue_prop->cb_alloc_flags &
					CB_ALLOC_KERNEL)) {
				dev_err(hdev->dev,
					"Queue index %d doesn't support kernel CB\n",
					chunk->queue_index);
				return -EINVAL;
			}

			*is_kernel_allocated_cb = true;
		}
	} else {
		*is_kernel_allocated_cb = !!(hw_queue_prop->cb_alloc_flags
						& CB_ALLOC_KERNEL);
	}

	*queue_type = hw_queue_prop->type;
	return 0;
}
static struct hl_cb *get_cb_from_cs_chunk(struct hl_device *hdev,
					struct hl_cb_mgr *cb_mgr,
					struct hl_cs_chunk *chunk)
{
	struct hl_cb *cb;
	u32 cb_handle;

	cb_handle = (u32) (chunk->cb_handle >> PAGE_SHIFT);

	cb = hl_cb_get(hdev, cb_mgr, cb_handle);
	if (!cb) {
		dev_err(hdev->dev, "CB handle 0x%x invalid\n", cb_handle);
		return NULL;
	}

	if ((chunk->cb_size < 8) || (chunk->cb_size > cb->size)) {
		dev_err(hdev->dev, "CB size %u invalid\n", chunk->cb_size);
		goto release_cb;
	}

	atomic_inc(&cb->cs_cnt);

	return cb;

release_cb:
	hl_cb_put(cb);

	return NULL;
}
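/*
 * hl_cs_allocate_job - allocate and initialize a single job object.
 *
 * Tries an atomic allocation first and falls back to GFP_KERNEL; jobs on
 * external queues also get their completion work item initialized here.
 */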
struct hl_cs_job *hl_cs_allocate_job(struct hl_device *hdev,
		enum hl_queue_type queue_type, bool is_kernel_allocated_cb)
{
	struct hl_cs_job *job;

	job = kzalloc(sizeof(*job), GFP_ATOMIC);
	if (!job)
		job = kzalloc(sizeof(*job), GFP_KERNEL);

	if (!job)
		return NULL;

	kref_init(&job->refcount);
	job->queue_type = queue_type;
	job->is_kernel_allocated_cb = is_kernel_allocated_cb;

	if (is_cb_patched(hdev, job))
		INIT_LIST_HEAD(&job->userptr_list);

	if (job->queue_type == QUEUE_TYPE_EXT)
		INIT_WORK(&job->finish_work, job_wq_completion);

	return job;
}
static enum hl_cs_type hl_cs_get_cs_type(u32 cs_type_flags)
{
	if (cs_type_flags & HL_CS_FLAGS_SIGNAL)
		return CS_TYPE_SIGNAL;
	else if (cs_type_flags & HL_CS_FLAGS_WAIT)
		return CS_TYPE_WAIT;
	else if (cs_type_flags & HL_CS_FLAGS_COLLECTIVE_WAIT)
		return CS_TYPE_COLLECTIVE_WAIT;
	else if (cs_type_flags & HL_CS_FLAGS_RESERVE_SIGNALS_ONLY)
		return CS_RESERVE_SIGNALS;
	else if (cs_type_flags & HL_CS_FLAGS_UNRESERVE_SIGNALS_ONLY)
		return CS_UNRESERVE_SIGNALS;

	return CS_TYPE_DEFAULT;
}
static int hl_cs_sanity_checks(struct hl_fpriv *hpriv, union hl_cs_args *args)
{
	struct hl_device *hdev = hpriv->hdev;
	struct hl_ctx *ctx = hpriv->ctx;
	u32 cs_type_flags, num_chunks;
	enum hl_device_status status;
	enum hl_cs_type cs_type;

	if (!hl_device_operational(hdev, &status)) {
		return -EBUSY;
	}

	if ((args->in.cs_flags & HL_CS_FLAGS_STAGED_SUBMISSION) &&
			!hdev->supports_staged_submission) {
		dev_err(hdev->dev, "staged submission not supported");
		return -EPERM;
	}

	cs_type_flags = args->in.cs_flags & HL_CS_FLAGS_TYPE_MASK;

	if (unlikely(cs_type_flags && !is_power_of_2(cs_type_flags))) {
		dev_err(hdev->dev,
			"CS type flags are mutually exclusive, context %d\n",
			ctx->asid);
		return -EINVAL;
	}

	cs_type = hl_cs_get_cs_type(cs_type_flags);
	num_chunks = args->in.num_chunks_execute;

	if (unlikely((cs_type != CS_TYPE_DEFAULT) &&
					!hdev->supports_sync_stream)) {
		dev_err(hdev->dev, "Sync stream CS is not supported\n");
		return -EINVAL;
	}

	if (cs_type == CS_TYPE_DEFAULT) {
		if (!num_chunks) {
			dev_err(hdev->dev,
				"Got execute CS with 0 chunks, context %d\n",
				ctx->asid);
			return -EINVAL;
		}
	} else if (num_chunks != 1) {
		dev_err(hdev->dev,
			"Sync stream CS mandates one chunk only, context %d\n",
			ctx->asid);
		return -EINVAL;
	}

	return 0;
}
static int hl_cs_copy_chunk_array(struct hl_device *hdev,
					struct hl_cs_chunk **cs_chunk_array,
					void __user *chunks, u32 num_chunks,
					struct hl_ctx *ctx)
{
	u32 size_to_copy;

	if (num_chunks > HL_MAX_JOBS_PER_CS) {
		atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
		atomic64_inc(&hdev->aggregated_cs_counters.validation_drop_cnt);
		dev_err(hdev->dev,
			"Number of chunks can NOT be larger than %d\n",
			HL_MAX_JOBS_PER_CS);
		return -EINVAL;
	}

	*cs_chunk_array = kmalloc_array(num_chunks, sizeof(**cs_chunk_array),
					GFP_ATOMIC);
	if (!*cs_chunk_array)
		*cs_chunk_array = kmalloc_array(num_chunks,
					sizeof(**cs_chunk_array), GFP_KERNEL);
	if (!*cs_chunk_array) {
		atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
		atomic64_inc(&hdev->aggregated_cs_counters.out_of_mem_drop_cnt);
		return -ENOMEM;
	}

	size_to_copy = num_chunks * sizeof(struct hl_cs_chunk);
	if (copy_from_user(*cs_chunk_array, chunks, size_to_copy)) {
		atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
		atomic64_inc(&hdev->aggregated_cs_counters.validation_drop_cnt);
		dev_err(hdev->dev, "Failed to copy cs chunk array from user\n");
		kfree(*cs_chunk_array);
		return -EFAULT;
	}

	return 0;
}
static int cs_staged_submission(struct hl_device *hdev, struct hl_cs *cs,
				u64 sequence, u32 flags,
				u32 encaps_signal_handle)
{
	if (!(flags & HL_CS_FLAGS_STAGED_SUBMISSION))
		return 0;

	cs->staged_last = !!(flags & HL_CS_FLAGS_STAGED_SUBMISSION_LAST);
	cs->staged_first = !!(flags & HL_CS_FLAGS_STAGED_SUBMISSION_FIRST);

	if (cs->staged_first) {
		/* Staged CS sequence is the first CS sequence */
		INIT_LIST_HEAD(&cs->staged_cs_node);
		cs->staged_sequence = cs->sequence;

		if (cs->encaps_signals)
			cs->encaps_sig_hdl_id = encaps_signal_handle;
	} else {
		/* User sequence will be validated in 'hl_hw_queue_schedule_cs'
		 * under the cs_mirror_lock
		 */
		cs->staged_sequence = sequence;
	}

	/* Increment CS reference if needed */
	staged_cs_get(hdev, cs);

	cs->staged_cs = true;

	return 0;
}
static u32 get_stream_master_qid_mask(struct hl_device *hdev, u32 qid)
{
	int i;

	for (i = 0; i < hdev->stream_master_qid_arr_size; i++)
		if (qid == hdev->stream_master_qid_arr[i])
			return BIT(i);

	return 0;
}
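/*
 * Illustrative example (hypothetical values): with a stream master QID array
 * of {4, 5, 6, 7}, a job on queue index 6 maps to BIT(2), so a CS touching
 * queues 4 and 6 accumulates the mask 0b0101 in its fence for multi-CS
 * completion matching.
 */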
static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks,
				u32 num_chunks, u64 *cs_seq, u32 flags,
				u32 encaps_signals_handle, u32 timeout,
				u16 *signal_initial_sob_count)
{
	bool staged_mid, int_queues_only = true;
	struct hl_device *hdev = hpriv->hdev;
	struct hl_cs_chunk *cs_chunk_array;
	struct hl_cs_counters_atomic *cntr;
	struct hl_ctx *ctx = hpriv->ctx;
	struct hl_cs_job *job;
	struct hl_cs *cs;
	struct hl_cb *cb;
	u64 user_sequence;
	u8 stream_master_qid_map = 0;
	int rc, i;

	cntr = &hdev->aggregated_cs_counters;
	user_sequence = *cs_seq;
	*cs_seq = ULLONG_MAX;

	rc = hl_cs_copy_chunk_array(hdev, &cs_chunk_array, chunks, num_chunks,
			hpriv->ctx);
	if (rc)
		goto out;

	if ((flags & HL_CS_FLAGS_STAGED_SUBMISSION) &&
			!(flags & HL_CS_FLAGS_STAGED_SUBMISSION_FIRST))
		staged_mid = true;
	else
		staged_mid = false;

	rc = allocate_cs(hdev, hpriv->ctx, CS_TYPE_DEFAULT,
			staged_mid ? user_sequence : ULLONG_MAX, &cs, flags,
			timeout);
	if (rc)
		goto free_cs_chunk_array;

	*cs_seq = cs->sequence;

	hl_debugfs_add_cs(cs);

	rc = cs_staged_submission(hdev, cs, user_sequence, flags,
						encaps_signals_handle);
	if (rc)
		goto free_cs_object;

	/* If this is a staged submission we must return the staged sequence
	 * rather than the internal CS sequence
	 */
	if (cs->staged_cs)
		*cs_seq = cs->staged_sequence;

	/* Validate ALL the CS chunks before submitting the CS */
	for (i = 0 ; i < num_chunks ; i++) {
		struct hl_cs_chunk *chunk = &cs_chunk_array[i];
		enum hl_queue_type queue_type;
		bool is_kernel_allocated_cb;

		rc = validate_queue_index(hdev, chunk, &queue_type,
						&is_kernel_allocated_cb);
		if (rc) {
			atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
			atomic64_inc(&cntr->validation_drop_cnt);
			goto free_cs_object;
		}

		if (is_kernel_allocated_cb) {
			cb = get_cb_from_cs_chunk(hdev, &hpriv->cb_mgr, chunk);
			if (!cb) {
				atomic64_inc(
					&ctx->cs_counters.validation_drop_cnt);
				atomic64_inc(&cntr->validation_drop_cnt);
				rc = -EINVAL;
				goto free_cs_object;
			}
		} else {
			cb = (struct hl_cb *) (uintptr_t) chunk->cb_handle;
		}

		if (queue_type == QUEUE_TYPE_EXT ||
						queue_type == QUEUE_TYPE_HW) {
			int_queues_only = false;

			/*
			 * store which streams are being used for external/HW
			 * queues of this CS
			 */
			if (hdev->supports_wait_for_multi_cs)
				stream_master_qid_map |=
					get_stream_master_qid_mask(hdev,
							chunk->queue_index);
		}

		job = hl_cs_allocate_job(hdev, queue_type,
						is_kernel_allocated_cb);
		if (!job) {
			atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
			atomic64_inc(&cntr->out_of_mem_drop_cnt);
			dev_err(hdev->dev, "Failed to allocate a new job\n");
			rc = -ENOMEM;
			if (is_kernel_allocated_cb)
				goto release_cb;

			goto free_cs_object;
		}

		job->id = i + 1;
		job->cs = cs;
		job->user_cb = cb;
		job->user_cb_size = chunk->cb_size;
		job->hw_queue_id = chunk->queue_index;

		cs->jobs_in_queue_cnt[job->hw_queue_id]++;

		list_add_tail(&job->cs_node, &cs->job_list);

		/*
		 * Increment CS reference. When CS reference is 0, CS is
		 * done and can be signaled to user and free all its resources
		 * Only increment for JOB on external or H/W queues, because
		 * only for those JOBs we get completion
		 */
		if (cs_needs_completion(cs) &&
			(job->queue_type == QUEUE_TYPE_EXT ||
				job->queue_type == QUEUE_TYPE_HW))
			cs_get(cs);

		hl_debugfs_add_job(hdev, job);

		rc = cs_parser(hpriv, job);
		if (rc) {
			atomic64_inc(&ctx->cs_counters.parsing_drop_cnt);
			atomic64_inc(&cntr->parsing_drop_cnt);
			dev_err(hdev->dev,
				"Failed to parse JOB %d.%llu.%d, err %d, rejecting the CS\n",
				cs->ctx->asid, cs->sequence, job->id, rc);
			goto free_cs_object;
		}
	}

	/* We allow a CS with any queue type combination as long as it does
	 * not get a completion
	 */
	if (int_queues_only && cs_needs_completion(cs)) {
		atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
		atomic64_inc(&cntr->validation_drop_cnt);
		dev_err(hdev->dev,
			"Reject CS %d.%llu since it contains only internal queues jobs and needs completion\n",
			cs->ctx->asid, cs->sequence);
		rc = -EINVAL;
		goto free_cs_object;
	}

	/*
	 * store the (external/HW queues) streams used by the CS in the
	 * fence object for multi-CS completion
	 */
	if (hdev->supports_wait_for_multi_cs)
		cs->fence->stream_master_qid_map = stream_master_qid_map;

	rc = hl_hw_queue_schedule_cs(cs);
	if (rc) {
		if (rc != -EAGAIN)
			dev_err(hdev->dev,
				"Failed to submit CS %d.%llu to H/W queues, error %d\n",
				cs->ctx->asid, cs->sequence, rc);
		goto free_cs_object;
	}

	*signal_initial_sob_count = cs->initial_sob_count;

	rc = HL_CS_STATUS_SUCCESS;
	goto put_cs;

release_cb:
	atomic_dec(&cb->cs_cnt);
	hl_cb_put(cb);
free_cs_object:
	cs_rollback(hdev, cs);
	*cs_seq = ULLONG_MAX;
	/* The path below is both for good and erroneous exits */
put_cs:
	/* We finished with the CS in this function, so put the ref */
	cs_put(cs);
free_cs_chunk_array:
	kfree(cs_chunk_array);
out:
	return rc;
}
static int hl_cs_ctx_switch(struct hl_fpriv *hpriv, union hl_cs_args *args,
				u64 *cs_seq)
{
	struct hl_device *hdev = hpriv->hdev;
	struct hl_ctx *ctx = hpriv->ctx;
	bool need_soft_reset = false;
	int rc = 0, do_ctx_switch;
	void __user *chunks;
	u32 num_chunks, tmp;
	u16 sob_count;
	int ret;

	do_ctx_switch = atomic_cmpxchg(&ctx->thread_ctx_switch_token, 1, 0);

	if (do_ctx_switch || (args->in.cs_flags & HL_CS_FLAGS_FORCE_RESTORE)) {
		mutex_lock(&hpriv->restore_phase_mutex);

		if (do_ctx_switch) {
			rc = hdev->asic_funcs->context_switch(hdev, ctx->asid);
			if (rc) {
				dev_err_ratelimited(hdev->dev,
					"Failed to switch to context %d, rejecting CS! %d\n",
					ctx->asid, rc);
				/*
				 * If we timed out, or if the device is not IDLE
				 * while we want to do context-switch (-EBUSY),
				 * we need to soft-reset because QMAN is
				 * probably stuck. However, we can't call to
				 * reset here directly because of deadlock, so
				 * need to do it at the very end of this
				 * function
				 */
				if ((rc == -ETIMEDOUT) || (rc == -EBUSY))
					need_soft_reset = true;
				mutex_unlock(&hpriv->restore_phase_mutex);
				goto out;
			}
		}

		hdev->asic_funcs->restore_phase_topology(hdev);

		chunks = (void __user *) (uintptr_t) args->in.chunks_restore;
		num_chunks = args->in.num_chunks_restore;

		if (!num_chunks) {
			dev_dbg(hdev->dev,
				"Need to run restore phase but restore CS is empty\n");
			rc = 0;
		} else {
			rc = cs_ioctl_default(hpriv, chunks, num_chunks,
					cs_seq, 0, 0, hdev->timeout_jiffies, &sob_count);
		}

		mutex_unlock(&hpriv->restore_phase_mutex);

		if (rc) {
			dev_err(hdev->dev,
				"Failed to submit restore CS for context %d (%d)\n",
				ctx->asid, rc);
			goto out;
		}

		/* Need to wait for restore completion before execution phase */
		if (num_chunks) {
			enum hl_cs_wait_status status;
wait_again:
			ret = _hl_cs_wait_ioctl(hdev, ctx,
					jiffies_to_usecs(hdev->timeout_jiffies),
					*cs_seq, &status, NULL);
			if (ret) {
				if (ret == -ERESTARTSYS) {
					usleep_range(100, 200);
					goto wait_again;
				}

				dev_err(hdev->dev,
					"Restore CS for context %d failed to complete %d\n",
					ctx->asid, ret);
				rc = -ENOEXEC;
				goto out;
			}
		}

		ctx->thread_ctx_switch_wait_token = 1;

	} else if (!ctx->thread_ctx_switch_wait_token) {
		rc = hl_poll_timeout_memory(hdev,
			&ctx->thread_ctx_switch_wait_token, tmp, (tmp == 1),
			100, jiffies_to_usecs(hdev->timeout_jiffies), false);

		if (rc == -ETIMEDOUT) {
			dev_err(hdev->dev,
				"context switch phase timeout (%d)\n", tmp);
			goto out;
		}
	}

out:
	if ((rc == -ETIMEDOUT || rc == -EBUSY) && (need_soft_reset))
		hl_device_reset(hdev, 0);

	return rc;
}
/*
 * hl_cs_signal_sob_wraparound_handler: handle SOB value wraparound case.
 * if the SOB value reaches the max value move to the other SOB reserved
 * to the driver
 *
 * @hdev: pointer to device structure
 * @q_idx: stream queue index
 * @hw_sob: the H/W SOB used in this signal CS.
 * @count: signals count
 * @encaps_sig: tells whether it's reservation for encaps signals or not.
 *
 * Note that this function must be called while hw_queues_lock is taken.
 */
int hl_cs_signal_sob_wraparound_handler(struct hl_device *hdev, u32 q_idx,
			struct hl_hw_sob **hw_sob, u32 count, bool encaps_sig)
{
	struct hl_sync_stream_properties *prop;
	struct hl_hw_sob *sob = *hw_sob, *other_sob;
	u8 other_sob_offset;

	prop = &hdev->kernel_queues[q_idx].sync_stream_prop;

	hw_sob_get(sob);

	/* check for wraparound */
	if (prop->next_sob_val + count >= HL_MAX_SOB_VAL) {
		/*
		 * Decrement as we reached the max value.
		 * The release function won't be called here as we've
		 * just incremented the refcount right before calling this
		 * function.
		 */
		hw_sob_put_err(sob);

		/*
		 * check the other sob value, if it is still in use then fail,
		 * otherwise make the switch
		 */
		other_sob_offset = (prop->curr_sob_offset + 1) % HL_RSVD_SOBS;
		other_sob = &prop->hw_sob[other_sob_offset];

		if (kref_read(&other_sob->kref) != 1) {
			dev_err(hdev->dev, "error: Cannot switch SOBs q_idx: %d\n",
								q_idx);
			return -EINVAL;
		}

		/*
		 * next_sob_val always points to the next available signal
		 * in the sob, so in encaps signals it will be the next one
		 * after reserving the required amount.
		 */
		if (encaps_sig)
			prop->next_sob_val = count + 1;
		else
			prop->next_sob_val = count;

		/* only two SOBs are currently in use */
		prop->curr_sob_offset = other_sob_offset;
		*hw_sob = other_sob;

		/*
		 * check if other_sob needs reset, then do it before using it
		 * for the reservation or the next signal cs.
		 * we do it here, and for both encaps and regular signal cs
		 * cases in order to avoid possible races of two kref_put
		 * of the sob which can occur at the same time if we move the
		 * sob reset(kref_put) to cs_do_release function.
		 * in addition, if we have a combination of cs signal and
		 * encaps, and at the point we need to reset the sob there were
		 * no more reservations and only signal cs keep coming,
		 * in such case we need signal_cs to put the refcount and
		 * reset the sob.
		 */
		if (other_sob->need_reset)
			hw_sob_put(other_sob);

		if (encaps_sig) {
			/* set reset indication for the sob */
			sob->need_reset = true;
			hw_sob_get(other_sob);
		}

		dev_dbg(hdev->dev, "switched to SOB %d, q_idx: %d\n",
				prop->curr_sob_offset, q_idx);
	} else {
		prop->next_sob_val += count;
	}

	return 0;
}
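/*
 * Illustrative wraparound arithmetic (hypothetical numbers): if next_sob_val
 * is close enough to HL_MAX_SOB_VAL that adding 'count' would overflow the
 * SOB, the stream switches to the alternate reserved SOB and restarts its
 * counter at 'count' (or 'count + 1' for an encaps reservation, since
 * next_sob_val always points to the next *available* signal value).
 */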
static int cs_ioctl_extract_signal_seq(struct hl_device *hdev,
		struct hl_cs_chunk *chunk, u64 *signal_seq, struct hl_ctx *ctx,
		bool encaps_signals)
{
	u64 *signal_seq_arr = NULL;
	u32 size_to_copy, signal_seq_arr_len;
	int rc = 0;

	if (encaps_signals) {
		*signal_seq = chunk->encaps_signal_seq;
		goto out;
	}

	signal_seq_arr_len = chunk->num_signal_seq_arr;

	/* currently only one signal seq is supported */
	if (signal_seq_arr_len != 1) {
		atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
		atomic64_inc(&hdev->aggregated_cs_counters.validation_drop_cnt);
		dev_err(hdev->dev,
			"Wait for signal CS supports only one signal CS seq\n");
		return -EINVAL;
	}

	signal_seq_arr = kmalloc_array(signal_seq_arr_len,
					sizeof(*signal_seq_arr),
					GFP_ATOMIC);
	if (!signal_seq_arr)
		signal_seq_arr = kmalloc_array(signal_seq_arr_len,
					sizeof(*signal_seq_arr),
					GFP_KERNEL);
	if (!signal_seq_arr) {
		atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
		atomic64_inc(&hdev->aggregated_cs_counters.out_of_mem_drop_cnt);
		return -ENOMEM;
	}

	size_to_copy = signal_seq_arr_len * sizeof(*signal_seq_arr);
	if (copy_from_user(signal_seq_arr,
				u64_to_user_ptr(chunk->signal_seq_arr),
				size_to_copy)) {
		atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
		atomic64_inc(&hdev->aggregated_cs_counters.validation_drop_cnt);
		dev_err(hdev->dev,
			"Failed to copy signal seq array from user\n");
		rc = -EFAULT;
		goto out;
	}

	/* currently it is guaranteed to have only one signal seq */
	*signal_seq = signal_seq_arr[0];

out:
	kfree(signal_seq_arr);

	return rc;
}
static int cs_ioctl_signal_wait_create_jobs(struct hl_device *hdev,
		struct hl_ctx *ctx, struct hl_cs *cs,
		enum hl_queue_type q_type, u32 q_idx, u32 encaps_signal_offset)
{
	struct hl_cs_counters_atomic *cntr;
	struct hl_cs_job *job;
	struct hl_cb *cb;
	u32 cb_size;

	cntr = &hdev->aggregated_cs_counters;

	job = hl_cs_allocate_job(hdev, q_type, true);
	if (!job) {
		atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
		atomic64_inc(&cntr->out_of_mem_drop_cnt);
		dev_err(hdev->dev, "Failed to allocate a new job\n");
		return -ENOMEM;
	}

	if (cs->type == CS_TYPE_WAIT)
		cb_size = hdev->asic_funcs->get_wait_cb_size(hdev);
	else
		cb_size = hdev->asic_funcs->get_signal_cb_size(hdev);

	cb = hl_cb_kernel_create(hdev, cb_size,
				q_type == QUEUE_TYPE_HW && hdev->mmu_enable);
	if (!cb) {
		atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
		atomic64_inc(&cntr->out_of_mem_drop_cnt);
		kfree(job);
		return -EFAULT;
	}

	job->id = 0;
	job->cs = cs;
	job->user_cb = cb;
	atomic_inc(&job->user_cb->cs_cnt);
	job->user_cb_size = cb_size;
	job->hw_queue_id = q_idx;

	if ((cs->type == CS_TYPE_WAIT || cs->type == CS_TYPE_COLLECTIVE_WAIT)
			&& cs->encaps_signals)
		job->encaps_sig_wait_offset = encaps_signal_offset;

	/*
	 * No need in parsing, user CB is the patched CB.
	 * We call hl_cb_destroy() out of two reasons - we don't need the CB in
	 * the CB idr anymore and to decrement its refcount as it was
	 * incremented inside hl_cb_kernel_create().
	 */
	job->patched_cb = job->user_cb;
	job->job_cb_size = job->user_cb_size;
	hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);

	/* increment refcount as for external queues we get completion */
	cs_get(cs);

	cs->jobs_in_queue_cnt[job->hw_queue_id]++;

	list_add_tail(&job->cs_node, &cs->job_list);

	hl_debugfs_add_job(hdev, job);

	return 0;
}
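/*
 * cs_ioctl_reserve_signals - reserve a block of signal values on a stream's
 * current SOB for later use by encapsulated signals.
 *
 * The reservation is tracked by an IDR-allocated handle that records the SOB,
 * the reserved count and the pre-reservation value, which the unreserve path
 * later uses to validate that the SOB did not move underneath the handle.
 */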
static int cs_ioctl_reserve_signals(struct hl_fpriv *hpriv,
		u32 q_idx, u32 count,
		u32 *handle_id, u32 *sob_addr,
		u32 *signals_count)
{
	struct hw_queue_properties *hw_queue_prop;
	struct hl_sync_stream_properties *prop;
	struct hl_device *hdev = hpriv->hdev;
	struct hl_cs_encaps_sig_handle *handle;
	struct hl_encaps_signals_mgr *mgr;
	struct hl_hw_sob *hw_sob;
	int hdl_id;
	int rc = 0;

	if (count >= HL_MAX_SOB_VAL) {
		dev_err(hdev->dev, "signals count(%u) exceeds the max SOB value\n",
						count);
		rc = -EINVAL;
		goto out;
	}

	if (q_idx >= hdev->asic_prop.max_queues) {
		dev_err(hdev->dev, "Queue index %d is invalid\n",
			q_idx);
		rc = -EINVAL;
		goto out;
	}

	hw_queue_prop = &hdev->asic_prop.hw_queues_props[q_idx];

	if (!hw_queue_prop->supports_sync_stream) {
		dev_err(hdev->dev,
			"Queue index %d does not support sync stream operations\n",
									q_idx);
		rc = -EINVAL;
		goto out;
	}

	prop = &hdev->kernel_queues[q_idx].sync_stream_prop;

	handle = kzalloc(sizeof(*handle), GFP_KERNEL);
	if (!handle) {
		rc = -ENOMEM;
		goto out;
	}

	handle->count = count;

	hl_ctx_get(hdev, hpriv->ctx);
	handle->ctx = hpriv->ctx;
	mgr = &hpriv->ctx->sig_mgr;

	spin_lock(&mgr->lock);
	hdl_id = idr_alloc(&mgr->handles, handle, 1, 0, GFP_ATOMIC);
	spin_unlock(&mgr->lock);

	if (hdl_id < 0) {
		dev_err(hdev->dev, "Failed to allocate IDR for a new signal reservation\n");
		rc = -EINVAL;
		goto put_ctx;
	}

	handle->id = hdl_id;
	handle->q_idx = q_idx;
	handle->hdev = hdev;
	kref_init(&handle->refcount);

	hdev->asic_funcs->hw_queues_lock(hdev);

	hw_sob = &prop->hw_sob[prop->curr_sob_offset];

	/*
	 * Increment the SOB value by count by user request
	 * to reserve those signals.
	 * check if the signals amount to reserve is not exceeding the max sob
	 * value, if yes then switch sob.
	 */
	rc = hl_cs_signal_sob_wraparound_handler(hdev, q_idx, &hw_sob, count,
								true);
	if (rc) {
		dev_err(hdev->dev, "Failed to switch SOB\n");
		hdev->asic_funcs->hw_queues_unlock(hdev);
		rc = -EINVAL;
		goto remove_idr;
	}

	/* set the hw_sob to the handle after calling the sob wraparound handler
	 * since sob could have changed.
	 */
	handle->hw_sob = hw_sob;

	/* store the current sob value for unreserve validity check, and
	 * signal offset support
	 */
	handle->pre_sob_val = prop->next_sob_val - handle->count;

	*signals_count = prop->next_sob_val;
	hdev->asic_funcs->hw_queues_unlock(hdev);

	*sob_addr = handle->hw_sob->sob_addr;
	*handle_id = hdl_id;

	dev_dbg(hdev->dev,
		"Signals reserved, sob_id: %d, sob addr: 0x%x, last sob_val: %u, q_idx: %d, hdl_id: %d\n",
			hw_sob->sob_id, handle->hw_sob->sob_addr,
			prop->next_sob_val - 1, q_idx, hdl_id);

	goto out;

remove_idr:
	spin_lock(&mgr->lock);
	idr_remove(&mgr->handles, hdl_id);
	spin_unlock(&mgr->lock);

put_ctx:
	hl_ctx_put(handle->ctx);
	kfree(handle);

out:
	return rc;
}
static int cs_ioctl_unreserve_signals(struct hl_fpriv *hpriv, u32 handle_id)
{
	struct hl_cs_encaps_sig_handle *encaps_sig_hdl;
	struct hl_sync_stream_properties *prop;
	struct hl_device *hdev = hpriv->hdev;
	struct hl_encaps_signals_mgr *mgr;
	struct hl_hw_sob *hw_sob;
	u32 q_idx, sob_addr;
	int rc = 0;

	mgr = &hpriv->ctx->sig_mgr;

	spin_lock(&mgr->lock);
	encaps_sig_hdl = idr_find(&mgr->handles, handle_id);
	if (encaps_sig_hdl) {
		dev_dbg(hdev->dev, "unreserve signals, handle: %u, SOB:0x%x, count: %u\n",
				handle_id, encaps_sig_hdl->hw_sob->sob_addr,
					encaps_sig_hdl->count);

		hdev->asic_funcs->hw_queues_lock(hdev);

		q_idx = encaps_sig_hdl->q_idx;
		prop = &hdev->kernel_queues[q_idx].sync_stream_prop;
		hw_sob = &prop->hw_sob[prop->curr_sob_offset];
		sob_addr = hdev->asic_funcs->get_sob_addr(hdev, hw_sob->sob_id);

		/* Check if sob_val got out of sync due to other
		 * signal submission requests which were handled
		 * between the reserve-unreserve calls or SOB switch
		 * upon reaching SOB max value.
		 */
		if (encaps_sig_hdl->pre_sob_val + encaps_sig_hdl->count
				!= prop->next_sob_val ||
				sob_addr != encaps_sig_hdl->hw_sob->sob_addr) {
			dev_err(hdev->dev, "Cannot unreserve signals, SOB val ran out of sync, expected: %u, actual val: %u\n",
				encaps_sig_hdl->pre_sob_val,
				(prop->next_sob_val - encaps_sig_hdl->count));

			hdev->asic_funcs->hw_queues_unlock(hdev);
			rc = -EINVAL;
			goto out;
		}

		/*
		 * Decrement the SOB value by count by user request
		 * to unreserve those signals
		 */
		prop->next_sob_val -= encaps_sig_hdl->count;

		hdev->asic_funcs->hw_queues_unlock(hdev);

		hw_sob_put(hw_sob);

		/* Release the id and free allocated memory of the handle */
		idr_remove(&mgr->handles, handle_id);
		hl_ctx_put(encaps_sig_hdl->ctx);
		kfree(encaps_sig_hdl);
	} else {
		rc = -EINVAL;
		dev_err(hdev->dev, "failed to unreserve signals, cannot find handler\n");
	}
out:
	spin_unlock(&mgr->lock);

	return rc;
}
static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,
				void __user *chunks, u32 num_chunks,
				u64 *cs_seq, u32 flags, u32 timeout,
				u32 *signal_sob_addr_offset, u16 *signal_initial_sob_count)
{
	struct hl_cs_encaps_sig_handle *encaps_sig_hdl = NULL;
	bool handle_found = false, is_wait_cs = false,
			wait_cs_submitted = false,
			cs_encaps_signals = false;
	struct hl_cs_chunk *cs_chunk_array, *chunk;
	bool staged_cs_with_encaps_signals = false;
	struct hw_queue_properties *hw_queue_prop;
	struct hl_device *hdev = hpriv->hdev;
	struct hl_cs_compl *sig_waitcs_cmpl;
	u32 q_idx, collective_engine_id = 0;
	struct hl_cs_counters_atomic *cntr;
	struct hl_fence *sig_fence = NULL;
	struct hl_ctx *ctx = hpriv->ctx;
	enum hl_queue_type q_type;
	struct hl_cs *cs;
	u64 signal_seq;
	int rc;

	cntr = &hdev->aggregated_cs_counters;
	*cs_seq = ULLONG_MAX;

	rc = hl_cs_copy_chunk_array(hdev, &cs_chunk_array, chunks, num_chunks,
			ctx);
	if (rc)
		goto out;

	/* currently it is guaranteed to have only one chunk */
	chunk = &cs_chunk_array[0];

	if (chunk->queue_index >= hdev->asic_prop.max_queues) {
		atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
		atomic64_inc(&cntr->validation_drop_cnt);
		dev_err(hdev->dev, "Queue index %d is invalid\n",
			chunk->queue_index);
		rc = -EINVAL;
		goto free_cs_chunk_array;
	}

	q_idx = chunk->queue_index;
	hw_queue_prop = &hdev->asic_prop.hw_queues_props[q_idx];
	q_type = hw_queue_prop->type;

	if (!hw_queue_prop->supports_sync_stream) {
		atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
		atomic64_inc(&cntr->validation_drop_cnt);
		dev_err(hdev->dev,
			"Queue index %d does not support sync stream operations\n",
			q_idx);
		rc = -EINVAL;
		goto free_cs_chunk_array;
	}

	if (cs_type == CS_TYPE_COLLECTIVE_WAIT) {
		if (!(hw_queue_prop->collective_mode == HL_COLLECTIVE_MASTER)) {
			atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
			atomic64_inc(&cntr->validation_drop_cnt);
			dev_err(hdev->dev,
				"Queue index %d is invalid\n", q_idx);
			rc = -EINVAL;
			goto free_cs_chunk_array;
		}

		if (!hdev->nic_ports_mask) {
			atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
			atomic64_inc(&cntr->validation_drop_cnt);
			dev_err(hdev->dev,
				"Collective operations not supported when NIC ports are disabled");
			rc = -EINVAL;
			goto free_cs_chunk_array;
		}

		collective_engine_id = chunk->collective_engine_id;
	}

	is_wait_cs = !!(cs_type == CS_TYPE_WAIT ||
			cs_type == CS_TYPE_COLLECTIVE_WAIT);

	cs_encaps_signals = !!(flags & HL_CS_FLAGS_ENCAP_SIGNALS);

	if (is_wait_cs) {
		rc = cs_ioctl_extract_signal_seq(hdev, chunk, &signal_seq,
				ctx, cs_encaps_signals);
		if (rc)
			goto free_cs_chunk_array;

		if (cs_encaps_signals) {
			/* check if cs sequence has encapsulated
			 * signals handle
			 */
			struct idr *idp;
			u32 id;

			spin_lock(&ctx->sig_mgr.lock);
			idp = &ctx->sig_mgr.handles;
			idr_for_each_entry(idp, encaps_sig_hdl, id) {
				if (encaps_sig_hdl->cs_seq == signal_seq) {
					/* get refcount to protect removing this handle from idr,
					 * needed when multiple wait cs are used with offset
					 * to wait on reserved encaps signals.
					 * Since kref_put of this handle is executed outside the
					 * current lock, it is possible that the handle refcount
					 * is 0 but it is yet to be removed from the list. In this
					 * case need to consider the handle as not valid.
					 */
					if (kref_get_unless_zero(&encaps_sig_hdl->refcount))
						handle_found = true;
					break;
				}
			}
			spin_unlock(&ctx->sig_mgr.lock);

			if (!handle_found) {
				/* treat as signal CS already finished */
				dev_dbg(hdev->dev, "Cannot find encapsulated signals handle for seq 0x%llx\n",
						signal_seq);
				rc = 0;
				goto free_cs_chunk_array;
			}

			/* validate also the signal offset value */
			if (chunk->encaps_signal_offset >
					encaps_sig_hdl->count) {
				dev_err(hdev->dev, "offset(%u) value exceeds max reserved signals count(%u)!\n",
						chunk->encaps_signal_offset,
						encaps_sig_hdl->count);
				rc = -EINVAL;
				goto free_cs_chunk_array;
			}
		}

		sig_fence = hl_ctx_get_fence(ctx, signal_seq);
		if (IS_ERR(sig_fence)) {
			atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
			atomic64_inc(&cntr->validation_drop_cnt);
			dev_err(hdev->dev,
				"Failed to get signal CS with seq 0x%llx\n",
				signal_seq);
			rc = PTR_ERR(sig_fence);
			goto free_cs_chunk_array;
		}

		if (!sig_fence) {
			/* signal CS already finished */
			rc = 0;
			goto free_cs_chunk_array;
		}

		sig_waitcs_cmpl =
			container_of(sig_fence, struct hl_cs_compl, base_fence);

		staged_cs_with_encaps_signals = !!
				(sig_waitcs_cmpl->type == CS_TYPE_DEFAULT &&
				(flags & HL_CS_FLAGS_ENCAP_SIGNALS));

		if (sig_waitcs_cmpl->type != CS_TYPE_SIGNAL &&
				!staged_cs_with_encaps_signals) {
			atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
			atomic64_inc(&cntr->validation_drop_cnt);
			dev_err(hdev->dev,
				"CS seq 0x%llx is not of a signal/encaps-signal CS\n",
				signal_seq);
			hl_fence_put(sig_fence);
			rc = -EINVAL;
			goto free_cs_chunk_array;
		}

		if (completion_done(&sig_fence->completion)) {
			/* signal CS already finished */
			hl_fence_put(sig_fence);
			rc = 0;
			goto free_cs_chunk_array;
		}
	}

	rc = allocate_cs(hdev, ctx, cs_type, ULLONG_MAX, &cs, flags, timeout);
	if (rc) {
		if (is_wait_cs)
			hl_fence_put(sig_fence);

		goto free_cs_chunk_array;
	}

	/*
	 * Save the signal CS fence for later initialization right before
	 * hanging the wait CS on the queue.
	 * for encaps signals case, we save the cs sequence and handle pointer
	 * for later initialization.
	 */
	if (is_wait_cs) {
		cs->signal_fence = sig_fence;
		/* store the handle pointer, so we don't have to
		 * look for it again, later on the flow
		 * when we need to set SOB info in hw_queue.
		 */
		if (cs->encaps_signals)
			cs->encaps_sig_hdl = encaps_sig_hdl;
	}

	hl_debugfs_add_cs(cs);

	*cs_seq = cs->sequence;

	if (cs_type == CS_TYPE_WAIT || cs_type == CS_TYPE_SIGNAL)
		rc = cs_ioctl_signal_wait_create_jobs(hdev, ctx, cs, q_type,
				q_idx, chunk->encaps_signal_offset);
	else if (cs_type == CS_TYPE_COLLECTIVE_WAIT)
		rc = hdev->asic_funcs->collective_wait_create_jobs(hdev, ctx,
				cs, q_idx, collective_engine_id,
				chunk->encaps_signal_offset);
	else {
		atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
		atomic64_inc(&cntr->validation_drop_cnt);
		rc = -EINVAL;
	}

	if (rc)
		goto free_cs_object;

	rc = hl_hw_queue_schedule_cs(cs);
	if (rc) {
		/* In case wait cs failed here, it means the signal cs
		 * already completed. we want to free all its related objects
		 * but we don't want to fail the ioctl.
		 */
		if (is_wait_cs)
			rc = 0;
		else if (rc != -EAGAIN)
			dev_err(hdev->dev,
				"Failed to submit CS %d.%llu to H/W queues, error %d\n",
				ctx->asid, cs->sequence, rc);
		goto free_cs_object;
	}

	*signal_sob_addr_offset = cs->sob_addr_offset;
	*signal_initial_sob_count = cs->initial_sob_count;

	rc = HL_CS_STATUS_SUCCESS;
	if (is_wait_cs)
		wait_cs_submitted = true;
	goto put_cs;

free_cs_object:
	cs_rollback(hdev, cs);
	*cs_seq = ULLONG_MAX;
	/* The path below is both for good and erroneous exits */
put_cs:
	/* We finished with the CS in this function, so put the ref */
	cs_put(cs);
free_cs_chunk_array:
	if (!wait_cs_submitted && cs_encaps_signals && handle_found &&
							is_wait_cs)
		kref_put(&encaps_sig_hdl->refcount,
				hl_encaps_handle_do_release);
	kfree(cs_chunk_array);
out:
	return rc;
}
int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data)
{
	union hl_cs_args *args = data;
	enum hl_cs_type cs_type = 0;
	u64 cs_seq = ULLONG_MAX;
	void __user *chunks;
	u32 num_chunks, flags, timeout,
		signals_count = 0, sob_addr = 0, handle_id = 0;
	u16 sob_initial_count = 0;
	int rc;

	rc = hl_cs_sanity_checks(hpriv, args);
	if (rc)
		goto out;

	rc = hl_cs_ctx_switch(hpriv, args, &cs_seq);
	if (rc)
		goto out;

	cs_type = hl_cs_get_cs_type(args->in.cs_flags &
					~HL_CS_FLAGS_FORCE_RESTORE);
	chunks = (void __user *) (uintptr_t) args->in.chunks_execute;
	num_chunks = args->in.num_chunks_execute;
	flags = args->in.cs_flags;

	/* In case this is a staged CS, user should supply the CS sequence */
	if ((flags & HL_CS_FLAGS_STAGED_SUBMISSION) &&
			!(flags & HL_CS_FLAGS_STAGED_SUBMISSION_FIRST))
		cs_seq = args->in.seq;

	timeout = flags & HL_CS_FLAGS_CUSTOM_TIMEOUT
			? msecs_to_jiffies(args->in.timeout * 1000)
			: hpriv->hdev->timeout_jiffies;

	switch (cs_type) {
	case CS_TYPE_SIGNAL:
	case CS_TYPE_WAIT:
	case CS_TYPE_COLLECTIVE_WAIT:
		rc = cs_ioctl_signal_wait(hpriv, cs_type, chunks, num_chunks,
					&cs_seq, args->in.cs_flags, timeout,
					&sob_addr, &sob_initial_count);
		break;
	case CS_RESERVE_SIGNALS:
		rc = cs_ioctl_reserve_signals(hpriv,
					args->in.encaps_signals_q_idx,
					args->in.encaps_signals_count,
					&handle_id, &sob_addr, &signals_count);
		break;
	case CS_UNRESERVE_SIGNALS:
		rc = cs_ioctl_unreserve_signals(hpriv,
					args->in.encaps_sig_handle_id);
		break;
	default:
		rc = cs_ioctl_default(hpriv, chunks, num_chunks, &cs_seq,
					args->in.cs_flags,
					args->in.encaps_sig_handle_id,
					timeout, &sob_initial_count);
		break;
	}

	if (rc != -EAGAIN) {
		memset(args, 0, sizeof(*args));

		switch (cs_type) {
		case CS_RESERVE_SIGNALS:
			args->out.handle_id = handle_id;
			args->out.sob_base_addr_offset = sob_addr;
			args->out.count = signals_count;
			break;
		case CS_TYPE_SIGNAL:
			args->out.sob_base_addr_offset = sob_addr;
			args->out.sob_count_before_submission = sob_initial_count;
			args->out.seq = cs_seq;
			break;
		case CS_TYPE_DEFAULT:
			args->out.sob_count_before_submission = sob_initial_count;
			args->out.seq = cs_seq;
			break;
		default:
			args->out.seq = cs_seq;
			break;
		}

		args->out.status = rc;
	}

out:
	return rc;
}

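/*
 * Illustrative sketch (not part of the driver): a minimal user-space call
 * sequence for a default submission through the CS ioctl handled above.
 * "fd", "chunks" and "num_chunks" are assumed to be prepared by the caller;
 * error handling is elided.
 *
 *	union hl_cs_args cs_args = {0};
 *
 *	cs_args.in.chunks_execute = (__u64) (uintptr_t) chunks;
 *	cs_args.in.num_chunks_execute = num_chunks;
 *	if (!ioctl(fd, HL_IOCTL_CS, &cs_args) &&
 *			cs_args.out.status == HL_CS_STATUS_SUCCESS)
 *		seq = cs_args.out.seq; (sequence to pass to the wait ioctls)
 */
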
static int hl_wait_for_fence(struct hl_ctx *ctx, u64 seq, struct hl_fence *fence,
				enum hl_cs_wait_status *status, u64 timeout_us,
				s64 *timestamp)
{
	struct hl_device *hdev = ctx->hdev;
	long completion_rc;
	int rc = 0;

	if (IS_ERR(fence)) {
		rc = PTR_ERR(fence);
		if (rc == -EINVAL)
			dev_notice_ratelimited(hdev->dev,
				"Can't wait on CS %llu because current CS is at seq %llu\n",
				seq, ctx->cs_sequence);
		return rc;
	}

	if (!fence) {
		dev_dbg(hdev->dev,
			"Can't wait on seq %llu because current CS is at seq %llu (Fence is gone)\n",
			seq, ctx->cs_sequence);

		*status = CS_WAIT_STATUS_GONE;
		return 0;
	}

	if (!timeout_us) {
		completion_rc = completion_done(&fence->completion);
	} else {
		unsigned long timeout;

		timeout = (timeout_us == MAX_SCHEDULE_TIMEOUT) ?
				timeout_us : usecs_to_jiffies(timeout_us);
		completion_rc =
			wait_for_completion_interruptible_timeout(
				&fence->completion, timeout);
	}

	if (completion_rc > 0) {
		*status = CS_WAIT_STATUS_COMPLETED;
		if (timestamp)
			*timestamp = ktime_to_ns(fence->timestamp);
	} else {
		*status = CS_WAIT_STATUS_BUSY;
	}

	if (fence->error == -ETIMEDOUT)
		rc = -ETIMEDOUT;
	else if (fence->error == -EIO)
		rc = -EIO;

	return rc;
}

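/*
 * Usage note (sketch): calling hl_wait_for_fence() with timeout_us == 0 only
 * polls the fence via completion_done() and never sleeps, e.g.:
 *
 *	rc = hl_wait_for_fence(ctx, seq, fence, &status, 0, NULL);
 *
 * On return, status is CS_WAIT_STATUS_COMPLETED, CS_WAIT_STATUS_BUSY or
 * CS_WAIT_STATUS_GONE without any blocking; hl_cs_poll_fences() below relies
 * on exactly this behavior.
 */
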
/**
 * hl_cs_poll_fences - iterate CS fences to check for CS completion
 *
 * @mcs_data: multi-CS internal data
 * @mcs_compl: multi-CS completion structure
 *
 * @return 0 on success, otherwise non 0 error code
 *
 * The function iterates over all CS sequences in the list and sets a bit in
 * completion_bitmap for each completed CS.
 * While iterating, the function sets the stream map of each fence in the fence
 * array in the completion QID stream map to be used by CSs to perform
 * completion to the multi-CS context.
 * This function shall be called after taking context ref.
 */
static int hl_cs_poll_fences(struct multi_cs_data *mcs_data, struct multi_cs_completion *mcs_compl)
{
	struct hl_fence **fence_ptr = mcs_data->fence_arr;
	struct hl_device *hdev = mcs_data->ctx->hdev;
	int i, rc, arr_len = mcs_data->arr_len;
	u64 *seq_arr = mcs_data->seq_arr;
	ktime_t max_ktime, first_cs_time;
	enum hl_cs_wait_status status;

	memset(fence_ptr, 0, arr_len * sizeof(*fence_ptr));

	/* get all fences under the same lock */
	rc = hl_ctx_get_fences(mcs_data->ctx, seq_arr, fence_ptr, arr_len);
	if (rc)
		return rc;

	/*
	 * re-initialize the completion here to handle 2 possible cases:
	 * 1. CS will complete the multi-CS prior to clearing the completion, in which
	 *    case the fence iteration is guaranteed to catch the CS completion.
	 * 2. the completion will occur after re-init of the completion,
	 *    in which case we will wake up immediately in wait_for_completion.
	 */
	reinit_completion(&mcs_compl->completion);

	/*
	 * set to maximum time to verify timestamp is valid: if at the end
	 * this value is maintained, no timestamp was updated
	 */
	max_ktime = ktime_set(KTIME_SEC_MAX, 0);
	first_cs_time = max_ktime;

	for (i = 0; i < arr_len; i++, fence_ptr++) {
		struct hl_fence *fence = *fence_ptr;

		/*
		 * In order to prevent a case where we wait until timeout even though a CS associated
		 * with the multi-CS actually completed we do things in the below order:
		 * 1. for each fence set its QID map in the multi-CS completion QID map. This way
		 *    any CS can, potentially, complete the multi CS for the specific QID (note
		 *    that once completion is initialized, calling complete* and then wait on the
		 *    completion will cause it to return at once)
		 * 2. only after allowing multi-CS completion for the specific QID we check whether
		 *    the specific CS already completed (and thus the wait for completion part will
		 *    be skipped). if the CS is not completed it is guaranteed that the completing
		 *    CS will wake up the completion.
		 */
		if (fence)
			mcs_compl->stream_master_qid_map |=
					fence->stream_master_qid_map;

		/*
		 * function won't sleep as it is called with timeout 0 (i.e.
		 * poll the fence)
		 */
		rc = hl_wait_for_fence(mcs_data->ctx, seq_arr[i], fence,
						&status, 0, NULL);
		if (rc) {
			dev_err(hdev->dev,
				"wait_for_fence error :%d for CS seq %llu\n",
				rc, seq_arr[i]);
			break;
		}

		switch (status) {
		case CS_WAIT_STATUS_BUSY:
			/* CS did not finish; QID to wait on already stored */
			break;
		case CS_WAIT_STATUS_COMPLETED:
			/*
			 * Using mcs_handling_done to avoid the possibility of mcs_data
			 * returning to the user indicating CS completed before it finished
			 * all of its mcs handling, to avoid race the next time the
			 * user waits for mcs.
			 * note: when reaching this case fence is definitely not NULL
			 *       but the NULL check was added to overcome static analysis
			 */
			if (fence && !fence->mcs_handling_done) {
				/*
				 * in case multi CS is completed but MCS handling not done
				 * we "complete" the multi CS to prevent it from waiting
				 * until time-out and the "multi-CS handling done" will have
				 * another chance at the next iteration
				 */
				complete_all(&mcs_compl->completion);
				break;
			}

			mcs_data->completion_bitmap |= BIT(i);
			/*
			 * For all completed CSs we take the earliest timestamp.
			 * For this we have to validate that the timestamp is
			 * earliest of all timestamps so far.
			 */
			if (mcs_data->update_ts &&
					(ktime_compare(fence->timestamp, first_cs_time) < 0))
				first_cs_time = fence->timestamp;
			break;
		case CS_WAIT_STATUS_GONE:
			mcs_data->update_ts = false;
			mcs_data->gone_cs = true;
			/*
			 * It is possible to get old sequence numbers from the user
			 * which relate to already completed CSs whose fences are
			 * already gone. In this case, the CS is marked as completed
			 * but there is no need to consider its QID for mcs completion.
			 */
			mcs_data->completion_bitmap |= BIT(i);
			break;
		default:
			dev_err(hdev->dev, "Invalid fence status\n");
			rc = -EINVAL;
			break;
		}
	}

	hl_fences_put(mcs_data->fence_arr, arr_len);

	if (mcs_data->update_ts &&
			(ktime_compare(first_cs_time, max_ktime) != 0))
		mcs_data->timestamp = ktime_to_ns(first_cs_time);

	return rc;
}

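/*
 * Illustrative example (not from the sources): for seq_arr = {8, 9, 10} where
 * only CS 8 and CS 10 have completed, hl_cs_poll_fences() leaves
 * mcs_data->completion_bitmap == 0x5 (BIT(0) | BIT(2)) and, when update_ts is
 * set, mcs_data->timestamp holds the earlier of the two fence timestamps.
 */
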
static int _hl_cs_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
				u64 timeout_us, u64 seq,
				enum hl_cs_wait_status *status, s64 *timestamp)
{
	struct hl_fence *fence;
	int rc = 0;

	if (timestamp)
		*timestamp = 0;

	hl_ctx_get(hdev, ctx);
	fence = hl_ctx_get_fence(ctx, seq);

	rc = hl_wait_for_fence(ctx, seq, fence, status, timeout_us, timestamp);
	hl_fence_put(fence);
	hl_ctx_put(ctx);

	return rc;
}

static inline unsigned long hl_usecs64_to_jiffies(const u64 usecs)
{
	if (usecs <= U32_MAX)
		return usecs_to_jiffies(usecs);

	/*
	 * If the value in nanoseconds is larger than 64 bit, use the largest
	 * 64 bit value.
	 */
	if (usecs >= ((u64)(U64_MAX / NSEC_PER_USEC)))
		return nsecs_to_jiffies(U64_MAX);

	return nsecs_to_jiffies(usecs * NSEC_PER_USEC);
}

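/*
 * Example (assuming HZ == 1000, i.e. one jiffy per millisecond):
 * hl_usecs64_to_jiffies(1000) == 1, while a value near U64_MAX would overflow
 * the nanoseconds multiplication and is therefore clamped through
 * nsecs_to_jiffies(U64_MAX) by the check above.
 */
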
/**
 * hl_wait_multi_cs_completion_init - init completion structure
 *
 * @hdev: pointer to habanalabs device structure
 *
 * @return valid completion struct pointer on success, otherwise error pointer
 *
 * Up to MULTI_CS_MAX_USER_CTX calls can be done concurrently to the driver.
 * The function gets the first available completion (by marking it "used")
 * and initializes its values.
 */
static struct multi_cs_completion *hl_wait_multi_cs_completion_init(struct hl_device *hdev)
{
	struct multi_cs_completion *mcs_compl;
	int i;

	/* find free multi_cs completion structure */
	for (i = 0; i < MULTI_CS_MAX_USER_CTX; i++) {
		mcs_compl = &hdev->multi_cs_completion[i];
		spin_lock(&mcs_compl->lock);
		if (!mcs_compl->used) {
			mcs_compl->used = 1;
			mcs_compl->timestamp = 0;
			/*
			 * init QID map to 0 to avoid completion by CSs. The actual QID map
			 * to multi-CS CSs will be set incrementally at a later stage
			 */
			mcs_compl->stream_master_qid_map = 0;
			spin_unlock(&mcs_compl->lock);
			break;
		}
		spin_unlock(&mcs_compl->lock);
	}

	if (i == MULTI_CS_MAX_USER_CTX) {
		dev_err(hdev->dev, "no available multi-CS completion structure\n");
		return ERR_PTR(-ENOMEM);
	}

	return mcs_compl;
}

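/*
 * Typical pairing (sketch): a waiter reserves a completion slot, uses it and
 * must always return it, exactly as hl_multi_cs_wait_ioctl() does below:
 *
 *	mcs_compl = hl_wait_multi_cs_completion_init(hdev);
 *	if (IS_ERR(mcs_compl))
 *		return PTR_ERR(mcs_compl);
 *	...
 *	hl_wait_multi_cs_completion_fini(mcs_compl);
 */
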
/**
 * hl_wait_multi_cs_completion_fini - return completion structure and set as
 *                                    unused
 *
 * @mcs_compl: pointer to the completion structure
 */
static void hl_wait_multi_cs_completion_fini(
					struct multi_cs_completion *mcs_compl)
{
	/*
	 * free completion structure, do it under lock to be in-sync with the
	 * thread that signals completion
	 */
	spin_lock(&mcs_compl->lock);
	mcs_compl->used = 0;
	spin_unlock(&mcs_compl->lock);
}

/**
 * hl_wait_multi_cs_completion - wait for first CS to complete
 *
 * @mcs_data: multi-CS internal data
 * @mcs_compl: multi-CS completion structure
 *
 * @return 0 on success, otherwise non 0 error code
 */
static int hl_wait_multi_cs_completion(struct multi_cs_data *mcs_data,
						struct multi_cs_completion *mcs_compl)
{
	long completion_rc;

	completion_rc = wait_for_completion_interruptible_timeout(&mcs_compl->completion,
								mcs_data->timeout_jiffies);

	/* update timestamp */
	if (completion_rc > 0)
		mcs_data->timestamp = mcs_compl->timestamp;

	mcs_data->wait_status = completion_rc;

	return 0;
}

/**
 * hl_multi_cs_completion_init - init array of multi-CS completion structures
 *
 * @hdev: pointer to habanalabs device structure
 */
void hl_multi_cs_completion_init(struct hl_device *hdev)
{
	struct multi_cs_completion *mcs_cmpl;
	int i;

	for (i = 0; i < MULTI_CS_MAX_USER_CTX; i++) {
		mcs_cmpl = &hdev->multi_cs_completion[i];
		mcs_cmpl->used = 0;
		spin_lock_init(&mcs_cmpl->lock);
		init_completion(&mcs_cmpl->completion);
	}
}

/**
 * hl_multi_cs_wait_ioctl - implementation of the multi-CS wait ioctl
 *
 * @hpriv: pointer to the private data of the fd
 * @data: pointer to multi-CS wait ioctl in/out args
 */
static int hl_multi_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data)
{
	struct multi_cs_completion *mcs_compl;
	struct hl_device *hdev = hpriv->hdev;
	struct multi_cs_data mcs_data = {0};
	union hl_wait_cs_args *args = data;
	struct hl_ctx *ctx = hpriv->ctx;
	struct hl_fence **fence_arr;
	void __user *seq_arr;
	u32 size_to_copy;
	u64 *cs_seq_arr;
	u8 seq_arr_len;
	int rc;

	if (!hdev->supports_wait_for_multi_cs) {
		dev_err(hdev->dev, "Wait for multi CS is not supported\n");
		return -EPERM;
	}

	seq_arr_len = args->in.seq_arr_len;

	if (seq_arr_len > HL_WAIT_MULTI_CS_LIST_MAX_LEN) {
		dev_err(hdev->dev, "Can wait only up to %d CSs, input sequence is of length %u\n",
				HL_WAIT_MULTI_CS_LIST_MAX_LEN, seq_arr_len);
		return -EINVAL;
	}

	/* allocate memory for sequence array */
	cs_seq_arr =
		kmalloc_array(seq_arr_len, sizeof(*cs_seq_arr), GFP_KERNEL);
	if (!cs_seq_arr)
		return -ENOMEM;

	/* copy CS sequence array from user */
	seq_arr = (void __user *) (uintptr_t) args->in.seq;
	size_to_copy = seq_arr_len * sizeof(*cs_seq_arr);
	if (copy_from_user(cs_seq_arr, seq_arr, size_to_copy)) {
		dev_err(hdev->dev, "Failed to copy multi-cs sequence array from user\n");
		rc = -EFAULT;
		goto free_seq_arr;
	}

	/* allocate array for the fences */
	fence_arr = kmalloc_array(seq_arr_len, sizeof(*fence_arr), GFP_KERNEL);
	if (!fence_arr) {
		rc = -ENOMEM;
		goto free_seq_arr;
	}

	/* initialize the multi-CS internal data */
	mcs_data.ctx = ctx;
	mcs_data.seq_arr = cs_seq_arr;
	mcs_data.fence_arr = fence_arr;
	mcs_data.arr_len = seq_arr_len;

	hl_ctx_get(hdev, ctx);

	/* wait (with timeout) for the first CS to be completed */
	mcs_data.timeout_jiffies = hl_usecs64_to_jiffies(args->in.timeout_us);
	mcs_compl = hl_wait_multi_cs_completion_init(hdev);
	if (IS_ERR(mcs_compl)) {
		rc = PTR_ERR(mcs_compl);
		goto put_ctx;
	}

	/* poll all CS fences, extract timestamp */
	mcs_data.update_ts = true;
	rc = hl_cs_poll_fences(&mcs_data, mcs_compl);
	/*
	 * skip wait for CS completion when one of the below is true:
	 * - an error on the poll function
	 * - one or more CS in the list completed
	 * - the user called ioctl with timeout 0
	 */
	if (rc || mcs_data.completion_bitmap || !args->in.timeout_us)
		goto completion_fini;

wait_again:
	rc = hl_wait_multi_cs_completion(&mcs_data, mcs_compl);
	if (rc || (mcs_data.wait_status == 0))
		goto completion_fini;

	/*
	 * poll fences once again to update the CS map.
	 * no timestamp should be updated this time.
	 */
	mcs_data.update_ts = false;
	rc = hl_cs_poll_fences(&mcs_data, mcs_compl);

	if (rc || mcs_data.completion_bitmap)
		goto completion_fini;

	/*
	 * if hl_wait_multi_cs_completion returned before timeout (i.e.
	 * it got a completion) it either got completed by a CS in the multi-CS
	 * list (in which case the indication will be a non-empty
	 * completion_bitmap) or it got completed by a CS submitted to one of
	 * the shared stream masters but not in the multi-CS list (in which
	 * case we should wait again but modify the timeout and set the
	 * timestamp to zero to let a CS related to the current multi-CS set a
	 * new, relevant, timestamp)
	 */
	mcs_data.timeout_jiffies = mcs_data.wait_status;
	mcs_compl->timestamp = 0;

	goto wait_again;

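	/*
	 * Numeric sketch of the re-wait above: wait_status holds the value
	 * returned by wait_for_completion_interruptible_timeout(), i.e. the
	 * jiffies remaining when the completion fired. E.g. waiting with a
	 * 100-jiffy timeout and being woken after 40 jiffies leaves
	 * wait_status == 60, so the second wait uses the remaining 60 jiffies
	 * rather than restarting from 100.
	 */
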
completion_fini:
	hl_wait_multi_cs_completion_fini(mcs_compl);

put_ctx:
	hl_ctx_put(ctx);
	kfree(fence_arr);

free_seq_arr:
	kfree(cs_seq_arr);

	if (rc)
		return rc;

	if (mcs_data.wait_status == -ERESTARTSYS) {
		dev_err_ratelimited(hdev->dev,
				"user process got signal while waiting for Multi-CS\n");
		return -EINTR;
	}

	/* update output args */
	memset(args, 0, sizeof(*args));

	if (mcs_data.completion_bitmap) {
		args->out.status = HL_WAIT_CS_STATUS_COMPLETED;
		args->out.cs_completion_map = mcs_data.completion_bitmap;

		/* if timestamp is not 0, it's valid */
		if (mcs_data.timestamp) {
			args->out.timestamp_nsec = mcs_data.timestamp;
			args->out.flags |= HL_WAIT_CS_STATUS_FLAG_TIMESTAMP_VLD;
		}

		/* update if some CS was gone */
		if (!mcs_data.timestamp)
			args->out.flags |= HL_WAIT_CS_STATUS_FLAG_GONE;
	} else {
		args->out.status = HL_WAIT_CS_STATUS_BUSY;
	}

	return 0;
}

static int hl_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data)
{
	struct hl_device *hdev = hpriv->hdev;
	union hl_wait_cs_args *args = data;
	enum hl_cs_wait_status status;
	u64 seq = args->in.seq;
	s64 timestamp;
	int rc;

	rc = _hl_cs_wait_ioctl(hdev, hpriv->ctx, args->in.timeout_us, seq,
				&status, &timestamp);

	if (rc == -ERESTARTSYS) {
		dev_err_ratelimited(hdev->dev,
			"user process got signal while waiting for CS handle %llu\n",
			seq);
		return -EINTR;
	}

	memset(args, 0, sizeof(*args));

	if (rc) {
		if (rc == -ETIMEDOUT) {
			dev_err_ratelimited(hdev->dev,
				"CS %llu has timed-out while user process is waiting for it\n",
				seq);
			args->out.status = HL_WAIT_CS_STATUS_TIMEDOUT;
		} else if (rc == -EIO) {
			dev_err_ratelimited(hdev->dev,
				"CS %llu has been aborted while user process is waiting for it\n",
				seq);
			args->out.status = HL_WAIT_CS_STATUS_ABORTED;
		}
		return rc;
	}

	if (timestamp) {
		args->out.flags |= HL_WAIT_CS_STATUS_FLAG_TIMESTAMP_VLD;
		args->out.timestamp_nsec = timestamp;
	}

	switch (status) {
	case CS_WAIT_STATUS_GONE:
		args->out.flags |= HL_WAIT_CS_STATUS_FLAG_GONE;
		fallthrough;
	case CS_WAIT_STATUS_COMPLETED:
		args->out.status = HL_WAIT_CS_STATUS_COMPLETED;
		break;
	case CS_WAIT_STATUS_BUSY:
	default:
		args->out.status = HL_WAIT_CS_STATUS_BUSY;
		break;
	}

	return 0;
}

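/*
 * Example of the out fields set above: a fence that completed long ago and
 * was already released returns HL_WAIT_CS_STATUS_COMPLETED with
 * HL_WAIT_CS_STATUS_FLAG_GONE set and no timestamp, while a recently
 * completed fence also carries HL_WAIT_CS_STATUS_FLAG_TIMESTAMP_VLD and a
 * valid timestamp_nsec.
 */
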
static int ts_buff_get_kernel_ts_record(struct hl_ts_buff *ts_buff,
					struct hl_cb *cq_cb,
					u64 ts_offset, u64 cq_offset, u64 target_value,
					spinlock_t *wait_list_lock,
					struct hl_user_pending_interrupt **pend)
{
	struct hl_user_pending_interrupt *requested_offset_record =
			(struct hl_user_pending_interrupt *)ts_buff->kernel_buff_address +
			ts_offset;
	struct hl_user_pending_interrupt *cb_last =
		(struct hl_user_pending_interrupt *)ts_buff->kernel_buff_address +
			(ts_buff->kernel_buff_size / sizeof(struct hl_user_pending_interrupt));
	unsigned long flags, iter_counter = 0;
	u64 current_cq_counter;

	/* Validate ts_offset not exceeding last max */
	if (requested_offset_record > cb_last) {
		dev_err(ts_buff->hdev->dev, "Ts offset exceeds max CB offset(0x%llx)\n",
				(u64)(uintptr_t)cb_last);
		return -EINVAL;
	}

start_over:
	spin_lock_irqsave(wait_list_lock, flags);

	/* Unregister only if we didn't reach the target value
	 * since in this case there will be no handling in irq context
	 * and then it's safe to delete the node out of the interrupt list
	 * and re-use it on another interrupt
	 */
	if (requested_offset_record->ts_reg_info.in_use) {
		current_cq_counter = *requested_offset_record->cq_kernel_addr;
		if (current_cq_counter < requested_offset_record->cq_target_value) {
			list_del(&requested_offset_record->wait_list_node);
			spin_unlock_irqrestore(wait_list_lock, flags);

			hl_ts_put(requested_offset_record->ts_reg_info.ts_buff);
			hl_cb_put(requested_offset_record->ts_reg_info.cq_cb);

			dev_dbg(ts_buff->hdev->dev, "ts node removed from interrupt list now can re-use\n");
		} else {
			dev_dbg(ts_buff->hdev->dev, "ts node in middle of irq handling\n");

			/* irq handling in the middle, give it time to finish */
			spin_unlock_irqrestore(wait_list_lock, flags);
			usleep_range(1, 10);
			if (++iter_counter == MAX_TS_ITER_NUM) {
				dev_err(ts_buff->hdev->dev,
					"handling registration interrupt took too long!!\n");
				return -EINVAL;
			}

			goto start_over;
		}
	} else {
		spin_unlock_irqrestore(wait_list_lock, flags);
	}

	/* Fill up the new registration node info */
	requested_offset_record->ts_reg_info.in_use = 1;
	requested_offset_record->ts_reg_info.ts_buff = ts_buff;
	requested_offset_record->ts_reg_info.cq_cb = cq_cb;
	requested_offset_record->ts_reg_info.timestamp_kernel_addr =
			(u64 *) ts_buff->user_buff_address + ts_offset;
	requested_offset_record->cq_kernel_addr =
			(u64 *) cq_cb->kernel_address + cq_offset;
	requested_offset_record->cq_target_value = target_value;

	*pend = requested_offset_record;

	dev_dbg(ts_buff->hdev->dev, "Found available node in TS kernel CB(0x%llx)\n",
		(u64)(uintptr_t)requested_offset_record);

	return 0;
}

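/*
 * Layout sketch (an informal restatement of the pointer arithmetic above):
 * the timestamp buffer is treated as an array of hl_user_pending_interrupt
 * records, so ts_offset is a record index rather than a byte offset:
 *
 *	capacity = ts_buff->kernel_buff_size /
 *			sizeof(struct hl_user_pending_interrupt);
 *	record = (struct hl_user_pending_interrupt *)
 *			ts_buff->kernel_buff_address + ts_offset;
 *
 * which is exactly how requested_offset_record and cb_last are computed.
 */
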
static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
				struct hl_cb_mgr *cb_mgr, struct hl_ts_mgr *ts_mgr,
				u64 timeout_us, u64 cq_counters_handle, u64 cq_counters_offset,
				u64 target_value, struct hl_user_interrupt *interrupt,
				bool register_ts_record, u64 ts_handle, u64 ts_offset,
				u32 *status, u64 *timestamp)
{
	u32 cq_patched_handle, ts_patched_handle;
	struct hl_user_pending_interrupt *pend;
	struct hl_ts_buff *ts_buff;
	struct hl_cb *cq_cb;
	unsigned long timeout, flags;
	long completion_rc;
	int rc = 0;

	timeout = hl_usecs64_to_jiffies(timeout_us);

	hl_ctx_get(hdev, ctx);

	cq_patched_handle = lower_32_bits(cq_counters_handle >> PAGE_SHIFT);
	cq_cb = hl_cb_get(hdev, cb_mgr, cq_patched_handle);
	if (!cq_cb) {
		rc = -EINVAL;
		goto put_ctx;
	}

	if (register_ts_record) {
		dev_dbg(hdev->dev, "Timestamp registration: interrupt id: %u, ts offset: %llu, cq_offset: %llu\n",
					interrupt->interrupt_id, ts_offset, cq_counters_offset);

		ts_patched_handle = lower_32_bits(ts_handle >> PAGE_SHIFT);
		ts_buff = hl_ts_get(hdev, ts_mgr, ts_patched_handle);
		if (!ts_buff) {
			rc = -EINVAL;
			goto put_cq_cb;
		}

		/* Find first available record */
		rc = ts_buff_get_kernel_ts_record(ts_buff, cq_cb, ts_offset,
						cq_counters_offset, target_value,
						&interrupt->wait_list_lock, &pend);
		if (rc)
			goto put_ts_buff;
	} else {
		pend = kzalloc(sizeof(*pend), GFP_KERNEL);
		if (!pend) {
			rc = -ENOMEM;
			goto put_cq_cb;
		}
		hl_fence_init(&pend->fence, ULONG_MAX);
		pend->cq_kernel_addr = (u64 *) cq_cb->kernel_address + cq_counters_offset;
		pend->cq_target_value = target_value;
	}

	spin_lock_irqsave(&interrupt->wait_list_lock, flags);

	/* We check for completion value as interrupt could have been received
	 * before we added the node to the wait list
	 */
	if (*pend->cq_kernel_addr >= target_value) {
		if (register_ts_record)
			pend->ts_reg_info.in_use = 0;
		spin_unlock_irqrestore(&interrupt->wait_list_lock, flags);

		*status = HL_WAIT_CS_STATUS_COMPLETED;

		if (register_ts_record) {
			*pend->ts_reg_info.timestamp_kernel_addr = ktime_get_ns();
			goto put_ts_buff;
		}

		pend->fence.timestamp = ktime_get();
		goto set_timestamp;
	} else if (!timeout_us) {
		spin_unlock_irqrestore(&interrupt->wait_list_lock, flags);
		*status = HL_WAIT_CS_STATUS_BUSY;
		pend->fence.timestamp = ktime_get();
		goto set_timestamp;
	}

	/* Add pending user interrupt to relevant list for the interrupt
	 * handler to monitor.
	 * Note that we cannot keep the list sorted by target value
	 * in order to shorten the traversal, since the same list can
	 * hold nodes for different cq counter handles.
	 */
	list_add_tail(&pend->wait_list_node, &interrupt->wait_list_head);
	spin_unlock_irqrestore(&interrupt->wait_list_lock, flags);

	if (register_ts_record) {
		rc = *status = HL_WAIT_CS_STATUS_COMPLETED;
		goto ts_registration_exit;
	}

	/* Wait for interrupt handler to signal completion */
	completion_rc = wait_for_completion_interruptible_timeout(&pend->fence.completion,
								timeout);
	if (completion_rc > 0) {
		*status = HL_WAIT_CS_STATUS_COMPLETED;
	} else {
		if (completion_rc == -ERESTARTSYS) {
			dev_err_ratelimited(hdev->dev,
				"user process got signal while waiting for interrupt ID %d\n",
				interrupt->interrupt_id);
			rc = -EINTR;
			*status = HL_WAIT_CS_STATUS_ABORTED;
		} else {
			if (pend->fence.error == -EIO) {
				dev_err_ratelimited(hdev->dev,
					"interrupt based wait ioctl aborted(error:%d) due to a reset cycle initiated\n",
					pend->fence.error);
				rc = -EIO;
				*status = HL_WAIT_CS_STATUS_ABORTED;
			} else {
				/* The wait has timed-out. We don't know anything beyond that
				 * because the workload wasn't submitted through the driver.
				 * Therefore, from driver's perspective, the workload is still
				 * executing.
				 */
				rc = 0;
				*status = HL_WAIT_CS_STATUS_BUSY;
			}
		}
	}

	/*
	 * We keep removing the node from the list here, and not at the irq
	 * handler, for the completion-timeout case. If it's a registration
	 * for a ts record, the node will be deleted in the irq handler after
	 * we reach the target value.
	 */
	spin_lock_irqsave(&interrupt->wait_list_lock, flags);
	list_del(&pend->wait_list_node);
	spin_unlock_irqrestore(&interrupt->wait_list_lock, flags);

set_timestamp:
	*timestamp = ktime_to_ns(pend->fence.timestamp);

	kfree(pend);
	hl_cb_put(cq_cb);
ts_registration_exit:
	hl_ctx_put(ctx);

	return rc;

put_ts_buff:
	hl_ts_put(ts_buff);
put_cq_cb:
	hl_cb_put(cq_cb);
put_ctx:
	hl_ctx_put(ctx);

	return rc;
}

static int _hl_interrupt_wait_ioctl_user_addr(struct hl_device *hdev, struct hl_ctx *ctx,
				u64 timeout_us, u64 user_address,
				u64 target_value, struct hl_user_interrupt *interrupt,
				u32 *status, u64 *timestamp)
{
	struct hl_user_pending_interrupt *pend;
	unsigned long timeout, flags;
	u64 completion_value;
	long completion_rc;
	int rc = 0;

	timeout = hl_usecs64_to_jiffies(timeout_us);

	hl_ctx_get(hdev, ctx);

	pend = kzalloc(sizeof(*pend), GFP_KERNEL);
	if (!pend) {
		hl_ctx_put(ctx);
		return -ENOMEM;
	}

	hl_fence_init(&pend->fence, ULONG_MAX);

	/* Add pending user interrupt to relevant list for the interrupt
	 * handler to monitor
	 */
	spin_lock_irqsave(&interrupt->wait_list_lock, flags);
	list_add_tail(&pend->wait_list_node, &interrupt->wait_list_head);
	spin_unlock_irqrestore(&interrupt->wait_list_lock, flags);

	/* We check for completion value as interrupt could have been received
	 * before we added the node to the wait list
	 */
	if (copy_from_user(&completion_value, u64_to_user_ptr(user_address), 8)) {
		dev_err(hdev->dev, "Failed to copy completion value from user\n");
		rc = -EFAULT;
		goto remove_pending_user_interrupt;
	}

	if (completion_value >= target_value) {
		*status = HL_WAIT_CS_STATUS_COMPLETED;
		/* There was no interrupt, we assume the completion is now. */
		pend->fence.timestamp = ktime_get();
	} else {
		*status = HL_WAIT_CS_STATUS_BUSY;
	}

	if (!timeout_us || (*status == HL_WAIT_CS_STATUS_COMPLETED))
		goto remove_pending_user_interrupt;

wait_again:
	/* Wait for interrupt handler to signal completion */
	completion_rc = wait_for_completion_interruptible_timeout(&pend->fence.completion,
								timeout);

	/* If timeout did not expire we need to perform the comparison.
	 * If comparison fails, keep waiting until timeout expires
	 */
	if (completion_rc > 0) {
		spin_lock_irqsave(&interrupt->wait_list_lock, flags);
		/* reinit_completion must be called before we check for user
		 * completion value, otherwise, if interrupt is received after
		 * the comparison and before the next wait_for_completion,
		 * we will reach timeout and fail
		 */
		reinit_completion(&pend->fence.completion);
		spin_unlock_irqrestore(&interrupt->wait_list_lock, flags);

		if (copy_from_user(&completion_value, u64_to_user_ptr(user_address), 8)) {
			dev_err(hdev->dev, "Failed to copy completion value from user\n");
			rc = -EFAULT;

			goto remove_pending_user_interrupt;
		}

		if (completion_value >= target_value) {
			*status = HL_WAIT_CS_STATUS_COMPLETED;
		} else if (pend->fence.error) {
			dev_err_ratelimited(hdev->dev,
				"interrupt based wait ioctl aborted(error:%d) due to a reset cycle initiated\n",
				pend->fence.error);
			/* set the command completion status as ABORTED */
			*status = HL_WAIT_CS_STATUS_ABORTED;
		} else {
			timeout = completion_rc;
			goto wait_again;
		}
	} else if (completion_rc == -ERESTARTSYS) {
		dev_err_ratelimited(hdev->dev,
			"user process got signal while waiting for interrupt ID %d\n",
			interrupt->interrupt_id);
		rc = -EINTR;
	} else {
		/* The wait has timed-out. We don't know anything beyond that
		 * because the workload wasn't submitted through the driver.
		 * Therefore, from driver's perspective, the workload is still
		 * executing.
		 */
		*status = HL_WAIT_CS_STATUS_BUSY;
	}

remove_pending_user_interrupt:
	spin_lock_irqsave(&interrupt->wait_list_lock, flags);
	list_del(&pend->wait_list_node);
	spin_unlock_irqrestore(&interrupt->wait_list_lock, flags);

	*timestamp = ktime_to_ns(pend->fence.timestamp);

	kfree(pend);
	hl_ctx_put(ctx);

	return rc;
}

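/*
 * Semantics sketch: the user-address variant is satisfied once the 64-bit
 * value at user_address reaches target_value. E.g. with target_value == 3,
 * HL_WAIT_CS_STATUS_COMPLETED is returned as soon as *user_address >= 3,
 * whether the value was already there on entry or advanced later by an
 * interrupt that woke the waiter.
 */
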
static int hl_interrupt_wait_ioctl(struct hl_fpriv *hpriv, void *data)
{
	u16 interrupt_id, first_interrupt, last_interrupt;
	struct hl_device *hdev = hpriv->hdev;
	struct asic_fixed_properties *prop;
	struct hl_user_interrupt *interrupt;
	union hl_wait_cs_args *args = data;
	u32 status = HL_WAIT_CS_STATUS_BUSY;
	u64 timestamp = 0;
	int rc;

	prop = &hdev->asic_prop;

	if (!prop->user_interrupt_count) {
		dev_err(hdev->dev, "no user interrupts allowed");
		return -EPERM;
	}

	interrupt_id = FIELD_GET(HL_WAIT_CS_FLAGS_INTERRUPT_MASK, args->in.flags);

	first_interrupt = prop->first_available_user_msix_interrupt;
	last_interrupt = prop->first_available_user_msix_interrupt +
			prop->user_interrupt_count - 1;

	if ((interrupt_id < first_interrupt || interrupt_id > last_interrupt) &&
			interrupt_id != HL_COMMON_USER_INTERRUPT_ID) {
		dev_err(hdev->dev, "invalid user interrupt %u", interrupt_id);
		return -EINVAL;
	}

	if (interrupt_id == HL_COMMON_USER_INTERRUPT_ID)
		interrupt = &hdev->common_user_interrupt;
	else
		interrupt = &hdev->user_interrupt[interrupt_id - first_interrupt];

	if (args->in.flags & HL_WAIT_CS_FLAGS_INTERRUPT_KERNEL_CQ)
		rc = _hl_interrupt_wait_ioctl(hdev, hpriv->ctx, &hpriv->cb_mgr, &hpriv->ts_mem_mgr,
				args->in.interrupt_timeout_us, args->in.cq_counters_handle,
				args->in.cq_counters_offset,
				args->in.target, interrupt,
				!!(args->in.flags & HL_WAIT_CS_FLAGS_REGISTER_INTERRUPT),
				args->in.timestamp_handle, args->in.timestamp_offset,
				&status, &timestamp);
	else
		rc = _hl_interrupt_wait_ioctl_user_addr(hdev, hpriv->ctx,
				args->in.interrupt_timeout_us, args->in.addr,
				args->in.target, interrupt, &status,
				&timestamp);

	if (rc)
		return rc;

	memset(args, 0, sizeof(*args));
	args->out.status = status;

	if (timestamp) {
		args->out.timestamp_nsec = timestamp;
		args->out.flags |= HL_WAIT_CS_STATUS_FLAG_TIMESTAMP_VLD;
	}

	return 0;
}

int hl_wait_ioctl(struct hl_fpriv *hpriv, void *data)
{
	union hl_wait_cs_args *args = data;
	u32 flags = args->in.flags;
	int rc;

	/* If the device is not operational, no point in waiting for any command submission or
	 * user interrupt
	 */
	if (!hl_device_operational(hpriv->hdev, NULL))
		return -EBUSY;

	if (flags & HL_WAIT_CS_FLAGS_INTERRUPT)
		rc = hl_interrupt_wait_ioctl(hpriv, data);
	else if (flags & HL_WAIT_CS_FLAGS_MULTI_CS)
		rc = hl_multi_cs_wait_ioctl(hpriv, data);
	else
		rc = hl_cs_wait_ioctl(hpriv, data);

	return rc;
}
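
/*
 * Dispatch sketch (not part of the driver): the wait ioctl above serves three
 * flows based on args->in.flags. A user-space multi-CS wait, assuming "fd"
 * and a populated "seq_arr" of "n" sequence numbers, could look like:
 *
 *	union hl_wait_cs_args wait_args = {0};
 *
 *	wait_args.in.seq = (__u64) (uintptr_t) seq_arr;
 *	wait_args.in.seq_arr_len = n;
 *	wait_args.in.timeout_us = 1000000;
 *	wait_args.in.flags = HL_WAIT_CS_FLAGS_MULTI_CS;
 *	rc = ioctl(fd, HL_IOCTL_WAIT_CS, &wait_args);
 *
 * wait_args.out.cs_completion_map then flags which of the CSs completed.
 */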