Merge tag 'vfs-6.8-rc6.fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs
[sfrench/cifs-2.6.git] / drivers / gpu / drm / xe / xe_sched_job.c
1 // SPDX-License-Identifier: MIT
2 /*
3  * Copyright © 2021 Intel Corporation
4  */
5
6 #include "xe_sched_job.h"
7
8 #include <linux/dma-fence-array.h>
9 #include <linux/slab.h>
10
11 #include "xe_device.h"
12 #include "xe_exec_queue.h"
13 #include "xe_gt.h"
14 #include "xe_hw_engine_types.h"
15 #include "xe_hw_fence.h"
16 #include "xe_lrc.h"
17 #include "xe_macros.h"
18 #include "xe_trace.h"
19 #include "xe_vm.h"
20
/* Slab cache for jobs with room for a single u64 batch address. */
static struct kmem_cache *xe_sched_job_slab;
/*
 * Slab cache for jobs with room for XE_HW_ENGINE_MAX_INSTANCE batch
 * addresses; used for parallel and migration exec queues.
 */
static struct kmem_cache *xe_sched_job_parallel_slab;
23
24 int __init xe_sched_job_module_init(void)
25 {
26         xe_sched_job_slab =
27                 kmem_cache_create("xe_sched_job",
28                                   sizeof(struct xe_sched_job) +
29                                   sizeof(u64), 0,
30                                   SLAB_HWCACHE_ALIGN, NULL);
31         if (!xe_sched_job_slab)
32                 return -ENOMEM;
33
34         xe_sched_job_parallel_slab =
35                 kmem_cache_create("xe_sched_job_parallel",
36                                   sizeof(struct xe_sched_job) +
37                                   sizeof(u64) *
38                                   XE_HW_ENGINE_MAX_INSTANCE, 0,
39                                   SLAB_HWCACHE_ALIGN, NULL);
40         if (!xe_sched_job_parallel_slab) {
41                 kmem_cache_destroy(xe_sched_job_slab);
42                 return -ENOMEM;
43         }
44
45         return 0;
46 }
47
48 void xe_sched_job_module_exit(void)
49 {
50         kmem_cache_destroy(xe_sched_job_slab);
51         kmem_cache_destroy(xe_sched_job_parallel_slab);
52 }
53
54 static struct xe_sched_job *job_alloc(bool parallel)
55 {
56         return kmem_cache_zalloc(parallel ? xe_sched_job_parallel_slab :
57                                  xe_sched_job_slab, GFP_KERNEL);
58 }
59
60 bool xe_sched_job_is_migration(struct xe_exec_queue *q)
61 {
62         return q->vm && (q->vm->flags & XE_VM_FLAG_MIGRATION);
63 }
64
65 static void job_free(struct xe_sched_job *job)
66 {
67         struct xe_exec_queue *q = job->q;
68         bool is_migration = xe_sched_job_is_migration(q);
69
70         kmem_cache_free(xe_exec_queue_is_parallel(job->q) || is_migration ?
71                         xe_sched_job_parallel_slab : xe_sched_job_slab, job);
72 }
73
74 static struct xe_device *job_to_xe(struct xe_sched_job *job)
75 {
76         return gt_to_xe(job->q->gt);
77 }
78
79 struct xe_sched_job *xe_sched_job_create(struct xe_exec_queue *q,
80                                          u64 *batch_addr)
81 {
82         struct xe_sched_job *job;
83         struct dma_fence **fences;
84         bool is_migration = xe_sched_job_is_migration(q);
85         int err;
86         int i, j;
87         u32 width;
88
89         /* only a kernel context can submit a vm-less job */
90         XE_WARN_ON(!q->vm && !(q->flags & EXEC_QUEUE_FLAG_KERNEL));
91
92         /* Migration and kernel engines have their own locking */
93         if (!(q->flags & (EXEC_QUEUE_FLAG_KERNEL | EXEC_QUEUE_FLAG_VM))) {
94                 lockdep_assert_held(&q->vm->lock);
95                 if (!xe_vm_in_lr_mode(q->vm))
96                         xe_vm_assert_held(q->vm);
97         }
98
99         job = job_alloc(xe_exec_queue_is_parallel(q) || is_migration);
100         if (!job)
101                 return ERR_PTR(-ENOMEM);
102
103         job->q = q;
104         kref_init(&job->refcount);
105         xe_exec_queue_get(job->q);
106
107         err = drm_sched_job_init(&job->drm, q->entity, 1, NULL);
108         if (err)
109                 goto err_free;
110
111         if (!xe_exec_queue_is_parallel(q)) {
112                 job->fence = xe_lrc_create_seqno_fence(q->lrc);
113                 if (IS_ERR(job->fence)) {
114                         err = PTR_ERR(job->fence);
115                         goto err_sched_job;
116                 }
117         } else {
118                 struct dma_fence_array *cf;
119
120                 fences = kmalloc_array(q->width, sizeof(*fences), GFP_KERNEL);
121                 if (!fences) {
122                         err = -ENOMEM;
123                         goto err_sched_job;
124                 }
125
126                 for (j = 0; j < q->width; ++j) {
127                         fences[j] = xe_lrc_create_seqno_fence(q->lrc + j);
128                         if (IS_ERR(fences[j])) {
129                                 err = PTR_ERR(fences[j]);
130                                 goto err_fences;
131                         }
132                 }
133
134                 cf = dma_fence_array_create(q->width, fences,
135                                             q->parallel.composite_fence_ctx,
136                                             q->parallel.composite_fence_seqno++,
137                                             false);
138                 if (!cf) {
139                         --q->parallel.composite_fence_seqno;
140                         err = -ENOMEM;
141                         goto err_fences;
142                 }
143
144                 /* Sanity check */
145                 for (j = 0; j < q->width; ++j)
146                         xe_assert(job_to_xe(job), cf->base.seqno == fences[j]->seqno);
147
148                 job->fence = &cf->base;
149         }
150
151         width = q->width;
152         if (is_migration)
153                 width = 2;
154
155         for (i = 0; i < width; ++i)
156                 job->batch_addr[i] = batch_addr[i];
157
158         /* All other jobs require a VM to be open which has a ref */
159         if (unlikely(q->flags & EXEC_QUEUE_FLAG_KERNEL))
160                 xe_device_mem_access_get(job_to_xe(job));
161         xe_device_assert_mem_access(job_to_xe(job));
162
163         trace_xe_sched_job_create(job);
164         return job;
165
166 err_fences:
167         for (j = j - 1; j >= 0; --j) {
168                 --q->lrc[j].fence_ctx.next_seqno;
169                 dma_fence_put(fences[j]);
170         }
171         kfree(fences);
172 err_sched_job:
173         drm_sched_job_cleanup(&job->drm);
174 err_free:
175         xe_exec_queue_put(q);
176         job_free(job);
177         return ERR_PTR(err);
178 }
179
180 /**
181  * xe_sched_job_destroy - Destroy XE schedule job
182  * @ref: reference to XE schedule job
183  *
184  * Called when ref == 0, drop a reference to job's xe_engine + fence, cleanup
185  * base DRM schedule job, and free memory for XE schedule job.
186  */
187 void xe_sched_job_destroy(struct kref *ref)
188 {
189         struct xe_sched_job *job =
190                 container_of(ref, struct xe_sched_job, refcount);
191
192         if (unlikely(job->q->flags & EXEC_QUEUE_FLAG_KERNEL))
193                 xe_device_mem_access_put(job_to_xe(job));
194         xe_exec_queue_put(job->q);
195         dma_fence_put(job->fence);
196         drm_sched_job_cleanup(&job->drm);
197         job_free(job);
198 }
199
200 void xe_sched_job_set_error(struct xe_sched_job *job, int error)
201 {
202         if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &job->fence->flags))
203                 return;
204
205         dma_fence_set_error(job->fence, error);
206
207         if (dma_fence_is_array(job->fence)) {
208                 struct dma_fence_array *array =
209                         to_dma_fence_array(job->fence);
210                 struct dma_fence **child = array->fences;
211                 unsigned int nchild = array->num_fences;
212
213                 do {
214                         struct dma_fence *current_fence = *child++;
215
216                         if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
217                                      &current_fence->flags))
218                                 continue;
219                         dma_fence_set_error(current_fence, error);
220                 } while (--nchild);
221         }
222
223         trace_xe_sched_job_set_error(job);
224
225         dma_fence_enable_sw_signaling(job->fence);
226         xe_hw_fence_irq_run(job->q->fence_irq);
227 }
228
229 bool xe_sched_job_started(struct xe_sched_job *job)
230 {
231         struct xe_lrc *lrc = job->q->lrc;
232
233         return !__dma_fence_is_later(xe_sched_job_seqno(job),
234                                      xe_lrc_start_seqno(lrc),
235                                      job->fence->ops);
236 }
237
238 bool xe_sched_job_completed(struct xe_sched_job *job)
239 {
240         struct xe_lrc *lrc = job->q->lrc;
241
242         /*
243          * Can safely check just LRC[0] seqno as that is last seqno written when
244          * parallel handshake is done.
245          */
246
247         return !__dma_fence_is_later(xe_sched_job_seqno(job), xe_lrc_seqno(lrc),
248                                      job->fence->ops);
249 }
250
251 void xe_sched_job_arm(struct xe_sched_job *job)
252 {
253         drm_sched_job_arm(&job->drm);
254 }
255
256 void xe_sched_job_push(struct xe_sched_job *job)
257 {
258         xe_sched_job_get(job);
259         trace_xe_sched_job_exec(job);
260         drm_sched_entity_push_job(&job->drm);
261         xe_sched_job_put(job);
262 }
263
264 /**
265  * xe_sched_job_last_fence_add_dep - Add last fence dependency to job
266  * @job:job to add the last fence dependency to
267  * @vm: virtual memory job belongs to
268  *
269  * Returns:
270  * 0 on success, or an error on failing to expand the array.
271  */
272 int xe_sched_job_last_fence_add_dep(struct xe_sched_job *job, struct xe_vm *vm)
273 {
274         struct dma_fence *fence;
275
276         fence = xe_exec_queue_last_fence_get(job->q, vm);
277
278         return drm_sched_job_add_dependency(&job->drm, fence);
279 }