1 // SPDX-License-Identifier: GPL-2.0 OR MIT
2 /*
3  * Copyright 2014-2022 Advanced Micro Devices, Inc.
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the "Software"),
7  * to deal in the Software without restriction, including without limitation
8  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9  * and/or sell copies of the Software, and to permit persons to whom the
10  * Software is furnished to do so, subject to the following conditions:
11  *
12  * The above copyright notice and this permission notice shall be included in
13  * all copies or substantial portions of the Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
19  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
20  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
21  * OTHER DEALINGS IN THE SOFTWARE.
22  *
23  */
24
25 #include <linux/slab.h>
26 #include <linux/list.h>
27 #include "kfd_device_queue_manager.h"
28 #include "kfd_priv.h"
29 #include "kfd_kernel_queue.h"
30 #include "amdgpu_amdkfd.h"
31
32 static inline struct process_queue_node *get_queue_by_qid(
33                         struct process_queue_manager *pqm, unsigned int qid)
34 {
35         struct process_queue_node *pqn;
36
37         list_for_each_entry(pqn, &pqm->queues, process_queue_list) {
38                 if ((pqn->q && pqn->q->properties.queue_id == qid) ||
39                     (pqn->kq && pqn->kq->queue->properties.queue_id == qid))
40                         return pqn;
41         }
42
43         return NULL;
44 }
45
46 static int assign_queue_slot_by_qid(struct process_queue_manager *pqm,
47                                     unsigned int qid)
48 {
49         if (qid >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS)
50                 return -EINVAL;
51
52         if (__test_and_set_bit(qid, pqm->queue_slot_bitmap)) {
53                 pr_err("Cannot create new queue because requested qid(%u) is in use\n", qid);
54                 return -ENOSPC;
55         }
56
57         return 0;
58 }
59
60 static int find_available_queue_slot(struct process_queue_manager *pqm,
61                                         unsigned int *qid)
62 {
63         unsigned long found;
64
65         found = find_first_zero_bit(pqm->queue_slot_bitmap,
66                         KFD_MAX_NUM_OF_QUEUES_PER_PROCESS);
67
68         pr_debug("The new slot id %lu\n", found);
69
70         if (found >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS) {
71                 pr_info("Cannot open more queues for process with pasid 0x%x\n",
72                                 pqm->process->pasid);
73                 return -ENOMEM;
74         }
75
76         set_bit(found, pqm->queue_slot_bitmap);
77         *qid = found;
78
79         return 0;
80 }
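
/*
 * Illustrative sketch, not part of the driver: the two helpers above manage
 * queue-id slots in a per-process bitmap -- find_available_queue_slot() takes
 * the first free slot, while assign_queue_slot_by_qid() reserves a specific
 * slot (the CRIU restore path). The same bookkeeping over a plain 64-bit
 * word, with hypothetical demo_* names:
 */
static int demo_alloc_slot(uint64_t *bitmap, unsigned int *qid)
{
	unsigned int i;

	for (i = 0; i < 64; i++) {
		if (!(*bitmap & (1ULL << i))) {
			*bitmap |= 1ULL << i;	/* claim the first free slot */
			*qid = i;
			return 0;
		}
	}
	return -ENOMEM;				/* all slots in use */
}

static int demo_reserve_slot(uint64_t *bitmap, unsigned int qid)
{
	if (qid >= 64)
		return -EINVAL;			/* qid out of range */
	if (*bitmap & (1ULL << qid))
		return -ENOSPC;			/* requested qid already in use */
	*bitmap |= 1ULL << qid;
	return 0;
}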
81
82 void kfd_process_dequeue_from_device(struct kfd_process_device *pdd)
83 {
84         struct kfd_node *dev = pdd->dev;
85
86         if (pdd->already_dequeued)
87                 return;
88
89         dev->dqm->ops.process_termination(dev->dqm, &pdd->qpd);
90         if (dev->kfd->shared_resources.enable_mes)
91                 amdgpu_mes_flush_shader_debugger(dev->adev, pdd->proc_ctx_gpu_addr);
92         pdd->already_dequeued = true;
93 }
94
95 int pqm_set_gws(struct process_queue_manager *pqm, unsigned int qid,
96                         void *gws)
97 {
98         struct kfd_node *dev = NULL;
99         struct process_queue_node *pqn;
100         struct kfd_process_device *pdd;
101         struct kgd_mem *mem = NULL;
102         int ret;
103
104         pqn = get_queue_by_qid(pqm, qid);
105         if (!pqn) {
106                 pr_err("Queue id does not match any known queue\n");
107                 return -EINVAL;
108         }
109
110         if (pqn->q)
111                 dev = pqn->q->device;
112         if (WARN_ON(!dev))
113                 return -ENODEV;
114
115         pdd = kfd_get_process_device_data(dev, pqm->process);
116         if (!pdd) {
117                 pr_err("Process device data doesn't exist\n");
118                 return -EINVAL;
119         }
120
121         /* Only one queue per process may have GWS assigned */
122         if (gws && pdd->qpd.num_gws)
123                 return -EBUSY;
124
125         if (!gws && pdd->qpd.num_gws == 0)
126                 return -EINVAL;
127
128         if (KFD_GC_VERSION(dev) != IP_VERSION(9, 4, 3) && !dev->kfd->shared_resources.enable_mes) {
129                 if (gws)
130                         ret = amdgpu_amdkfd_add_gws_to_process(pdd->process->kgd_process_info,
131                                 gws, &mem);
132                 else
133                         ret = amdgpu_amdkfd_remove_gws_from_process(pdd->process->kgd_process_info,
134                                 pqn->q->gws);
135                 if (unlikely(ret))
136                         return ret;
137                 pqn->q->gws = mem;
138         } else {
139                 /*
140                  * Intentionally set GWS to a non-NULL value
141                  * for devices that do not use GWS for global wave
142                  * synchronization but require the formality
143                  * of setting GWS for cooperative groups.
144                  */
145                 pqn->q->gws = gws ? ERR_PTR(-ENOMEM) : NULL;
146         }
147
148         pdd->qpd.num_gws = gws ? dev->adev->gds.gws_size : 0;
149
150         return pqn->q->device->dqm->ops.update_queue(pqn->q->device->dqm,
151                                                         pqn->q, NULL);
152 }
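
/*
 * Illustrative sketch, not part of the driver: pqm_set_gws() allows at most
 * one queue per process to hold the GWS allocation -- assigning while another
 * queue already holds it fails with -EBUSY, and releasing when nothing is
 * held fails with -EINVAL. The guard in isolation, with hypothetical names:
 */
static int demo_gws_guard(bool assigning, unsigned int num_gws_held)
{
	if (assigning && num_gws_held)
		return -EBUSY;		/* another queue already owns GWS */
	if (!assigning && num_gws_held == 0)
		return -EINVAL;		/* nothing to release */
	return 0;			/* state change is allowed */
}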
153
154 void kfd_process_dequeue_from_all_devices(struct kfd_process *p)
155 {
156         int i;
157
158         for (i = 0; i < p->n_pdds; i++)
159                 kfd_process_dequeue_from_device(p->pdds[i]);
160 }
161
162 int pqm_init(struct process_queue_manager *pqm, struct kfd_process *p)
163 {
164         INIT_LIST_HEAD(&pqm->queues);
165         pqm->queue_slot_bitmap = bitmap_zalloc(KFD_MAX_NUM_OF_QUEUES_PER_PROCESS,
166                                                GFP_KERNEL);
167         if (!pqm->queue_slot_bitmap)
168                 return -ENOMEM;
169         pqm->process = p;
170
171         return 0;
172 }
173
174 static void pqm_clean_queue_resource(struct process_queue_manager *pqm,
175                                      struct process_queue_node *pqn)
176 {
177         struct kfd_node *dev;
178         struct kfd_process_device *pdd;
179
180         dev = pqn->q->device;
181
182         pdd = kfd_get_process_device_data(dev, pqm->process);
183         if (!pdd) {
184                 pr_err("Process device data doesn't exist\n");
185                 return;
186         }
187
188         if (pqn->q->gws) {
189                 if (KFD_GC_VERSION(pqn->q->device) != IP_VERSION(9, 4, 3) &&
190                     !dev->kfd->shared_resources.enable_mes)
191                         amdgpu_amdkfd_remove_gws_from_process(
192                                 pqm->process->kgd_process_info, pqn->q->gws);
193                 pdd->qpd.num_gws = 0;
194         }
195
196         if (dev->kfd->shared_resources.enable_mes) {
197                 amdgpu_amdkfd_free_gtt_mem(dev->adev, pqn->q->gang_ctx_bo);
198                 if (pqn->q->wptr_bo)
199                         amdgpu_amdkfd_free_gtt_mem(dev->adev, pqn->q->wptr_bo);
200         }
201 }
202
203 void pqm_uninit(struct process_queue_manager *pqm)
204 {
205         struct process_queue_node *pqn, *next;
206
207         list_for_each_entry_safe(pqn, next, &pqm->queues, process_queue_list) {
208                 if (pqn->q)
209                         pqm_clean_queue_resource(pqm, pqn);
210
211                 kfd_procfs_del_queue(pqn->q);
212                 uninit_queue(pqn->q);
213                 list_del(&pqn->process_queue_list);
214                 kfree(pqn);
215         }
216
217         bitmap_free(pqm->queue_slot_bitmap);
218         pqm->queue_slot_bitmap = NULL;
219 }
220
221 static int init_user_queue(struct process_queue_manager *pqm,
222                                 struct kfd_node *dev, struct queue **q,
223                                 struct queue_properties *q_properties,
224                                 struct file *f, struct amdgpu_bo *wptr_bo,
225                                 unsigned int qid)
226 {
227         int retval;
228
229         /* Doorbell initialized in user space */
230         q_properties->doorbell_ptr = NULL;
231         q_properties->exception_status = KFD_EC_MASK(EC_QUEUE_NEW);
232
233         /* Let DQM handle it */
234         q_properties->vmid = 0;
235         q_properties->queue_id = qid;
236
237         retval = init_queue(q, q_properties);
238         if (retval != 0)
239                 return retval;
240
241         (*q)->device = dev;
242         (*q)->process = pqm->process;
243
244         if (dev->kfd->shared_resources.enable_mes) {
245                 retval = amdgpu_amdkfd_alloc_gtt_mem(dev->adev,
246                                                 AMDGPU_MES_GANG_CTX_SIZE,
247                                                 &(*q)->gang_ctx_bo,
248                                                 &(*q)->gang_ctx_gpu_addr,
249                                                 &(*q)->gang_ctx_cpu_ptr,
250                                                 false);
251                 if (retval) {
252                         pr_err("failed to allocate gang context bo\n");
253                         goto cleanup;
254                 }
255                 memset((*q)->gang_ctx_cpu_ptr, 0, AMDGPU_MES_GANG_CTX_SIZE);
256                 (*q)->wptr_bo = wptr_bo;
257         }
258
259         pr_debug("PQM After init queue\n");
260         return 0;
261
262 cleanup:
263         uninit_queue(*q);
264         *q = NULL;
265         return retval;
266 }
267
268 int pqm_create_queue(struct process_queue_manager *pqm,
269                             struct kfd_node *dev,
270                             struct file *f,
271                             struct queue_properties *properties,
272                             unsigned int *qid,
273                             struct amdgpu_bo *wptr_bo,
274                             const struct kfd_criu_queue_priv_data *q_data,
275                             const void *restore_mqd,
276                             const void *restore_ctl_stack,
277                             uint32_t *p_doorbell_offset_in_process)
278 {
279         int retval;
280         struct kfd_process_device *pdd;
281         struct queue *q;
282         struct process_queue_node *pqn;
283         struct kernel_queue *kq;
284         enum kfd_queue_type type = properties->type;
285         unsigned int max_queues = 127; /* HWS limit */
286
287         /*
288          * On GFX 9.4.3, increase the number of queues that
289          * can be created to 255. No HWS limit on GFX 9.4.3.
290          */
291         if (KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 3))
292                 max_queues = 255;
293
294         q = NULL;
295         kq = NULL;
296
297         pdd = kfd_get_process_device_data(dev, pqm->process);
298         if (!pdd) {
299                 pr_err("Process device data doesn't exist\n");
300                 return -1;
301         }
302
303         /*
304          * For a debug process, verify that it is within the static queues
305          * limit; currently the limit is set to half of the total available
306          * HQD slots. If we are just about to create a DIQ, the is_debug
307          * flag is not set yet, hence we also check the queue type.
308          */
309         if ((pdd->qpd.is_debug) || (type == KFD_QUEUE_TYPE_DIQ))
310                 max_queues = dev->kfd->device_info.max_no_of_hqd/2;
311
312         if (pdd->qpd.queue_count >= max_queues)
313                 return -ENOSPC;
314
315         if (q_data) {
316                 retval = assign_queue_slot_by_qid(pqm, q_data->q_id);
317                 *qid = q_data->q_id;
318         } else
319                 retval = find_available_queue_slot(pqm, qid);
320
321         if (retval != 0)
322                 return retval;
323
324         if (list_empty(&pdd->qpd.queues_list) &&
325             list_empty(&pdd->qpd.priv_queue_list))
326                 dev->dqm->ops.register_process(dev->dqm, &pdd->qpd);
327
328         pqn = kzalloc(sizeof(*pqn), GFP_KERNEL);
329         if (!pqn) {
330                 retval = -ENOMEM;
331                 goto err_allocate_pqn;
332         }
333
334         switch (type) {
335         case KFD_QUEUE_TYPE_SDMA:
336         case KFD_QUEUE_TYPE_SDMA_XGMI:
337                 /* SDMA queues are always allocated statically no matter
338                  * which scheduler mode is used. We also do not need to
339                  * check whether an SDMA queue can be allocated here, because
340                  * allocate_sdma_queue() in create_queue() has the
341                  * corresponding check logic.
342                  */
343                 retval = init_user_queue(pqm, dev, &q, properties, f, wptr_bo, *qid);
344                 if (retval != 0)
345                         goto err_create_queue;
346                 pqn->q = q;
347                 pqn->kq = NULL;
348                 retval = dev->dqm->ops.create_queue(dev->dqm, q, &pdd->qpd, q_data,
349                                                     restore_mqd, restore_ctl_stack);
350                 print_queue(q);
351                 break;
352
353         case KFD_QUEUE_TYPE_COMPUTE:
354                 /* Check if there is oversubscription */
355                 if ((dev->dqm->sched_policy ==
356                      KFD_SCHED_POLICY_HWS_NO_OVERSUBSCRIPTION) &&
357                 ((dev->dqm->processes_count >= dev->vm_info.vmid_num_kfd) ||
358                 (dev->dqm->active_queue_count >= get_cp_queues_num(dev->dqm)))) {
359                         pr_debug("Over-subscription is not allowed when amdkfd.sched_policy == 1\n");
360                         retval = -EPERM;
361                         goto err_create_queue;
362                 }
363
364                 retval = init_user_queue(pqm, dev, &q, properties, f, wptr_bo, *qid);
365                 if (retval != 0)
366                         goto err_create_queue;
367                 pqn->q = q;
368                 pqn->kq = NULL;
369                 retval = dev->dqm->ops.create_queue(dev->dqm, q, &pdd->qpd, q_data,
370                                                     restore_mqd, restore_ctl_stack);
371                 print_queue(q);
372                 break;
373         case KFD_QUEUE_TYPE_DIQ:
374                 kq = kernel_queue_init(dev, KFD_QUEUE_TYPE_DIQ);
375                 if (!kq) {
376                         retval = -ENOMEM;
377                         goto err_create_queue;
378                 }
379                 kq->queue->properties.queue_id = *qid;
380                 pqn->kq = kq;
381                 pqn->q = NULL;
382                 retval = kfd_process_drain_interrupts(pdd);
383                 if (retval)
384                         break;
385
386                 retval = dev->dqm->ops.create_kernel_queue(dev->dqm,
387                                                         kq, &pdd->qpd);
388                 break;
389         default:
390                 WARN(1, "Invalid queue type %d", type);
391                 retval = -EINVAL;
392         }
393
394         if (retval != 0) {
395                 pr_err("Pasid 0x%x DQM create queue type %d failed. ret %d\n",
396                         pqm->process->pasid, type, retval);
397                 goto err_create_queue;
398         }
399
400         if (q && p_doorbell_offset_in_process) {
401                 /* Return the doorbell offset (in bytes) within the
402                  * doorbell page to the caller so it can be passed up
403                  * to user mode.
404                  * Relative doorbell index = absolute doorbell index -
405                  * absolute index of the first doorbell in the page.
406                  */
407                 uint32_t first_db_index = amdgpu_doorbell_index_on_bar(pdd->dev->adev,
408                                                                        pdd->qpd.proc_doorbells,
409                                                                        0,
410                                                                        pdd->dev->kfd->device_info.doorbell_size);
411
412                 *p_doorbell_offset_in_process = (q->properties.doorbell_off
413                                                 - first_db_index) * sizeof(uint32_t);
414         }
415
416         pr_debug("PQM After DQM create queue\n");
417
418         list_add(&pqn->process_queue_list, &pqm->queues);
419
420         if (q) {
421                 pr_debug("PQM done creating queue\n");
422                 kfd_procfs_add_queue(q);
423                 print_queue_properties(&q->properties);
424         }
425
426         return retval;
427
428 err_create_queue:
429         uninit_queue(q);
430         if (kq)
431                 kernel_queue_uninit(kq, false);
432         kfree(pqn);
433 err_allocate_pqn:
434         /* If the queues list is empty, unregister the process from the device */
435         clear_bit(*qid, pqm->queue_slot_bitmap);
436         if (list_empty(&pdd->qpd.queues_list) &&
437             list_empty(&pdd->qpd.priv_queue_list))
438                 dev->dqm->ops.unregister_process(dev->dqm, &pdd->qpd);
439         return retval;
440 }
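
/*
 * Illustrative sketch, not part of the driver: the doorbell offset reported
 * back to user mode is relative to the process doorbell page and expressed
 * in bytes, i.e. (queue's absolute doorbell index - absolute index of the
 * first doorbell in the page) * sizeof(uint32_t). For example, a queue at
 * absolute index 1027 in a page starting at index 1024 is reported at byte
 * offset (1027 - 1024) * 4 = 12; the indices here are made-up numbers.
 */
static uint32_t demo_doorbell_offset_in_process(uint32_t doorbell_off,
						uint32_t first_db_index)
{
	/* relative doorbell index scaled to a byte offset within the page */
	return (doorbell_off - first_db_index) * sizeof(uint32_t);
}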
441
442 int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid)
443 {
444         struct process_queue_node *pqn;
445         struct kfd_process_device *pdd;
446         struct device_queue_manager *dqm;
447         struct kfd_node *dev;
448         int retval;
449
450         dqm = NULL;
451
452         retval = 0;
453
454         pqn = get_queue_by_qid(pqm, qid);
455         if (!pqn) {
456                 pr_err("Queue id does not match any known queue\n");
457                 return -EINVAL;
458         }
459
460         dev = NULL;
461         if (pqn->kq)
462                 dev = pqn->kq->dev;
463         if (pqn->q)
464                 dev = pqn->q->device;
465         if (WARN_ON(!dev))
466                 return -ENODEV;
467
468         pdd = kfd_get_process_device_data(dev, pqm->process);
469         if (!pdd) {
470                 pr_err("Process device data doesn't exist\n");
471                 return -1;
472         }
473
474         if (pqn->kq) {
475                 /* destroy kernel queue (DIQ) */
476                 dqm = pqn->kq->dev->dqm;
477                 dqm->ops.destroy_kernel_queue(dqm, pqn->kq, &pdd->qpd);
478                 kernel_queue_uninit(pqn->kq, false);
479         }
480
481         if (pqn->q) {
482                 kfd_procfs_del_queue(pqn->q);
483                 dqm = pqn->q->device->dqm;
484                 retval = dqm->ops.destroy_queue(dqm, &pdd->qpd, pqn->q);
485                 if (retval) {
486                         pr_err("Pasid 0x%x destroy queue %d failed, ret %d\n",
487                                 pqm->process->pasid,
488                                 pqn->q->properties.queue_id, retval);
489                         if (retval != -ETIME)
490                                 goto err_destroy_queue;
491                 }
492
493                 pqm_clean_queue_resource(pqm, pqn);
494                 uninit_queue(pqn->q);
495         }
496
497         list_del(&pqn->process_queue_list);
498         kfree(pqn);
499         clear_bit(qid, pqm->queue_slot_bitmap);
500
501         if (list_empty(&pdd->qpd.queues_list) &&
502             list_empty(&pdd->qpd.priv_queue_list))
503                 dqm->ops.unregister_process(dqm, &pdd->qpd);
504
505 err_destroy_queue:
506         return retval;
507 }
508
509 int pqm_update_queue_properties(struct process_queue_manager *pqm,
510                                 unsigned int qid, struct queue_properties *p)
511 {
512         int retval;
513         struct process_queue_node *pqn;
514
515         pqn = get_queue_by_qid(pqm, qid);
516         if (!pqn) {
517                 pr_debug("No queue %d exists for update operation\n", qid);
518                 return -EFAULT;
519         }
520
521         pqn->q->properties.queue_address = p->queue_address;
522         pqn->q->properties.queue_size = p->queue_size;
523         pqn->q->properties.queue_percent = p->queue_percent;
524         pqn->q->properties.priority = p->priority;
525         pqn->q->properties.pm4_target_xcc = p->pm4_target_xcc;
526
527         retval = pqn->q->device->dqm->ops.update_queue(pqn->q->device->dqm,
528                                                         pqn->q, NULL);
529         if (retval != 0)
530                 return retval;
531
532         return 0;
533 }
534
535 int pqm_update_mqd(struct process_queue_manager *pqm,
536                                 unsigned int qid, struct mqd_update_info *minfo)
537 {
538         int retval;
539         struct process_queue_node *pqn;
540
541         pqn = get_queue_by_qid(pqm, qid);
542         if (!pqn) {
543                 pr_debug("No queue %d exists for update operation\n", qid);
544                 return -EFAULT;
545         }
546
547         /* CUs are masked for debugger requirements, so deny the user CU mask */
548         if (pqn->q->properties.is_dbg_wa && minfo && minfo->cu_mask.ptr)
549                 return -EBUSY;
550
551         /* ASICs that have WGPs must enforce pairwise enabled mask checks. */
552         if (minfo && minfo->cu_mask.ptr &&
553                         KFD_GC_VERSION(pqn->q->device) >= IP_VERSION(10, 0, 0)) {
554                 int i;
555
556                 for (i = 0; i < minfo->cu_mask.count; i += 2) {
557                         uint32_t cu_pair = (minfo->cu_mask.ptr[i / 32] >> (i % 32)) & 0x3;
558
559                         if (cu_pair && cu_pair != 0x3) {
560                                 pr_debug("CUs must be adjacent pairwise enabled.\n");
561                                 return -EINVAL;
562                         }
563                 }
564         }
565
566         retval = pqn->q->device->dqm->ops.update_queue(pqn->q->device->dqm,
567                                                         pqn->q, minfo);
568         if (retval != 0)
569                 return retval;
570
571         if (minfo && minfo->cu_mask.ptr)
572                 pqn->q->properties.is_user_cu_masked = true;
573
574         return 0;
575 }
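
/*
 * Illustrative sketch, not part of the driver: on GFX10 and later the CUs in
 * a WGP are exposed in pairs, so the user CU mask is validated two bits at a
 * time -- each pair must be 0b00 (both off) or 0b11 (both on). For example,
 * mask 0b1100 passes while 0b0100 is rejected. Standalone form of the check
 * used above:
 */
static bool demo_cu_mask_pairs_valid(const uint32_t *mask, uint32_t count)
{
	uint32_t i;

	for (i = 0; i < count; i += 2) {
		uint32_t pair = (mask[i / 32] >> (i % 32)) & 0x3;

		if (pair && pair != 0x3)
			return false;	/* a half-enabled pair is not allowed */
	}
	return true;
}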
576
577 struct kernel_queue *pqm_get_kernel_queue(
578                                         struct process_queue_manager *pqm,
579                                         unsigned int qid)
580 {
581         struct process_queue_node *pqn;
582
583         pqn = get_queue_by_qid(pqm, qid);
584         if (pqn && pqn->kq)
585                 return pqn->kq;
586
587         return NULL;
588 }
589
590 struct queue *pqm_get_user_queue(struct process_queue_manager *pqm,
591                                         unsigned int qid)
592 {
593         struct process_queue_node *pqn;
594
595         pqn = get_queue_by_qid(pqm, qid);
596         return pqn ? pqn->q : NULL;
597 }
598
599 int pqm_get_wave_state(struct process_queue_manager *pqm,
600                        unsigned int qid,
601                        void __user *ctl_stack,
602                        u32 *ctl_stack_used_size,
603                        u32 *save_area_used_size)
604 {
605         struct process_queue_node *pqn;
606
607         pqn = get_queue_by_qid(pqm, qid);
608         if (!pqn) {
609                 pr_debug("amdkfd: No queue %d exists for operation\n",
610                          qid);
611                 return -EFAULT;
612         }
613
614         return pqn->q->device->dqm->ops.get_wave_state(pqn->q->device->dqm,
615                                                        pqn->q,
616                                                        ctl_stack,
617                                                        ctl_stack_used_size,
618                                                        save_area_used_size);
619 }
620
621 int pqm_get_queue_snapshot(struct process_queue_manager *pqm,
622                            uint64_t exception_clear_mask,
623                            void __user *buf,
624                            int *num_qss_entries,
625                            uint32_t *entry_size)
626 {
627         struct process_queue_node *pqn;
628         struct kfd_queue_snapshot_entry src;
629         uint32_t tmp_entry_size = *entry_size, tmp_qss_entries = *num_qss_entries;
630         int r = 0;
631
632         *num_qss_entries = 0;
633         if (!(*entry_size))
634                 return -EINVAL;
635
636         *entry_size = min_t(size_t, *entry_size, sizeof(struct kfd_queue_snapshot_entry));
637         mutex_lock(&pqm->process->event_mutex);
638
639         memset(&src, 0, sizeof(src));
640
641         list_for_each_entry(pqn, &pqm->queues, process_queue_list) {
642                 if (!pqn->q)
643                         continue;
644
645                 if (*num_qss_entries < tmp_qss_entries) {
646                         set_queue_snapshot_entry(pqn->q, exception_clear_mask, &src);
647
648                         if (copy_to_user(buf, &src, *entry_size)) {
649                                 r = -EFAULT;
650                                 break;
651                         }
652                         buf += tmp_entry_size;
653                 }
654                 *num_qss_entries += 1;
655         }
656
657         mutex_unlock(&pqm->process->event_mutex);
658         return r;
659 }
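
/*
 * Illustrative sketch, not part of the driver: the snapshot loop above copies
 * only as many entries as the caller's buffer can hold (and only *entry_size
 * bytes per entry), but it keeps counting every queue, so on return
 * *num_qss_entries tells user space how many entries exist even when the
 * buffer was too small. The "count everything, copy what fits" pattern with
 * hypothetical names:
 */
static uint32_t demo_fill_snapshot(uint32_t total_queues, uint32_t buf_entries,
				   uint32_t *entries_copied)
{
	uint32_t reported = 0, copied = 0, i;

	for (i = 0; i < total_queues; i++) {
		if (copied < buf_entries)
			copied++;	/* one entry would be copied out here */
		reported++;		/* always counted, even with no room */
	}
	*entries_copied = copied;
	return reported;	/* > buf_entries means the buffer was too small */
}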
660
661 static int get_queue_data_sizes(struct kfd_process_device *pdd,
662                                 struct queue *q,
663                                 uint32_t *mqd_size,
664                                 uint32_t *ctl_stack_size)
665 {
666         int ret;
667
668         ret = pqm_get_queue_checkpoint_info(&pdd->process->pqm,
669                                             q->properties.queue_id,
670                                             mqd_size,
671                                             ctl_stack_size);
672         if (ret)
673                 pr_err("Failed to get queue dump info (%d)\n", ret);
674
675         return ret;
676 }
677
678 int kfd_process_get_queue_info(struct kfd_process *p,
679                                uint32_t *num_queues,
680                                uint64_t *priv_data_sizes)
681 {
682         uint32_t extra_data_sizes = 0;
683         struct queue *q;
684         int i;
685         int ret;
686
687         *num_queues = 0;
688
689         /* Run over all PDDs of the process */
690         for (i = 0; i < p->n_pdds; i++) {
691                 struct kfd_process_device *pdd = p->pdds[i];
692
693                 list_for_each_entry(q, &pdd->qpd.queues_list, list) {
694                         if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE ||
695                                 q->properties.type == KFD_QUEUE_TYPE_SDMA ||
696                                 q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
697                                 uint32_t mqd_size, ctl_stack_size;
698
699                                 *num_queues = *num_queues + 1;
700
701                                 ret = get_queue_data_sizes(pdd, q, &mqd_size, &ctl_stack_size);
702                                 if (ret)
703                                         return ret;
704
705                                 extra_data_sizes += mqd_size + ctl_stack_size;
706                         } else {
707                                 pr_err("Unsupported queue type (%d)\n", q->properties.type);
708                                 return -EOPNOTSUPP;
709                         }
710                 }
711         }
712         *priv_data_sizes = extra_data_sizes +
713                                 (*num_queues * sizeof(struct kfd_criu_queue_priv_data));
714
715         return 0;
716 }
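
/*
 * Illustrative sketch, not part of the driver: the CRIU private-data buffer
 * for queues is sized as one fixed-size header per queue plus each queue's
 * variable MQD and control-stack payload. Here header_size stands in for
 * sizeof(struct kfd_criu_queue_priv_data); all names are hypothetical.
 */
static uint64_t demo_queues_priv_data_size(uint32_t num_queues,
					   uint64_t header_size,
					   const uint32_t *mqd_sizes,
					   const uint32_t *ctl_stack_sizes)
{
	uint64_t total = (uint64_t)num_queues * header_size;
	uint32_t i;

	for (i = 0; i < num_queues; i++)
		total += mqd_sizes[i] + ctl_stack_sizes[i];	/* per-queue payload */

	return total;
}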
717
718 static int pqm_checkpoint_mqd(struct process_queue_manager *pqm,
719                               unsigned int qid,
720                               void *mqd,
721                               void *ctl_stack)
722 {
723         struct process_queue_node *pqn;
724
725         pqn = get_queue_by_qid(pqm, qid);
726         if (!pqn) {
727                 pr_debug("amdkfd: No queue %d exists for operation\n", qid);
728                 return -EFAULT;
729         }
730
731         if (!pqn->q->device->dqm->ops.checkpoint_mqd) {
732                 pr_err("amdkfd: queue dumping not supported on this device\n");
733                 return -EOPNOTSUPP;
734         }
735
736         return pqn->q->device->dqm->ops.checkpoint_mqd(pqn->q->device->dqm,
737                                                        pqn->q, mqd, ctl_stack);
738 }
739
740 static int criu_checkpoint_queue(struct kfd_process_device *pdd,
741                            struct queue *q,
742                            struct kfd_criu_queue_priv_data *q_data)
743 {
744         uint8_t *mqd, *ctl_stack;
745         int ret;
746
747         mqd = (void *)(q_data + 1);
748         ctl_stack = mqd + q_data->mqd_size;
749
750         q_data->gpu_id = pdd->user_gpu_id;
751         q_data->type = q->properties.type;
752         q_data->format = q->properties.format;
753         q_data->q_id =  q->properties.queue_id;
754         q_data->q_address = q->properties.queue_address;
755         q_data->q_size = q->properties.queue_size;
756         q_data->priority = q->properties.priority;
757         q_data->q_percent = q->properties.queue_percent;
758         q_data->read_ptr_addr = (uint64_t)q->properties.read_ptr;
759         q_data->write_ptr_addr = (uint64_t)q->properties.write_ptr;
760         q_data->doorbell_id = q->doorbell_id;
761
762         q_data->sdma_id = q->sdma_id;
763
764         q_data->eop_ring_buffer_address =
765                 q->properties.eop_ring_buffer_address;
766
767         q_data->eop_ring_buffer_size = q->properties.eop_ring_buffer_size;
768
769         q_data->ctx_save_restore_area_address =
770                 q->properties.ctx_save_restore_area_address;
771
772         q_data->ctx_save_restore_area_size =
773                 q->properties.ctx_save_restore_area_size;
774
775         q_data->gws = !!q->gws;
776
777         ret = pqm_checkpoint_mqd(&pdd->process->pqm, q->properties.queue_id, mqd, ctl_stack);
778         if (ret) {
779                 pr_err("Failed checkpoint queue_mqd (%d)\n", ret);
780                 return ret;
781         }
782
783         pr_debug("Dumping Queue: gpu_id:%x queue_id:%u\n", q_data->gpu_id, q_data->q_id);
784         return ret;
785 }
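
/*
 * Illustrative sketch, not part of the driver: each checkpointed queue is
 * serialized as | priv_data header | mqd | ctl_stack |, so the sub-buffers
 * are located by pointer arithmetic from the header, matching the
 * "mqd = (void *)(q_data + 1)" computation above. Hypothetical names:
 */
static void demo_queue_blob_layout(uint8_t *blob, uint64_t header_size,
				   uint32_t mqd_size,
				   uint8_t **mqd, uint8_t **ctl_stack)
{
	*mqd = blob + header_size;	/* MQD immediately follows the header */
	*ctl_stack = *mqd + mqd_size;	/* control stack follows the MQD */
}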
786
787 static int criu_checkpoint_queues_device(struct kfd_process_device *pdd,
788                                    uint8_t __user *user_priv,
789                                    unsigned int *q_index,
790                                    uint64_t *queues_priv_data_offset)
791 {
792         unsigned int q_private_data_size = 0;
793         uint8_t *q_private_data = NULL; /* Local buffer to store individual queue private data */
794         struct queue *q;
795         int ret = 0;
796
797         list_for_each_entry(q, &pdd->qpd.queues_list, list) {
798                 struct kfd_criu_queue_priv_data *q_data;
799                 uint64_t q_data_size;
800                 uint32_t mqd_size;
801                 uint32_t ctl_stack_size;
802
803                 if (q->properties.type != KFD_QUEUE_TYPE_COMPUTE &&
804                         q->properties.type != KFD_QUEUE_TYPE_SDMA &&
805                         q->properties.type != KFD_QUEUE_TYPE_SDMA_XGMI) {
806
807                         pr_err("Unsupported queue type (%d)\n", q->properties.type);
808                         ret = -EOPNOTSUPP;
809                         break;
810                 }
811
812                 ret = get_queue_data_sizes(pdd, q, &mqd_size, &ctl_stack_size);
813                 if (ret)
814                         break;
815
816                 q_data_size = sizeof(*q_data) + mqd_size + ctl_stack_size;
817
818                 /* Increase local buffer space if needed */
819                 if (q_private_data_size < q_data_size) {
820                         kfree(q_private_data);
821
822                         q_private_data = kzalloc(q_data_size, GFP_KERNEL);
823                         if (!q_private_data) {
824                                 ret = -ENOMEM;
825                                 break;
826                         }
827                         q_private_data_size = q_data_size;
828                 }
829
830                 q_data = (struct kfd_criu_queue_priv_data *)q_private_data;
831
832                 /* data stored in this order: priv_data, mqd, ctl_stack */
833                 q_data->mqd_size = mqd_size;
834                 q_data->ctl_stack_size = ctl_stack_size;
835
836                 ret = criu_checkpoint_queue(pdd, q, q_data);
837                 if (ret)
838                         break;
839
840                 q_data->object_type = KFD_CRIU_OBJECT_TYPE_QUEUE;
841
842                 ret = copy_to_user(user_priv + *queues_priv_data_offset,
843                                 q_data, q_data_size);
844                 if (ret) {
845                         ret = -EFAULT;
846                         break;
847                 }
848                 *queues_priv_data_offset += q_data_size;
849                 *q_index = *q_index + 1;
850         }
851
852         kfree(q_private_data);
853
854         return ret;
855 }
856
857 int kfd_criu_checkpoint_queues(struct kfd_process *p,
858                          uint8_t __user *user_priv_data,
859                          uint64_t *priv_data_offset)
860 {
861         int ret = 0, pdd_index, q_index = 0;
862
863         for (pdd_index = 0; pdd_index < p->n_pdds; pdd_index++) {
864                 struct kfd_process_device *pdd = p->pdds[pdd_index];
865
866                 /*
867                  * criu_checkpoint_queues_device will copy data to user and update q_index and
868                  * queues_priv_data_offset
869                  */
870                 ret = criu_checkpoint_queues_device(pdd, user_priv_data, &q_index,
871                                               priv_data_offset);
872
873                 if (ret)
874                         break;
875         }
876
877         return ret;
878 }
879
880 static void set_queue_properties_from_criu(struct queue_properties *qp,
881                                           struct kfd_criu_queue_priv_data *q_data)
882 {
883         qp->is_interop = false;
884         qp->queue_percent = q_data->q_percent;
885         qp->priority = q_data->priority;
886         qp->queue_address = q_data->q_address;
887         qp->queue_size = q_data->q_size;
888         qp->read_ptr = (uint32_t *) q_data->read_ptr_addr;
889         qp->write_ptr = (uint32_t *) q_data->write_ptr_addr;
890         qp->eop_ring_buffer_address = q_data->eop_ring_buffer_address;
891         qp->eop_ring_buffer_size = q_data->eop_ring_buffer_size;
892         qp->ctx_save_restore_area_address = q_data->ctx_save_restore_area_address;
893         qp->ctx_save_restore_area_size = q_data->ctx_save_restore_area_size;
894         qp->ctl_stack_size = q_data->ctl_stack_size;
895         qp->type = q_data->type;
896         qp->format = q_data->format;
897 }
898
899 int kfd_criu_restore_queue(struct kfd_process *p,
900                            uint8_t __user *user_priv_ptr,
901                            uint64_t *priv_data_offset,
902                            uint64_t max_priv_data_size)
903 {
904         uint8_t *mqd, *ctl_stack, *q_extra_data = NULL;
905         struct kfd_criu_queue_priv_data *q_data;
906         struct kfd_process_device *pdd;
907         uint64_t q_extra_data_size;
908         struct queue_properties qp;
909         unsigned int queue_id;
910         int ret = 0;
911
912         if (*priv_data_offset + sizeof(*q_data) > max_priv_data_size)
913                 return -EINVAL;
914
915         q_data = kmalloc(sizeof(*q_data), GFP_KERNEL);
916         if (!q_data)
917                 return -ENOMEM;
918
919         ret = copy_from_user(q_data, user_priv_ptr + *priv_data_offset, sizeof(*q_data));
920         if (ret) {
921                 ret = -EFAULT;
922                 goto exit;
923         }
924
925         *priv_data_offset += sizeof(*q_data);
926         q_extra_data_size = (uint64_t)q_data->ctl_stack_size + q_data->mqd_size;
927
928         if (*priv_data_offset + q_extra_data_size > max_priv_data_size) {
929                 ret = -EINVAL;
930                 goto exit;
931         }
932
933         q_extra_data = kmalloc(q_extra_data_size, GFP_KERNEL);
934         if (!q_extra_data) {
935                 ret = -ENOMEM;
936                 goto exit;
937         }
938
939         ret = copy_from_user(q_extra_data, user_priv_ptr + *priv_data_offset, q_extra_data_size);
940         if (ret) {
941                 ret = -EFAULT;
942                 goto exit;
943         }
944
945         *priv_data_offset += q_extra_data_size;
946
947         pdd = kfd_process_device_data_by_id(p, q_data->gpu_id);
948         if (!pdd) {
949                 pr_err("Failed to get pdd\n");
950                 ret = -EINVAL;
951                 goto exit;
952         }
953
954         /* data stored in this order: mqd, ctl_stack */
955         mqd = q_extra_data;
956         ctl_stack = mqd + q_data->mqd_size;
957
958         memset(&qp, 0, sizeof(qp));
959         set_queue_properties_from_criu(&qp, q_data);
960
961         print_queue_properties(&qp);
962
963         ret = pqm_create_queue(&p->pqm, pdd->dev, NULL, &qp, &queue_id, NULL, q_data, mqd, ctl_stack,
964                                 NULL);
965         if (ret) {
966                 pr_err("Failed to create new queue err:%d\n", ret);
967                 goto exit;
968         }
969
970         if (q_data->gws)
971                 ret = pqm_set_gws(&p->pqm, q_data->q_id, pdd->dev->gws);
972
973 exit:
974         if (ret)
975                 pr_err("Failed to restore queue (%d)\n", ret);
976         else
977                 pr_debug("Queue id %d was restored successfully\n", queue_id);
978
979         kfree(q_data);
980
981         return ret;
982 }
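
/*
 * Illustrative sketch, not part of the driver: the restore path consumes the
 * private blob in two steps -- the fixed-size header, then the variable MQD
 * plus control-stack payload -- and bounds-checks each step against the total
 * blob size before advancing the offset. The offset bookkeeping in isolation,
 * with hypothetical names:
 */
static int demo_consume_queue_blob(uint64_t *offset, uint64_t max_size,
				   uint64_t header_size, uint64_t extra_size)
{
	if (*offset + header_size > max_size)
		return -EINVAL;		/* header would run past the blob */
	*offset += header_size;		/* header consumed */

	if (*offset + extra_size > max_size)
		return -EINVAL;		/* mqd + ctl_stack would overflow */
	*offset += extra_size;		/* payload consumed */

	return 0;
}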
983
984 int pqm_get_queue_checkpoint_info(struct process_queue_manager *pqm,
985                                   unsigned int qid,
986                                   uint32_t *mqd_size,
987                                   uint32_t *ctl_stack_size)
988 {
989         struct process_queue_node *pqn;
990
991         pqn = get_queue_by_qid(pqm, qid);
992         if (!pqn) {
993                 pr_debug("amdkfd: No queue %d exists for operation\n", qid);
994                 return -EFAULT;
995         }
996
997         if (!pqn->q->device->dqm->ops.get_queue_checkpoint_info) {
998                 pr_err("amdkfd: queue dumping not supported on this device\n");
999                 return -EOPNOTSUPP;
1000         }
1001
1002         pqn->q->device->dqm->ops.get_queue_checkpoint_info(pqn->q->device->dqm,
1003                                                        pqn->q, mqd_size,
1004                                                        ctl_stack_size);
1005         return 0;
1006 }
1007
1008 #if defined(CONFIG_DEBUG_FS)
1009
1010 int pqm_debugfs_mqds(struct seq_file *m, void *data)
1011 {
1012         struct process_queue_manager *pqm = data;
1013         struct process_queue_node *pqn;
1014         struct queue *q;
1015         enum KFD_MQD_TYPE mqd_type;
1016         struct mqd_manager *mqd_mgr;
1017         int r = 0, xcc, num_xccs = 1;
1018         void *mqd;
1019         uint64_t size = 0;
1020
1021         list_for_each_entry(pqn, &pqm->queues, process_queue_list) {
1022                 if (pqn->q) {
1023                         q = pqn->q;
1024                         switch (q->properties.type) {
1025                         case KFD_QUEUE_TYPE_SDMA:
1026                         case KFD_QUEUE_TYPE_SDMA_XGMI:
1027                                 seq_printf(m, "  SDMA queue on device %x\n",
1028                                            q->device->id);
1029                                 mqd_type = KFD_MQD_TYPE_SDMA;
1030                                 break;
1031                         case KFD_QUEUE_TYPE_COMPUTE:
1032                                 seq_printf(m, "  Compute queue on device %x\n",
1033                                            q->device->id);
1034                                 mqd_type = KFD_MQD_TYPE_CP;
1035                                 num_xccs = NUM_XCC(q->device->xcc_mask);
1036                                 break;
1037                         default:
1038                                 seq_printf(m,
1039                                 "  Bad user queue type %d on device %x\n",
1040                                            q->properties.type, q->device->id);
1041                                 continue;
1042                         }
1043                         mqd_mgr = q->device->dqm->mqd_mgrs[mqd_type];
1044                         size = mqd_mgr->mqd_stride(mqd_mgr,
1045                                                         &q->properties);
1046                 } else if (pqn->kq) {
1047                         q = pqn->kq->queue;
1048                         mqd_mgr = pqn->kq->mqd_mgr;
1049                         switch (q->properties.type) {
1050                         case KFD_QUEUE_TYPE_DIQ:
1051                                 seq_printf(m, "  DIQ on device %x\n",
1052                                            pqn->kq->dev->id);
1053                                 break;
1054                         default:
1055                                 seq_printf(m,
1056                                 "  Bad kernel queue type %d on device %x\n",
1057                                            q->properties.type,
1058                                            pqn->kq->dev->id);
1059                                 continue;
1060                         }
1061                 } else {
1062                         seq_printf(m,
1063                 "  Weird: Queue node with neither kernel nor user queue\n");
1064                         continue;
1065                 }
1066
1067                 for (xcc = 0; xcc < num_xccs; xcc++) {
1068                         mqd = q->mqd + size * xcc;
1069                         r = mqd_mgr->debugfs_show_mqd(m, mqd);
1070                         if (r != 0)
1071                                 break;
1072                 }
1073         }
1074
1075         return r;
1076 }
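
/*
 * Illustrative sketch, not part of the driver: on multi-XCC devices each XCC
 * gets its own MQD copy laid out back to back, so the dump loop above reads
 * instance n at base + stride * n. Hypothetical name:
 */
static void *demo_mqd_for_xcc(void *mqd_base, uint64_t stride, int xcc)
{
	return (uint8_t *)mqd_base + stride * xcc;	/* per-XCC MQD copy */
}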
1077
1078 #endif