4ff5f0fe6db83d49138a7f60bcfdce721e911d0d
[sfrench/cifs-2.6.git] / drivers / gpu / drm / amd / amdkfd / kfd_process.c
1 /*
2  * Copyright 2014 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  */
22
23 #include <linux/mutex.h>
24 #include <linux/log2.h>
25 #include <linux/sched.h>
26 #include <linux/sched/mm.h>
27 #include <linux/sched/task.h>
28 #include <linux/slab.h>
29 #include <linux/amd-iommu.h>
30 #include <linux/notifier.h>
31 #include <linux/compat.h>
32 #include <linux/mman.h>
33
34 struct mm_struct;
35
36 #include "kfd_priv.h"
37 #include "kfd_dbgmgr.h"
38
/*
 * Hash table of struct kfd_process, linked through the kfd_processes
 * field of each entry.
 * Unique/indexed by mm_struct*
 */
#define KFD_PROCESS_TABLE_SIZE 5 /* bits: 32 entries */
static DEFINE_HASHTABLE(kfd_processes_table, KFD_PROCESS_TABLE_SIZE);
/* Taken in this file around process lookup/creation and table removal. */
static DEFINE_MUTEX(kfd_processes_mutex);

/* Read-side protection used by the lookups and walks of the table. */
DEFINE_STATIC_SRCU(kfd_processes_srcu);

/* Workqueue on which kfd_process_wq_release() is queued. */
static struct workqueue_struct *kfd_process_wq;

/* Forward declarations for helpers defined further down in this file. */
static struct kfd_process *find_process(const struct task_struct *thread);
static void kfd_process_ref_release(struct kref *ref);
static struct kfd_process *create_process(const struct task_struct *thread,
                                        struct file *filep);
static int kfd_process_init_cwsr(struct kfd_process *p, struct file *filep);
56
57
58 void kfd_process_create_wq(void)
59 {
60         if (!kfd_process_wq)
61                 kfd_process_wq = alloc_workqueue("kfd_process_wq", 0, 0);
62 }
63
64 void kfd_process_destroy_wq(void)
65 {
66         if (kfd_process_wq) {
67                 destroy_workqueue(kfd_process_wq);
68                 kfd_process_wq = NULL;
69         }
70 }
71
72 struct kfd_process *kfd_create_process(struct file *filep)
73 {
74         struct kfd_process *process;
75         struct task_struct *thread = current;
76
77         if (!thread->mm)
78                 return ERR_PTR(-EINVAL);
79
80         /* Only the pthreads threading model is supported. */
81         if (thread->group_leader->mm != thread->mm)
82                 return ERR_PTR(-EINVAL);
83
84         /*
85          * take kfd processes mutex before starting of process creation
86          * so there won't be a case where two threads of the same process
87          * create two kfd_process structures
88          */
89         mutex_lock(&kfd_processes_mutex);
90
91         /* A prior open of /dev/kfd could have already created the process. */
92         process = find_process(thread);
93         if (process)
94                 pr_debug("Process already found\n");
95         else
96                 process = create_process(thread, filep);
97
98         mutex_unlock(&kfd_processes_mutex);
99
100         return process;
101 }
102
103 struct kfd_process *kfd_get_process(const struct task_struct *thread)
104 {
105         struct kfd_process *process;
106
107         if (!thread->mm)
108                 return ERR_PTR(-EINVAL);
109
110         /* Only the pthreads threading model is supported. */
111         if (thread->group_leader->mm != thread->mm)
112                 return ERR_PTR(-EINVAL);
113
114         process = find_process(thread);
115
116         return process;
117 }
118
119 static struct kfd_process *find_process_by_mm(const struct mm_struct *mm)
120 {
121         struct kfd_process *process;
122
123         hash_for_each_possible_rcu(kfd_processes_table, process,
124                                         kfd_processes, (uintptr_t)mm)
125                 if (process->mm == mm)
126                         return process;
127
128         return NULL;
129 }
130
131 static struct kfd_process *find_process(const struct task_struct *thread)
132 {
133         struct kfd_process *p;
134         int idx;
135
136         idx = srcu_read_lock(&kfd_processes_srcu);
137         p = find_process_by_mm(thread->mm);
138         srcu_read_unlock(&kfd_processes_srcu, idx);
139
140         return p;
141 }
142
143 void kfd_unref_process(struct kfd_process *p)
144 {
145         kref_put(&p->ref, kfd_process_ref_release);
146 }
147
148 static void kfd_process_destroy_pdds(struct kfd_process *p)
149 {
150         struct kfd_process_device *pdd, *temp;
151
152         list_for_each_entry_safe(pdd, temp, &p->per_device_data,
153                                  per_device_list) {
154                 pr_debug("Releasing pdd (topology id %d) for process (pasid %d)\n",
155                                 pdd->dev->id, p->pasid);
156
157                 list_del(&pdd->per_device_list);
158
159                 if (pdd->qpd.cwsr_kaddr)
160                         free_pages((unsigned long)pdd->qpd.cwsr_kaddr,
161                                 get_order(KFD_CWSR_TBA_TMA_SIZE));
162
163                 kfree(pdd);
164         }
165 }
166
167 /* No process locking is needed in this function, because the process
168  * is not findable any more. We must assume that no other thread is
169  * using it any more, otherwise we couldn't safely free the process
170  * structure in the end.
171  */
172 static void kfd_process_wq_release(struct work_struct *work)
173 {
174         struct kfd_process *p = container_of(work, struct kfd_process,
175                                              release_work);
176         struct kfd_process_device *pdd;
177
178         pr_debug("Releasing process (pasid %d) in workqueue\n", p->pasid);
179
180         list_for_each_entry(pdd, &p->per_device_data, per_device_list) {
181                 if (pdd->bound == PDD_BOUND)
182                         amd_iommu_unbind_pasid(pdd->dev->pdev, p->pasid);
183         }
184
185         kfd_process_destroy_pdds(p);
186
187         kfd_event_free_process(p);
188
189         kfd_pasid_free(p->pasid);
190         kfd_free_process_doorbells(p);
191
192         mutex_destroy(&p->mutex);
193
194         put_task_struct(p->lead_thread);
195
196         kfree(p);
197 }
198
199 static void kfd_process_ref_release(struct kref *ref)
200 {
201         struct kfd_process *p = container_of(ref, struct kfd_process, ref);
202
203         INIT_WORK(&p->release_work, kfd_process_wq_release);
204         queue_work(kfd_process_wq, &p->release_work);
205 }
206
207 static void kfd_process_destroy_delayed(struct rcu_head *rcu)
208 {
209         struct kfd_process *p = container_of(rcu, struct kfd_process, rcu);
210
211         kfd_unref_process(p);
212 }
213
/*
 * mmu_notifier .release callback for a kfd_process.
 *
 * Removes the process from kfd_processes_table, waits for table readers,
 * unregisters any debug manager owned by the process, dequeues it from
 * all devices, tears down its queue manager, clears p->mm and finally
 * schedules kfd_process_destroy_delayed() to drop a reference.
 */
static void kfd_process_notifier_release(struct mmu_notifier *mn,
                                        struct mm_struct *mm)
{
        struct kfd_process *p;
        struct kfd_process_device *pdd = NULL;

        /*
         * The kfd_process structure cannot be freed here because the
         * mmu_notifier srcu is read locked
         */
        p = container_of(mn, struct kfd_process, mmu_notifier);
        if (WARN_ON(p->mm != mm))
                return;

        /*
         * Make the process unfindable, then wait for readers that may
         * already have picked it up from the table.
         */
        mutex_lock(&kfd_processes_mutex);
        hash_del_rcu(&p->kfd_processes);
        mutex_unlock(&kfd_processes_mutex);
        synchronize_srcu(&kfd_processes_srcu);

        mutex_lock(&p->mutex);

        /* Iterate over all process device data structures and if the
         * pdd is in debug mode, we should first force unregistration,
         * then we will be able to destroy the queues
         */
        list_for_each_entry(pdd, &p->per_device_data, per_device_list) {
                struct kfd_dev *dev = pdd->dev;

                mutex_lock(kfd_get_dbgmgr_mutex());
                /* Only touch the debug manager if this process owns it. */
                if (dev && dev->dbgmgr && dev->dbgmgr->pasid == p->pasid) {
                        if (!kfd_dbgmgr_unregister(dev->dbgmgr, p)) {
                                kfd_dbgmgr_destroy(dev->dbgmgr);
                                dev->dbgmgr = NULL;
                        }
                }
                mutex_unlock(kfd_get_dbgmgr_mutex());
        }

        kfd_process_dequeue_from_all_devices(p);
        pqm_uninit(&p->pqm);

        /* Indicate to other users that MM is no longer valid */
        p->mm = NULL;

        mutex_unlock(&p->mutex);

        /*
         * Unregister without invoking .release again, then defer the
         * reference drop to kfd_process_destroy_delayed().
         */
        mmu_notifier_unregister_no_release(&p->mmu_notifier, mm);
        mmu_notifier_call_srcu(&p->rcu, &kfd_process_destroy_delayed);
}
263
/*
 * MMU notifier operations installed by create_process(); only the
 * .release hook is provided.
 */
static const struct mmu_notifier_ops kfd_process_mmu_notifier_ops = {
        .release = kfd_process_notifier_release,
};
267
268 static int kfd_process_init_cwsr(struct kfd_process *p, struct file *filep)
269 {
270         unsigned long  offset;
271         struct kfd_process_device *pdd = NULL;
272         struct kfd_dev *dev = NULL;
273         struct qcm_process_device *qpd = NULL;
274
275         list_for_each_entry(pdd, &p->per_device_data, per_device_list) {
276                 dev = pdd->dev;
277                 qpd = &pdd->qpd;
278                 if (!dev->cwsr_enabled || qpd->cwsr_kaddr)
279                         continue;
280                 offset = (dev->id | KFD_MMAP_RESERVED_MEM_MASK) << PAGE_SHIFT;
281                 qpd->tba_addr = (int64_t)vm_mmap(filep, 0,
282                         KFD_CWSR_TBA_TMA_SIZE, PROT_READ | PROT_EXEC,
283                         MAP_SHARED, offset);
284
285                 if (IS_ERR_VALUE(qpd->tba_addr)) {
286                         int err = qpd->tba_addr;
287
288                         pr_err("Failure to set tba address. error %d.\n", err);
289                         qpd->tba_addr = 0;
290                         qpd->cwsr_kaddr = NULL;
291                         return err;
292                 }
293
294                 memcpy(qpd->cwsr_kaddr, dev->cwsr_isa, dev->cwsr_isa_size);
295
296                 qpd->tma_addr = qpd->tba_addr + KFD_CWSR_TMA_OFFSET;
297                 pr_debug("set tba :0x%llx, tma:0x%llx, cwsr_kaddr:%p for pqm.\n",
298                         qpd->tba_addr, qpd->tma_addr, qpd->cwsr_kaddr);
299         }
300
301         return 0;
302 }
303
304 static struct kfd_process *create_process(const struct task_struct *thread,
305                                         struct file *filep)
306 {
307         struct kfd_process *process;
308         int err = -ENOMEM;
309
310         process = kzalloc(sizeof(*process), GFP_KERNEL);
311
312         if (!process)
313                 goto err_alloc_process;
314
315         process->pasid = kfd_pasid_alloc();
316         if (process->pasid == 0)
317                 goto err_alloc_pasid;
318
319         if (kfd_alloc_process_doorbells(process) < 0)
320                 goto err_alloc_doorbells;
321
322         kref_init(&process->ref);
323
324         mutex_init(&process->mutex);
325
326         process->mm = thread->mm;
327
328         /* register notifier */
329         process->mmu_notifier.ops = &kfd_process_mmu_notifier_ops;
330         err = mmu_notifier_register(&process->mmu_notifier, process->mm);
331         if (err)
332                 goto err_mmu_notifier;
333
334         hash_add_rcu(kfd_processes_table, &process->kfd_processes,
335                         (uintptr_t)process->mm);
336
337         process->lead_thread = thread->group_leader;
338         get_task_struct(process->lead_thread);
339
340         INIT_LIST_HEAD(&process->per_device_data);
341
342         kfd_event_init_process(process);
343
344         err = pqm_init(&process->pqm, process);
345         if (err != 0)
346                 goto err_process_pqm_init;
347
348         /* init process apertures*/
349         process->is_32bit_user_mode = in_compat_syscall();
350         err = kfd_init_apertures(process);
351         if (err != 0)
352                 goto err_init_apertures;
353
354         err = kfd_process_init_cwsr(process, filep);
355         if (err)
356                 goto err_init_cwsr;
357
358         return process;
359
360 err_init_cwsr:
361         kfd_process_destroy_pdds(process);
362 err_init_apertures:
363         pqm_uninit(&process->pqm);
364 err_process_pqm_init:
365         hash_del_rcu(&process->kfd_processes);
366         synchronize_rcu();
367         mmu_notifier_unregister_no_release(&process->mmu_notifier, process->mm);
368 err_mmu_notifier:
369         mutex_destroy(&process->mutex);
370         kfd_free_process_doorbells(process);
371 err_alloc_doorbells:
372         kfd_pasid_free(process->pasid);
373 err_alloc_pasid:
374         kfree(process);
375 err_alloc_process:
376         return ERR_PTR(err);
377 }
378
379 struct kfd_process_device *kfd_get_process_device_data(struct kfd_dev *dev,
380                                                         struct kfd_process *p)
381 {
382         struct kfd_process_device *pdd = NULL;
383
384         list_for_each_entry(pdd, &p->per_device_data, per_device_list)
385                 if (pdd->dev == dev)
386                         return pdd;
387
388         return NULL;
389 }
390
391 struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev,
392                                                         struct kfd_process *p)
393 {
394         struct kfd_process_device *pdd = NULL;
395
396         pdd = kzalloc(sizeof(*pdd), GFP_KERNEL);
397         if (!pdd)
398                 return NULL;
399
400         pdd->dev = dev;
401         INIT_LIST_HEAD(&pdd->qpd.queues_list);
402         INIT_LIST_HEAD(&pdd->qpd.priv_queue_list);
403         pdd->qpd.dqm = dev->dqm;
404         pdd->qpd.pqm = &p->pqm;
405         pdd->process = p;
406         pdd->bound = PDD_UNBOUND;
407         pdd->already_dequeued = false;
408         list_add(&pdd->per_device_list, &p->per_device_data);
409
410         return pdd;
411 }
412
413 /*
414  * Direct the IOMMU to bind the process (specifically the pasid->mm)
415  * to the device.
416  * Unbinding occurs when the process dies or the device is removed.
417  *
418  * Assumes that the process lock is held.
419  */
420 struct kfd_process_device *kfd_bind_process_to_device(struct kfd_dev *dev,
421                                                         struct kfd_process *p)
422 {
423         struct kfd_process_device *pdd;
424         int err;
425
426         pdd = kfd_get_process_device_data(dev, p);
427         if (!pdd) {
428                 pr_err("Process device data doesn't exist\n");
429                 return ERR_PTR(-ENOMEM);
430         }
431
432         if (pdd->bound == PDD_BOUND) {
433                 return pdd;
434         } else if (unlikely(pdd->bound == PDD_BOUND_SUSPENDED)) {
435                 pr_err("Binding PDD_BOUND_SUSPENDED pdd is unexpected!\n");
436                 return ERR_PTR(-EINVAL);
437         }
438
439         err = amd_iommu_bind_pasid(dev->pdev, p->pasid, p->lead_thread);
440         if (err < 0)
441                 return ERR_PTR(err);
442
443         pdd->bound = PDD_BOUND;
444
445         return pdd;
446 }
447
448 /*
449  * Bind processes do the device that have been temporarily unbound
450  * (PDD_BOUND_SUSPENDED) in kfd_unbind_processes_from_device.
451  */
452 int kfd_bind_processes_to_device(struct kfd_dev *dev)
453 {
454         struct kfd_process_device *pdd;
455         struct kfd_process *p;
456         unsigned int temp;
457         int err = 0;
458
459         int idx = srcu_read_lock(&kfd_processes_srcu);
460
461         hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
462                 mutex_lock(&p->mutex);
463                 pdd = kfd_get_process_device_data(dev, p);
464
465                 if (WARN_ON(!pdd) || pdd->bound != PDD_BOUND_SUSPENDED) {
466                         mutex_unlock(&p->mutex);
467                         continue;
468                 }
469
470                 err = amd_iommu_bind_pasid(dev->pdev, p->pasid,
471                                 p->lead_thread);
472                 if (err < 0) {
473                         pr_err("Unexpected pasid %d binding failure\n",
474                                         p->pasid);
475                         mutex_unlock(&p->mutex);
476                         break;
477                 }
478
479                 pdd->bound = PDD_BOUND;
480                 mutex_unlock(&p->mutex);
481         }
482
483         srcu_read_unlock(&kfd_processes_srcu, idx);
484
485         return err;
486 }
487
488 /*
489  * Mark currently bound processes as PDD_BOUND_SUSPENDED. These
490  * processes will be restored to PDD_BOUND state in
491  * kfd_bind_processes_to_device.
492  */
493 void kfd_unbind_processes_from_device(struct kfd_dev *dev)
494 {
495         struct kfd_process_device *pdd;
496         struct kfd_process *p;
497         unsigned int temp;
498
499         int idx = srcu_read_lock(&kfd_processes_srcu);
500
501         hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
502                 mutex_lock(&p->mutex);
503                 pdd = kfd_get_process_device_data(dev, p);
504
505                 if (WARN_ON(!pdd)) {
506                         mutex_unlock(&p->mutex);
507                         continue;
508                 }
509
510                 if (pdd->bound == PDD_BOUND)
511                         pdd->bound = PDD_BOUND_SUSPENDED;
512                 mutex_unlock(&p->mutex);
513         }
514
515         srcu_read_unlock(&kfd_processes_srcu, idx);
516 }
517
518 void kfd_process_iommu_unbind_callback(struct kfd_dev *dev, unsigned int pasid)
519 {
520         struct kfd_process *p;
521         struct kfd_process_device *pdd;
522
523         /*
524          * Look for the process that matches the pasid. If there is no such
525          * process, we either released it in amdkfd's own notifier, or there
526          * is a bug. Unfortunately, there is no way to tell...
527          */
528         p = kfd_lookup_process_by_pasid(pasid);
529         if (!p)
530                 return;
531
532         pr_debug("Unbinding process %d from IOMMU\n", pasid);
533
534         mutex_lock(kfd_get_dbgmgr_mutex());
535
536         if (dev->dbgmgr && dev->dbgmgr->pasid == p->pasid) {
537                 if (!kfd_dbgmgr_unregister(dev->dbgmgr, p)) {
538                         kfd_dbgmgr_destroy(dev->dbgmgr);
539                         dev->dbgmgr = NULL;
540                 }
541         }
542
543         mutex_unlock(kfd_get_dbgmgr_mutex());
544
545         mutex_lock(&p->mutex);
546
547         pdd = kfd_get_process_device_data(dev, p);
548         if (pdd)
549                 /* For GPU relying on IOMMU, we need to dequeue here
550                  * when PASID is still bound.
551                  */
552                 kfd_process_dequeue_from_device(pdd);
553
554         mutex_unlock(&p->mutex);
555
556         kfd_unref_process(p);
557 }
558
559 struct kfd_process_device *kfd_get_first_process_device_data(
560                                                 struct kfd_process *p)
561 {
562         return list_first_entry(&p->per_device_data,
563                                 struct kfd_process_device,
564                                 per_device_list);
565 }
566
567 struct kfd_process_device *kfd_get_next_process_device_data(
568                                                 struct kfd_process *p,
569                                                 struct kfd_process_device *pdd)
570 {
571         if (list_is_last(&pdd->per_device_list, &p->per_device_data))
572                 return NULL;
573         return list_next_entry(pdd, per_device_list);
574 }
575
576 bool kfd_has_process_device_data(struct kfd_process *p)
577 {
578         return !(list_empty(&p->per_device_data));
579 }
580
581 /* This increments the process->ref counter. */
582 struct kfd_process *kfd_lookup_process_by_pasid(unsigned int pasid)
583 {
584         struct kfd_process *p, *ret_p = NULL;
585         unsigned int temp;
586
587         int idx = srcu_read_lock(&kfd_processes_srcu);
588
589         hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
590                 if (p->pasid == pasid) {
591                         kref_get(&p->ref);
592                         ret_p = p;
593                         break;
594                 }
595         }
596
597         srcu_read_unlock(&kfd_processes_srcu, idx);
598
599         return ret_p;
600 }
601
602 int kfd_reserved_mem_mmap(struct kfd_process *process,
603                           struct vm_area_struct *vma)
604 {
605         struct kfd_dev *dev = kfd_device_by_id(vma->vm_pgoff);
606         struct kfd_process_device *pdd;
607         struct qcm_process_device *qpd;
608
609         if (!dev)
610                 return -EINVAL;
611         if ((vma->vm_end - vma->vm_start) != KFD_CWSR_TBA_TMA_SIZE) {
612                 pr_err("Incorrect CWSR mapping size.\n");
613                 return -EINVAL;
614         }
615
616         pdd = kfd_get_process_device_data(dev, process);
617         if (!pdd)
618                 return -EINVAL;
619         qpd = &pdd->qpd;
620
621         qpd->cwsr_kaddr = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
622                                         get_order(KFD_CWSR_TBA_TMA_SIZE));
623         if (!qpd->cwsr_kaddr) {
624                 pr_err("Error allocating per process CWSR buffer.\n");
625                 return -ENOMEM;
626         }
627
628         vma->vm_flags |= VM_IO | VM_DONTCOPY | VM_DONTEXPAND
629                 | VM_NORESERVE | VM_DONTDUMP | VM_PFNMAP;
630         /* Mapping pages to user process */
631         return remap_pfn_range(vma, vma->vm_start,
632                                PFN_DOWN(__pa(qpd->cwsr_kaddr)),
633                                KFD_CWSR_TBA_TMA_SIZE, vma->vm_page_prot);
634 }
635
636 #if defined(CONFIG_DEBUG_FS)
637
638 int kfd_debugfs_mqds_by_process(struct seq_file *m, void *data)
639 {
640         struct kfd_process *p;
641         unsigned int temp;
642         int r = 0;
643
644         int idx = srcu_read_lock(&kfd_processes_srcu);
645
646         hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
647                 seq_printf(m, "Process %d PASID %d:\n",
648                            p->lead_thread->tgid, p->pasid);
649
650                 mutex_lock(&p->mutex);
651                 r = pqm_debugfs_mqds(m, &p->pqm);
652                 mutex_unlock(&p->mutex);
653
654                 if (r)
655                         break;
656         }
657
658         srcu_read_unlock(&kfd_processes_srcu, idx);
659
660         return r;
661 }
662
663 #endif