Merge branch 'x86-cleanups-for-linus' of git://git.kernel.org/pub/scm/linux/kernel...
[sfrench/cifs-2.6.git] / drivers / gpu / drm / amd / amdkfd / kfd_topology.c
1 /*
2  * Copyright 2014 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  */
22
23 #include <linux/types.h>
24 #include <linux/kernel.h>
25 #include <linux/pci.h>
26 #include <linux/errno.h>
27 #include <linux/acpi.h>
28 #include <linux/hash.h>
29 #include <linux/cpufreq.h>
30 #include <linux/log2.h>
31 #include <linux/dmi.h>
32 #include <linux/atomic.h>
33
34 #include "kfd_priv.h"
35 #include "kfd_crat.h"
36 #include "kfd_topology.h"
37 #include "kfd_device_queue_manager.h"
38 #include "kfd_iommu.h"
39 #include "amdgpu_amdkfd.h"
40
/* topology_device_list - Master list of all topology devices */
/* The lookup helpers in this file walk this list with topology_lock held for read. */
static struct list_head topology_device_list;
/* System-wide topology state: sysfs kobjects/attributes, device count, platform ids. */
static struct kfd_system_properties sys_props;

/* Reader/writer semaphore protecting topology_device_list. */
static DECLARE_RWSEM(topology_lock);
/* NOTE(review): presumably a running proximity-domain counter; its users are not visible in this part of the file. */
static atomic_t topology_crat_proximity_domain;
47
48 struct kfd_topology_device *kfd_topology_device_by_proximity_domain(
49                                                 uint32_t proximity_domain)
50 {
51         struct kfd_topology_device *top_dev;
52         struct kfd_topology_device *device = NULL;
53
54         down_read(&topology_lock);
55
56         list_for_each_entry(top_dev, &topology_device_list, list)
57                 if (top_dev->proximity_domain == proximity_domain) {
58                         device = top_dev;
59                         break;
60                 }
61
62         up_read(&topology_lock);
63
64         return device;
65 }
66
67 struct kfd_topology_device *kfd_topology_device_by_id(uint32_t gpu_id)
68 {
69         struct kfd_topology_device *top_dev = NULL;
70         struct kfd_topology_device *ret = NULL;
71
72         down_read(&topology_lock);
73
74         list_for_each_entry(top_dev, &topology_device_list, list)
75                 if (top_dev->gpu_id == gpu_id) {
76                         ret = top_dev;
77                         break;
78                 }
79
80         up_read(&topology_lock);
81
82         return ret;
83 }
84
85 struct kfd_dev *kfd_device_by_id(uint32_t gpu_id)
86 {
87         struct kfd_topology_device *top_dev;
88
89         top_dev = kfd_topology_device_by_id(gpu_id);
90         if (!top_dev)
91                 return NULL;
92
93         return top_dev->gpu;
94 }
95
96 struct kfd_dev *kfd_device_by_pci_dev(const struct pci_dev *pdev)
97 {
98         struct kfd_topology_device *top_dev;
99         struct kfd_dev *device = NULL;
100
101         down_read(&topology_lock);
102
103         list_for_each_entry(top_dev, &topology_device_list, list)
104                 if (top_dev->gpu && top_dev->gpu->pdev == pdev) {
105                         device = top_dev->gpu;
106                         break;
107                 }
108
109         up_read(&topology_lock);
110
111         return device;
112 }
113
114 struct kfd_dev *kfd_device_by_kgd(const struct kgd_dev *kgd)
115 {
116         struct kfd_topology_device *top_dev;
117         struct kfd_dev *device = NULL;
118
119         down_read(&topology_lock);
120
121         list_for_each_entry(top_dev, &topology_device_list, list)
122                 if (top_dev->gpu && top_dev->gpu->kgd == kgd) {
123                         device = top_dev->gpu;
124                         break;
125                 }
126
127         up_read(&topology_lock);
128
129         return device;
130 }
131
132 /* Called with write topology_lock acquired */
133 static void kfd_release_topology_device(struct kfd_topology_device *dev)
134 {
135         struct kfd_mem_properties *mem;
136         struct kfd_cache_properties *cache;
137         struct kfd_iolink_properties *iolink;
138         struct kfd_perf_properties *perf;
139
140         list_del(&dev->list);
141
142         while (dev->mem_props.next != &dev->mem_props) {
143                 mem = container_of(dev->mem_props.next,
144                                 struct kfd_mem_properties, list);
145                 list_del(&mem->list);
146                 kfree(mem);
147         }
148
149         while (dev->cache_props.next != &dev->cache_props) {
150                 cache = container_of(dev->cache_props.next,
151                                 struct kfd_cache_properties, list);
152                 list_del(&cache->list);
153                 kfree(cache);
154         }
155
156         while (dev->io_link_props.next != &dev->io_link_props) {
157                 iolink = container_of(dev->io_link_props.next,
158                                 struct kfd_iolink_properties, list);
159                 list_del(&iolink->list);
160                 kfree(iolink);
161         }
162
163         while (dev->perf_props.next != &dev->perf_props) {
164                 perf = container_of(dev->perf_props.next,
165                                 struct kfd_perf_properties, list);
166                 list_del(&perf->list);
167                 kfree(perf);
168         }
169
170         kfree(dev);
171 }
172
173 void kfd_release_topology_device_list(struct list_head *device_list)
174 {
175         struct kfd_topology_device *dev;
176
177         while (!list_empty(device_list)) {
178                 dev = list_first_entry(device_list,
179                                        struct kfd_topology_device, list);
180                 kfd_release_topology_device(dev);
181         }
182 }
183
184 static void kfd_release_live_view(void)
185 {
186         kfd_release_topology_device_list(&topology_device_list);
187         memset(&sys_props, 0, sizeof(sys_props));
188 }
189
190 struct kfd_topology_device *kfd_create_topology_device(
191                                 struct list_head *device_list)
192 {
193         struct kfd_topology_device *dev;
194
195         dev = kfd_alloc_struct(dev);
196         if (!dev) {
197                 pr_err("No memory to allocate a topology device");
198                 return NULL;
199         }
200
201         INIT_LIST_HEAD(&dev->mem_props);
202         INIT_LIST_HEAD(&dev->cache_props);
203         INIT_LIST_HEAD(&dev->io_link_props);
204         INIT_LIST_HEAD(&dev->perf_props);
205
206         list_add_tail(&dev->list, device_list);
207
208         return dev;
209 }
210
211
/*
 * Helpers that append one formatted line to a sysfs show() buffer.
 *
 * buffer must hold a NUL-terminated string (callers start with
 * buffer[0] = 0) and is treated as PAGE_SIZE bytes long. The new text is
 * written at the current end of the string; the buffer is never passed to
 * snprintf() as both destination and "%s" source, because formatting
 * between overlapping objects is undefined behaviour.
 *
 * Each macro evaluates to the length of the existing text plus the length
 * snprintf() reports for the appended text.
 */
#define sysfs_show_gen_prop(buffer, fmt, ...)				\
({									\
	char *_kfd_buf = (buffer);					\
	size_t _kfd_len = strlen(_kfd_buf);				\
	(int)_kfd_len + snprintf(_kfd_buf + _kfd_len,			\
				 PAGE_SIZE - _kfd_len,			\
				 fmt, __VA_ARGS__);			\
})
/* "<name> <value>\n" with a 32-bit unsigned value */
#define sysfs_show_32bit_prop(buffer, name, value) \
		sysfs_show_gen_prop(buffer, "%s %u\n", name, value)
/* "<name> <value>\n" with a 64-bit unsigned value */
#define sysfs_show_64bit_prop(buffer, name, value) \
		sysfs_show_gen_prop(buffer, "%s %llu\n", name, value)
/* "<value>\n" with a 32-bit unsigned value */
#define sysfs_show_32bit_val(buffer, value) \
		sysfs_show_gen_prop(buffer, "%u\n", value)
/* "<value>\n" with a string value */
#define sysfs_show_str_val(buffer, value) \
		sysfs_show_gen_prop(buffer, "%s\n", value)
222
223 static ssize_t sysprops_show(struct kobject *kobj, struct attribute *attr,
224                 char *buffer)
225 {
226         ssize_t ret;
227
228         /* Making sure that the buffer is an empty string */
229         buffer[0] = 0;
230
231         if (attr == &sys_props.attr_genid) {
232                 ret = sysfs_show_32bit_val(buffer, sys_props.generation_count);
233         } else if (attr == &sys_props.attr_props) {
234                 sysfs_show_64bit_prop(buffer, "platform_oem",
235                                 sys_props.platform_oem);
236                 sysfs_show_64bit_prop(buffer, "platform_id",
237                                 sys_props.platform_id);
238                 ret = sysfs_show_64bit_prop(buffer, "platform_rev",
239                                 sys_props.platform_rev);
240         } else {
241                 ret = -EINVAL;
242         }
243
244         return ret;
245 }
246
247 static void kfd_topology_kobj_release(struct kobject *kobj)
248 {
249         kfree(kobj);
250 }
251
252 static const struct sysfs_ops sysprops_ops = {
253         .show = sysprops_show,
254 };
255
256 static struct kobj_type sysprops_type = {
257         .release = kfd_topology_kobj_release,
258         .sysfs_ops = &sysprops_ops,
259 };
260
261 static ssize_t iolink_show(struct kobject *kobj, struct attribute *attr,
262                 char *buffer)
263 {
264         ssize_t ret;
265         struct kfd_iolink_properties *iolink;
266
267         /* Making sure that the buffer is an empty string */
268         buffer[0] = 0;
269
270         iolink = container_of(attr, struct kfd_iolink_properties, attr);
271         sysfs_show_32bit_prop(buffer, "type", iolink->iolink_type);
272         sysfs_show_32bit_prop(buffer, "version_major", iolink->ver_maj);
273         sysfs_show_32bit_prop(buffer, "version_minor", iolink->ver_min);
274         sysfs_show_32bit_prop(buffer, "node_from", iolink->node_from);
275         sysfs_show_32bit_prop(buffer, "node_to", iolink->node_to);
276         sysfs_show_32bit_prop(buffer, "weight", iolink->weight);
277         sysfs_show_32bit_prop(buffer, "min_latency", iolink->min_latency);
278         sysfs_show_32bit_prop(buffer, "max_latency", iolink->max_latency);
279         sysfs_show_32bit_prop(buffer, "min_bandwidth", iolink->min_bandwidth);
280         sysfs_show_32bit_prop(buffer, "max_bandwidth", iolink->max_bandwidth);
281         sysfs_show_32bit_prop(buffer, "recommended_transfer_size",
282                         iolink->rec_transfer_size);
283         ret = sysfs_show_32bit_prop(buffer, "flags", iolink->flags);
284
285         return ret;
286 }
287
288 static const struct sysfs_ops iolink_ops = {
289         .show = iolink_show,
290 };
291
292 static struct kobj_type iolink_type = {
293         .release = kfd_topology_kobj_release,
294         .sysfs_ops = &iolink_ops,
295 };
296
297 static ssize_t mem_show(struct kobject *kobj, struct attribute *attr,
298                 char *buffer)
299 {
300         ssize_t ret;
301         struct kfd_mem_properties *mem;
302
303         /* Making sure that the buffer is an empty string */
304         buffer[0] = 0;
305
306         mem = container_of(attr, struct kfd_mem_properties, attr);
307         sysfs_show_32bit_prop(buffer, "heap_type", mem->heap_type);
308         sysfs_show_64bit_prop(buffer, "size_in_bytes", mem->size_in_bytes);
309         sysfs_show_32bit_prop(buffer, "flags", mem->flags);
310         sysfs_show_32bit_prop(buffer, "width", mem->width);
311         ret = sysfs_show_32bit_prop(buffer, "mem_clk_max", mem->mem_clk_max);
312
313         return ret;
314 }
315
316 static const struct sysfs_ops mem_ops = {
317         .show = mem_show,
318 };
319
320 static struct kobj_type mem_type = {
321         .release = kfd_topology_kobj_release,
322         .sysfs_ops = &mem_ops,
323 };
324
325 static ssize_t kfd_cache_show(struct kobject *kobj, struct attribute *attr,
326                 char *buffer)
327 {
328         ssize_t ret;
329         uint32_t i, j;
330         struct kfd_cache_properties *cache;
331
332         /* Making sure that the buffer is an empty string */
333         buffer[0] = 0;
334
335         cache = container_of(attr, struct kfd_cache_properties, attr);
336         sysfs_show_32bit_prop(buffer, "processor_id_low",
337                         cache->processor_id_low);
338         sysfs_show_32bit_prop(buffer, "level", cache->cache_level);
339         sysfs_show_32bit_prop(buffer, "size", cache->cache_size);
340         sysfs_show_32bit_prop(buffer, "cache_line_size", cache->cacheline_size);
341         sysfs_show_32bit_prop(buffer, "cache_lines_per_tag",
342                         cache->cachelines_per_tag);
343         sysfs_show_32bit_prop(buffer, "association", cache->cache_assoc);
344         sysfs_show_32bit_prop(buffer, "latency", cache->cache_latency);
345         sysfs_show_32bit_prop(buffer, "type", cache->cache_type);
346         snprintf(buffer, PAGE_SIZE, "%ssibling_map ", buffer);
347         for (i = 0; i < CRAT_SIBLINGMAP_SIZE; i++)
348                 for (j = 0; j < sizeof(cache->sibling_map[0])*8; j++) {
349                         /* Check each bit */
350                         if (cache->sibling_map[i] & (1 << j))
351                                 ret = snprintf(buffer, PAGE_SIZE,
352                                          "%s%d%s", buffer, 1, ",");
353                         else
354                                 ret = snprintf(buffer, PAGE_SIZE,
355                                          "%s%d%s", buffer, 0, ",");
356                 }
357         /* Replace the last "," with end of line */
358         *(buffer + strlen(buffer) - 1) = 0xA;
359         return ret;
360 }
361
362 static const struct sysfs_ops cache_ops = {
363         .show = kfd_cache_show,
364 };
365
366 static struct kobj_type cache_type = {
367         .release = kfd_topology_kobj_release,
368         .sysfs_ops = &cache_ops,
369 };
370
371 /****** Sysfs of Performance Counters ******/
372
373 struct kfd_perf_attr {
374         struct kobj_attribute attr;
375         uint32_t data;
376 };
377
378 static ssize_t perf_show(struct kobject *kobj, struct kobj_attribute *attrs,
379                         char *buf)
380 {
381         struct kfd_perf_attr *attr;
382
383         buf[0] = 0;
384         attr = container_of(attrs, struct kfd_perf_attr, attr);
385         if (!attr->data) /* invalid data for PMC */
386                 return 0;
387         else
388                 return sysfs_show_32bit_val(buf, attr->data);
389 }
390
391 #define KFD_PERF_DESC(_name, _data)                     \
392 {                                                       \
393         .attr  = __ATTR(_name, 0444, perf_show, NULL),  \
394         .data = _data,                                  \
395 }
396
397 static struct kfd_perf_attr perf_attr_iommu[] = {
398         KFD_PERF_DESC(max_concurrent, 0),
399         KFD_PERF_DESC(num_counters, 0),
400         KFD_PERF_DESC(counter_ids, 0),
401 };
402 /****************************************/
403
404 static ssize_t node_show(struct kobject *kobj, struct attribute *attr,
405                 char *buffer)
406 {
407         struct kfd_topology_device *dev;
408         char public_name[KFD_TOPOLOGY_PUBLIC_NAME_SIZE];
409         uint32_t i;
410         uint32_t log_max_watch_addr;
411
412         /* Making sure that the buffer is an empty string */
413         buffer[0] = 0;
414
415         if (strcmp(attr->name, "gpu_id") == 0) {
416                 dev = container_of(attr, struct kfd_topology_device,
417                                 attr_gpuid);
418                 return sysfs_show_32bit_val(buffer, dev->gpu_id);
419         }
420
421         if (strcmp(attr->name, "name") == 0) {
422                 dev = container_of(attr, struct kfd_topology_device,
423                                 attr_name);
424                 for (i = 0; i < KFD_TOPOLOGY_PUBLIC_NAME_SIZE; i++) {
425                         public_name[i] =
426                                         (char)dev->node_props.marketing_name[i];
427                         if (dev->node_props.marketing_name[i] == 0)
428                                 break;
429                 }
430                 public_name[KFD_TOPOLOGY_PUBLIC_NAME_SIZE-1] = 0x0;
431                 return sysfs_show_str_val(buffer, public_name);
432         }
433
434         dev = container_of(attr, struct kfd_topology_device,
435                         attr_props);
436         sysfs_show_32bit_prop(buffer, "cpu_cores_count",
437                         dev->node_props.cpu_cores_count);
438         sysfs_show_32bit_prop(buffer, "simd_count",
439                         dev->node_props.simd_count);
440         sysfs_show_32bit_prop(buffer, "mem_banks_count",
441                         dev->node_props.mem_banks_count);
442         sysfs_show_32bit_prop(buffer, "caches_count",
443                         dev->node_props.caches_count);
444         sysfs_show_32bit_prop(buffer, "io_links_count",
445                         dev->node_props.io_links_count);
446         sysfs_show_32bit_prop(buffer, "cpu_core_id_base",
447                         dev->node_props.cpu_core_id_base);
448         sysfs_show_32bit_prop(buffer, "simd_id_base",
449                         dev->node_props.simd_id_base);
450         sysfs_show_32bit_prop(buffer, "max_waves_per_simd",
451                         dev->node_props.max_waves_per_simd);
452         sysfs_show_32bit_prop(buffer, "lds_size_in_kb",
453                         dev->node_props.lds_size_in_kb);
454         sysfs_show_32bit_prop(buffer, "gds_size_in_kb",
455                         dev->node_props.gds_size_in_kb);
456         sysfs_show_32bit_prop(buffer, "wave_front_size",
457                         dev->node_props.wave_front_size);
458         sysfs_show_32bit_prop(buffer, "array_count",
459                         dev->node_props.array_count);
460         sysfs_show_32bit_prop(buffer, "simd_arrays_per_engine",
461                         dev->node_props.simd_arrays_per_engine);
462         sysfs_show_32bit_prop(buffer, "cu_per_simd_array",
463                         dev->node_props.cu_per_simd_array);
464         sysfs_show_32bit_prop(buffer, "simd_per_cu",
465                         dev->node_props.simd_per_cu);
466         sysfs_show_32bit_prop(buffer, "max_slots_scratch_cu",
467                         dev->node_props.max_slots_scratch_cu);
468         sysfs_show_32bit_prop(buffer, "vendor_id",
469                         dev->node_props.vendor_id);
470         sysfs_show_32bit_prop(buffer, "device_id",
471                         dev->node_props.device_id);
472         sysfs_show_32bit_prop(buffer, "location_id",
473                         dev->node_props.location_id);
474         sysfs_show_32bit_prop(buffer, "drm_render_minor",
475                         dev->node_props.drm_render_minor);
476         sysfs_show_64bit_prop(buffer, "hive_id",
477                         dev->node_props.hive_id);
478
479         if (dev->gpu) {
480                 log_max_watch_addr =
481                         __ilog2_u32(dev->gpu->device_info->num_of_watch_points);
482
483                 if (log_max_watch_addr) {
484                         dev->node_props.capability |=
485                                         HSA_CAP_WATCH_POINTS_SUPPORTED;
486
487                         dev->node_props.capability |=
488                                 ((log_max_watch_addr <<
489                                         HSA_CAP_WATCH_POINTS_TOTALBITS_SHIFT) &
490                                 HSA_CAP_WATCH_POINTS_TOTALBITS_MASK);
491                 }
492
493                 if (dev->gpu->device_info->asic_family == CHIP_TONGA)
494                         dev->node_props.capability |=
495                                         HSA_CAP_AQL_QUEUE_DOUBLE_MAP;
496
497                 sysfs_show_32bit_prop(buffer, "max_engine_clk_fcompute",
498                         dev->node_props.max_engine_clk_fcompute);
499
500                 sysfs_show_64bit_prop(buffer, "local_mem_size",
501                                 (unsigned long long int) 0);
502
503                 sysfs_show_32bit_prop(buffer, "fw_version",
504                                 dev->gpu->mec_fw_version);
505                 sysfs_show_32bit_prop(buffer, "capability",
506                                 dev->node_props.capability);
507                 sysfs_show_32bit_prop(buffer, "sdma_fw_version",
508                                 dev->gpu->sdma_fw_version);
509         }
510
511         return sysfs_show_32bit_prop(buffer, "max_engine_clk_ccompute",
512                                         cpufreq_quick_get_max(0)/1000);
513 }
514
515 static const struct sysfs_ops node_ops = {
516         .show = node_show,
517 };
518
519 static struct kobj_type node_type = {
520         .release = kfd_topology_kobj_release,
521         .sysfs_ops = &node_ops,
522 };
523
/*
 * Tear down a single-attribute kobject: remove the attribute file, unlink
 * the kobject from sysfs, then drop the reference held on it.
 */
static void kfd_remove_sysfs_file(struct kobject *kobj, struct attribute *attr)
{
	/* The file goes first, while its directory still exists. */
	sysfs_remove_file(kobj, attr);

	kobject_del(kobj);
	kobject_put(kobj);
}
530
531 static void kfd_remove_sysfs_node_entry(struct kfd_topology_device *dev)
532 {
533         struct kfd_iolink_properties *iolink;
534         struct kfd_cache_properties *cache;
535         struct kfd_mem_properties *mem;
536         struct kfd_perf_properties *perf;
537
538         if (dev->kobj_iolink) {
539                 list_for_each_entry(iolink, &dev->io_link_props, list)
540                         if (iolink->kobj) {
541                                 kfd_remove_sysfs_file(iolink->kobj,
542                                                         &iolink->attr);
543                                 iolink->kobj = NULL;
544                         }
545                 kobject_del(dev->kobj_iolink);
546                 kobject_put(dev->kobj_iolink);
547                 dev->kobj_iolink = NULL;
548         }
549
550         if (dev->kobj_cache) {
551                 list_for_each_entry(cache, &dev->cache_props, list)
552                         if (cache->kobj) {
553                                 kfd_remove_sysfs_file(cache->kobj,
554                                                         &cache->attr);
555                                 cache->kobj = NULL;
556                         }
557                 kobject_del(dev->kobj_cache);
558                 kobject_put(dev->kobj_cache);
559                 dev->kobj_cache = NULL;
560         }
561
562         if (dev->kobj_mem) {
563                 list_for_each_entry(mem, &dev->mem_props, list)
564                         if (mem->kobj) {
565                                 kfd_remove_sysfs_file(mem->kobj, &mem->attr);
566                                 mem->kobj = NULL;
567                         }
568                 kobject_del(dev->kobj_mem);
569                 kobject_put(dev->kobj_mem);
570                 dev->kobj_mem = NULL;
571         }
572
573         if (dev->kobj_perf) {
574                 list_for_each_entry(perf, &dev->perf_props, list) {
575                         kfree(perf->attr_group);
576                         perf->attr_group = NULL;
577                 }
578                 kobject_del(dev->kobj_perf);
579                 kobject_put(dev->kobj_perf);
580                 dev->kobj_perf = NULL;
581         }
582
583         if (dev->kobj_node) {
584                 sysfs_remove_file(dev->kobj_node, &dev->attr_gpuid);
585                 sysfs_remove_file(dev->kobj_node, &dev->attr_name);
586                 sysfs_remove_file(dev->kobj_node, &dev->attr_props);
587                 kobject_del(dev->kobj_node);
588                 kobject_put(dev->kobj_node);
589                 dev->kobj_node = NULL;
590         }
591 }
592
593 static int kfd_build_sysfs_node_entry(struct kfd_topology_device *dev,
594                 uint32_t id)
595 {
596         struct kfd_iolink_properties *iolink;
597         struct kfd_cache_properties *cache;
598         struct kfd_mem_properties *mem;
599         struct kfd_perf_properties *perf;
600         int ret;
601         uint32_t i, num_attrs;
602         struct attribute **attrs;
603
604         if (WARN_ON(dev->kobj_node))
605                 return -EEXIST;
606
607         /*
608          * Creating the sysfs folders
609          */
610         dev->kobj_node = kfd_alloc_struct(dev->kobj_node);
611         if (!dev->kobj_node)
612                 return -ENOMEM;
613
614         ret = kobject_init_and_add(dev->kobj_node, &node_type,
615                         sys_props.kobj_nodes, "%d", id);
616         if (ret < 0)
617                 return ret;
618
619         dev->kobj_mem = kobject_create_and_add("mem_banks", dev->kobj_node);
620         if (!dev->kobj_mem)
621                 return -ENOMEM;
622
623         dev->kobj_cache = kobject_create_and_add("caches", dev->kobj_node);
624         if (!dev->kobj_cache)
625                 return -ENOMEM;
626
627         dev->kobj_iolink = kobject_create_and_add("io_links", dev->kobj_node);
628         if (!dev->kobj_iolink)
629                 return -ENOMEM;
630
631         dev->kobj_perf = kobject_create_and_add("perf", dev->kobj_node);
632         if (!dev->kobj_perf)
633                 return -ENOMEM;
634
635         /*
636          * Creating sysfs files for node properties
637          */
638         dev->attr_gpuid.name = "gpu_id";
639         dev->attr_gpuid.mode = KFD_SYSFS_FILE_MODE;
640         sysfs_attr_init(&dev->attr_gpuid);
641         dev->attr_name.name = "name";
642         dev->attr_name.mode = KFD_SYSFS_FILE_MODE;
643         sysfs_attr_init(&dev->attr_name);
644         dev->attr_props.name = "properties";
645         dev->attr_props.mode = KFD_SYSFS_FILE_MODE;
646         sysfs_attr_init(&dev->attr_props);
647         ret = sysfs_create_file(dev->kobj_node, &dev->attr_gpuid);
648         if (ret < 0)
649                 return ret;
650         ret = sysfs_create_file(dev->kobj_node, &dev->attr_name);
651         if (ret < 0)
652                 return ret;
653         ret = sysfs_create_file(dev->kobj_node, &dev->attr_props);
654         if (ret < 0)
655                 return ret;
656
657         i = 0;
658         list_for_each_entry(mem, &dev->mem_props, list) {
659                 mem->kobj = kzalloc(sizeof(struct kobject), GFP_KERNEL);
660                 if (!mem->kobj)
661                         return -ENOMEM;
662                 ret = kobject_init_and_add(mem->kobj, &mem_type,
663                                 dev->kobj_mem, "%d", i);
664                 if (ret < 0)
665                         return ret;
666
667                 mem->attr.name = "properties";
668                 mem->attr.mode = KFD_SYSFS_FILE_MODE;
669                 sysfs_attr_init(&mem->attr);
670                 ret = sysfs_create_file(mem->kobj, &mem->attr);
671                 if (ret < 0)
672                         return ret;
673                 i++;
674         }
675
676         i = 0;
677         list_for_each_entry(cache, &dev->cache_props, list) {
678                 cache->kobj = kzalloc(sizeof(struct kobject), GFP_KERNEL);
679                 if (!cache->kobj)
680                         return -ENOMEM;
681                 ret = kobject_init_and_add(cache->kobj, &cache_type,
682                                 dev->kobj_cache, "%d", i);
683                 if (ret < 0)
684                         return ret;
685
686                 cache->attr.name = "properties";
687                 cache->attr.mode = KFD_SYSFS_FILE_MODE;
688                 sysfs_attr_init(&cache->attr);
689                 ret = sysfs_create_file(cache->kobj, &cache->attr);
690                 if (ret < 0)
691                         return ret;
692                 i++;
693         }
694
695         i = 0;
696         list_for_each_entry(iolink, &dev->io_link_props, list) {
697                 iolink->kobj = kzalloc(sizeof(struct kobject), GFP_KERNEL);
698                 if (!iolink->kobj)
699                         return -ENOMEM;
700                 ret = kobject_init_and_add(iolink->kobj, &iolink_type,
701                                 dev->kobj_iolink, "%d", i);
702                 if (ret < 0)
703                         return ret;
704
705                 iolink->attr.name = "properties";
706                 iolink->attr.mode = KFD_SYSFS_FILE_MODE;
707                 sysfs_attr_init(&iolink->attr);
708                 ret = sysfs_create_file(iolink->kobj, &iolink->attr);
709                 if (ret < 0)
710                         return ret;
711                 i++;
712         }
713
714         /* All hardware blocks have the same number of attributes. */
715         num_attrs = ARRAY_SIZE(perf_attr_iommu);
716         list_for_each_entry(perf, &dev->perf_props, list) {
717                 perf->attr_group = kzalloc(sizeof(struct kfd_perf_attr)
718                         * num_attrs + sizeof(struct attribute_group),
719                         GFP_KERNEL);
720                 if (!perf->attr_group)
721                         return -ENOMEM;
722
723                 attrs = (struct attribute **)(perf->attr_group + 1);
724                 if (!strcmp(perf->block_name, "iommu")) {
725                 /* Information of IOMMU's num_counters and counter_ids is shown
726                  * under /sys/bus/event_source/devices/amd_iommu. We don't
727                  * duplicate here.
728                  */
729                         perf_attr_iommu[0].data = perf->max_concurrent;
730                         for (i = 0; i < num_attrs; i++)
731                                 attrs[i] = &perf_attr_iommu[i].attr.attr;
732                 }
733                 perf->attr_group->name = perf->block_name;
734                 perf->attr_group->attrs = attrs;
735                 ret = sysfs_create_group(dev->kobj_perf, perf->attr_group);
736                 if (ret < 0)
737                         return ret;
738         }
739
740         return 0;
741 }
742
743 /* Called with write topology lock acquired */
744 static int kfd_build_sysfs_node_tree(void)
745 {
746         struct kfd_topology_device *dev;
747         int ret;
748         uint32_t i = 0;
749
750         list_for_each_entry(dev, &topology_device_list, list) {
751                 ret = kfd_build_sysfs_node_entry(dev, i);
752                 if (ret < 0)
753                         return ret;
754                 i++;
755         }
756
757         return 0;
758 }
759
760 /* Called with write topology lock acquired */
761 static void kfd_remove_sysfs_node_tree(void)
762 {
763         struct kfd_topology_device *dev;
764
765         list_for_each_entry(dev, &topology_device_list, list)
766                 kfd_remove_sysfs_node_entry(dev);
767 }
768
769 static int kfd_topology_update_sysfs(void)
770 {
771         int ret;
772
773         pr_info("Creating topology SYSFS entries\n");
774         if (!sys_props.kobj_topology) {
775                 sys_props.kobj_topology =
776                                 kfd_alloc_struct(sys_props.kobj_topology);
777                 if (!sys_props.kobj_topology)
778                         return -ENOMEM;
779
780                 ret = kobject_init_and_add(sys_props.kobj_topology,
781                                 &sysprops_type,  &kfd_device->kobj,
782                                 "topology");
783                 if (ret < 0)
784                         return ret;
785
786                 sys_props.kobj_nodes = kobject_create_and_add("nodes",
787                                 sys_props.kobj_topology);
788                 if (!sys_props.kobj_nodes)
789                         return -ENOMEM;
790
791                 sys_props.attr_genid.name = "generation_id";
792                 sys_props.attr_genid.mode = KFD_SYSFS_FILE_MODE;
793                 sysfs_attr_init(&sys_props.attr_genid);
794                 ret = sysfs_create_file(sys_props.kobj_topology,
795                                 &sys_props.attr_genid);
796                 if (ret < 0)
797                         return ret;
798
799                 sys_props.attr_props.name = "system_properties";
800                 sys_props.attr_props.mode = KFD_SYSFS_FILE_MODE;
801                 sysfs_attr_init(&sys_props.attr_props);
802                 ret = sysfs_create_file(sys_props.kobj_topology,
803                                 &sys_props.attr_props);
804                 if (ret < 0)
805                         return ret;
806         }
807
808         kfd_remove_sysfs_node_tree();
809
810         return kfd_build_sysfs_node_tree();
811 }
812
813 static void kfd_topology_release_sysfs(void)
814 {
815         kfd_remove_sysfs_node_tree();
816         if (sys_props.kobj_topology) {
817                 sysfs_remove_file(sys_props.kobj_topology,
818                                 &sys_props.attr_genid);
819                 sysfs_remove_file(sys_props.kobj_topology,
820                                 &sys_props.attr_props);
821                 if (sys_props.kobj_nodes) {
822                         kobject_del(sys_props.kobj_nodes);
823                         kobject_put(sys_props.kobj_nodes);
824                         sys_props.kobj_nodes = NULL;
825                 }
826                 kobject_del(sys_props.kobj_topology);
827                 kobject_put(sys_props.kobj_topology);
828                 sys_props.kobj_topology = NULL;
829         }
830 }
831
832 /* Called with write topology_lock acquired */
833 static void kfd_topology_update_device_list(struct list_head *temp_list,
834                                         struct list_head *master_list)
835 {
836         while (!list_empty(temp_list)) {
837                 list_move_tail(temp_list->next, master_list);
838                 sys_props.num_devices++;
839         }
840 }
841
842 static void kfd_debug_print_topology(void)
843 {
844         struct kfd_topology_device *dev;
845
846         down_read(&topology_lock);
847
848         dev = list_last_entry(&topology_device_list,
849                         struct kfd_topology_device, list);
850         if (dev) {
851                 if (dev->node_props.cpu_cores_count &&
852                                 dev->node_props.simd_count) {
853                         pr_info("Topology: Add APU node [0x%0x:0x%0x]\n",
854                                 dev->node_props.device_id,
855                                 dev->node_props.vendor_id);
856                 } else if (dev->node_props.cpu_cores_count)
857                         pr_info("Topology: Add CPU node\n");
858                 else if (dev->node_props.simd_count)
859                         pr_info("Topology: Add dGPU node [0x%0x:0x%0x]\n",
860                                 dev->node_props.device_id,
861                                 dev->node_props.vendor_id);
862         }
863         up_read(&topology_lock);
864 }
865
866 /* Helper function for intializing platform_xx members of
867  * kfd_system_properties. Uses OEM info from the last CPU/APU node.
868  */
869 static void kfd_update_system_properties(void)
870 {
871         struct kfd_topology_device *dev;
872
873         down_read(&topology_lock);
874         dev = list_last_entry(&topology_device_list,
875                         struct kfd_topology_device, list);
876         if (dev) {
877                 sys_props.platform_id =
878                         (*((uint64_t *)dev->oem_id)) & CRAT_OEMID_64BIT_MASK;
879                 sys_props.platform_oem = *((uint64_t *)dev->oem_table_id);
880                 sys_props.platform_rev = dev->oem_revision;
881         }
882         up_read(&topology_lock);
883 }
884
885 static void find_system_memory(const struct dmi_header *dm,
886         void *private)
887 {
888         struct kfd_mem_properties *mem;
889         u16 mem_width, mem_clock;
890         struct kfd_topology_device *kdev =
891                 (struct kfd_topology_device *)private;
892         const u8 *dmi_data = (const u8 *)(dm + 1);
893
894         if (dm->type == DMI_ENTRY_MEM_DEVICE && dm->length >= 0x15) {
895                 mem_width = (u16)(*(const u16 *)(dmi_data + 0x6));
896                 mem_clock = (u16)(*(const u16 *)(dmi_data + 0x11));
897                 list_for_each_entry(mem, &kdev->mem_props, list) {
898                         if (mem_width != 0xFFFF && mem_width != 0)
899                                 mem->width = mem_width;
900                         if (mem_clock != 0)
901                                 mem->mem_clk_max = mem_clock;
902                 }
903         }
904 }
905
/*
 * Performance counter information is not part of CRAT, but it is exposed in
 * sysfs under the topology directory so that the Thunk can read it. This
 * function is called before the sysfs tree is updated.
 *
 * Returns whatever kfd_iommu_add_perf_counters() returns for @kdev.
 */
static int kfd_add_perf_to_topology(struct kfd_topology_device *kdev)
{
        int ret;

        /* IOMMU counters are the only counters supported so far */
        ret = kfd_iommu_add_perf_counters(kdev);

        return ret;
}
916
917 /* kfd_add_non_crat_information - Add information that is not currently
918  *      defined in CRAT but is necessary for KFD topology
919  * @dev - topology device to which addition info is added
920  */
921 static void kfd_add_non_crat_information(struct kfd_topology_device *kdev)
922 {
923         /* Check if CPU only node. */
924         if (!kdev->gpu) {
925                 /* Add system memory information */
926                 dmi_walk(find_system_memory, kdev);
927         }
928         /* TODO: For GPU node, rearrange code from kfd_topology_add_device */
929 }
930
931 /* kfd_is_acpi_crat_invalid - CRAT from ACPI is valid only for AMD APU devices.
932  *      Ignore CRAT for all other devices. AMD APU is identified if both CPU
933  *      and GPU cores are present.
934  * @device_list - topology device list created by parsing ACPI CRAT table.
935  * @return - TRUE if invalid, FALSE is valid.
936  */
937 static bool kfd_is_acpi_crat_invalid(struct list_head *device_list)
938 {
939         struct kfd_topology_device *dev;
940
941         list_for_each_entry(dev, device_list, list) {
942                 if (dev->node_props.cpu_cores_count &&
943                         dev->node_props.simd_count)
944                         return false;
945         }
946         pr_info("Ignoring ACPI CRAT on non-APU system\n");
947         return true;
948 }
949
950 int kfd_topology_init(void)
951 {
952         void *crat_image = NULL;
953         size_t image_size = 0;
954         int ret;
955         struct list_head temp_topology_device_list;
956         int cpu_only_node = 0;
957         struct kfd_topology_device *kdev;
958         int proximity_domain;
959
960         /* topology_device_list - Master list of all topology devices
961          * temp_topology_device_list - temporary list created while parsing CRAT
962          * or VCRAT. Once parsing is complete the contents of list is moved to
963          * topology_device_list
964          */
965
966         /* Initialize the head for the both the lists */
967         INIT_LIST_HEAD(&topology_device_list);
968         INIT_LIST_HEAD(&temp_topology_device_list);
969         init_rwsem(&topology_lock);
970
971         memset(&sys_props, 0, sizeof(sys_props));
972
973         /* Proximity domains in ACPI CRAT tables start counting at
974          * 0. The same should be true for virtual CRAT tables created
975          * at this stage. GPUs added later in kfd_topology_add_device
976          * use a counter.
977          */
978         proximity_domain = 0;
979
980         /*
981          * Get the CRAT image from the ACPI. If ACPI doesn't have one
982          * or if ACPI CRAT is invalid create a virtual CRAT.
983          * NOTE: The current implementation expects all AMD APUs to have
984          *      CRAT. If no CRAT is available, it is assumed to be a CPU
985          */
986         ret = kfd_create_crat_image_acpi(&crat_image, &image_size);
987         if (!ret) {
988                 ret = kfd_parse_crat_table(crat_image,
989                                            &temp_topology_device_list,
990                                            proximity_domain);
991                 if (ret ||
992                     kfd_is_acpi_crat_invalid(&temp_topology_device_list)) {
993                         kfd_release_topology_device_list(
994                                 &temp_topology_device_list);
995                         kfd_destroy_crat_image(crat_image);
996                         crat_image = NULL;
997                 }
998         }
999
1000         if (!crat_image) {
1001                 ret = kfd_create_crat_image_virtual(&crat_image, &image_size,
1002                                                     COMPUTE_UNIT_CPU, NULL,
1003                                                     proximity_domain);
1004                 cpu_only_node = 1;
1005                 if (ret) {
1006                         pr_err("Error creating VCRAT table for CPU\n");
1007                         return ret;
1008                 }
1009
1010                 ret = kfd_parse_crat_table(crat_image,
1011                                            &temp_topology_device_list,
1012                                            proximity_domain);
1013                 if (ret) {
1014                         pr_err("Error parsing VCRAT table for CPU\n");
1015                         goto err;
1016                 }
1017         }
1018
1019         kdev = list_first_entry(&temp_topology_device_list,
1020                                 struct kfd_topology_device, list);
1021         kfd_add_perf_to_topology(kdev);
1022
1023         down_write(&topology_lock);
1024         kfd_topology_update_device_list(&temp_topology_device_list,
1025                                         &topology_device_list);
1026         atomic_set(&topology_crat_proximity_domain, sys_props.num_devices-1);
1027         ret = kfd_topology_update_sysfs();
1028         up_write(&topology_lock);
1029
1030         if (!ret) {
1031                 sys_props.generation_count++;
1032                 kfd_update_system_properties();
1033                 kfd_debug_print_topology();
1034                 pr_info("Finished initializing topology\n");
1035         } else
1036                 pr_err("Failed to update topology in sysfs ret=%d\n", ret);
1037
1038         /* For nodes with GPU, this information gets added
1039          * when GPU is detected (kfd_topology_add_device).
1040          */
1041         if (cpu_only_node) {
1042                 /* Add additional information to CPU only node created above */
1043                 down_write(&topology_lock);
1044                 kdev = list_first_entry(&topology_device_list,
1045                                 struct kfd_topology_device, list);
1046                 up_write(&topology_lock);
1047                 kfd_add_non_crat_information(kdev);
1048         }
1049
1050 err:
1051         kfd_destroy_crat_image(crat_image);
1052         return ret;
1053 }
1054
1055 void kfd_topology_shutdown(void)
1056 {
1057         down_write(&topology_lock);
1058         kfd_topology_release_sysfs();
1059         kfd_release_live_view();
1060         up_write(&topology_lock);
1061 }
1062
1063 static uint32_t kfd_generate_gpu_id(struct kfd_dev *gpu)
1064 {
1065         uint32_t hashout;
1066         uint32_t buf[7];
1067         uint64_t local_mem_size;
1068         int i;
1069         struct kfd_local_mem_info local_mem_info;
1070
1071         if (!gpu)
1072                 return 0;
1073
1074         amdgpu_amdkfd_get_local_mem_info(gpu->kgd, &local_mem_info);
1075
1076         local_mem_size = local_mem_info.local_mem_size_private +
1077                         local_mem_info.local_mem_size_public;
1078
1079         buf[0] = gpu->pdev->devfn;
1080         buf[1] = gpu->pdev->subsystem_vendor;
1081         buf[2] = gpu->pdev->subsystem_device;
1082         buf[3] = gpu->pdev->device;
1083         buf[4] = gpu->pdev->bus->number;
1084         buf[5] = lower_32_bits(local_mem_size);
1085         buf[6] = upper_32_bits(local_mem_size);
1086
1087         for (i = 0, hashout = 0; i < 7; i++)
1088                 hashout ^= hash_32(buf[i], KFD_GPU_ID_HASH_WIDTH);
1089
1090         return hashout;
1091 }
1092 /* kfd_assign_gpu - Attach @gpu to the correct kfd topology device. If
1093  *              the GPU device is not already present in the topology device
1094  *              list then return NULL. This means a new topology device has to
1095  *              be created for this GPU.
1096  * TODO: Rather than assiging @gpu to first topology device withtout
1097  *              gpu attached, it will better to have more stringent check.
1098  */
1099 static struct kfd_topology_device *kfd_assign_gpu(struct kfd_dev *gpu)
1100 {
1101         struct kfd_topology_device *dev;
1102         struct kfd_topology_device *out_dev = NULL;
1103
1104         down_write(&topology_lock);
1105         list_for_each_entry(dev, &topology_device_list, list)
1106                 if (!dev->gpu && (dev->node_props.simd_count > 0)) {
1107                         dev->gpu = gpu;
1108                         out_dev = dev;
1109                         break;
1110                 }
1111         up_write(&topology_lock);
1112         return out_dev;
1113 }
1114
1115 static void kfd_notify_gpu_change(uint32_t gpu_id, int arrival)
1116 {
1117         /*
1118          * TODO: Generate an event for thunk about the arrival/removal
1119          * of the GPU
1120          */
1121 }
1122
1123 /* kfd_fill_mem_clk_max_info - Since CRAT doesn't have memory clock info,
1124  *              patch this after CRAT parsing.
1125  */
1126 static void kfd_fill_mem_clk_max_info(struct kfd_topology_device *dev)
1127 {
1128         struct kfd_mem_properties *mem;
1129         struct kfd_local_mem_info local_mem_info;
1130
1131         if (!dev)
1132                 return;
1133
1134         /* Currently, amdgpu driver (amdgpu_mc) deals only with GPUs with
1135          * single bank of VRAM local memory.
1136          * for dGPUs - VCRAT reports only one bank of Local Memory
1137          * for APUs - If CRAT from ACPI reports more than one bank, then
1138          *      all the banks will report the same mem_clk_max information
1139          */
1140         amdgpu_amdkfd_get_local_mem_info(dev->gpu->kgd, &local_mem_info);
1141
1142         list_for_each_entry(mem, &dev->mem_props, list)
1143                 mem->mem_clk_max = local_mem_info.mem_clk_max;
1144 }
1145
1146 static void kfd_fill_iolink_non_crat_info(struct kfd_topology_device *dev)
1147 {
1148         struct kfd_iolink_properties *link, *cpu_link;
1149         struct kfd_topology_device *cpu_dev;
1150         uint32_t cap;
1151         uint32_t cpu_flag = CRAT_IOLINK_FLAGS_ENABLED;
1152         uint32_t flag = CRAT_IOLINK_FLAGS_ENABLED;
1153
1154         if (!dev || !dev->gpu)
1155                 return;
1156
1157         pcie_capability_read_dword(dev->gpu->pdev,
1158                         PCI_EXP_DEVCAP2, &cap);
1159
1160         if (!(cap & (PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
1161                      PCI_EXP_DEVCAP2_ATOMIC_COMP64)))
1162                 cpu_flag |= CRAT_IOLINK_FLAGS_NO_ATOMICS_32_BIT |
1163                         CRAT_IOLINK_FLAGS_NO_ATOMICS_64_BIT;
1164
1165         if (!dev->gpu->pci_atomic_requested ||
1166             dev->gpu->device_info->asic_family == CHIP_HAWAII)
1167                 flag |= CRAT_IOLINK_FLAGS_NO_ATOMICS_32_BIT |
1168                         CRAT_IOLINK_FLAGS_NO_ATOMICS_64_BIT;
1169
1170         /* GPU only creates direct links so apply flags setting to all */
1171         list_for_each_entry(link, &dev->io_link_props, list) {
1172                 link->flags = flag;
1173                 cpu_dev = kfd_topology_device_by_proximity_domain(
1174                                 link->node_to);
1175                 if (cpu_dev) {
1176                         list_for_each_entry(cpu_link,
1177                                             &cpu_dev->io_link_props, list)
1178                                 if (cpu_link->node_to == link->node_from)
1179                                         cpu_link->flags = cpu_flag;
1180                 }
1181         }
1182 }
1183
1184 int kfd_topology_add_device(struct kfd_dev *gpu)
1185 {
1186         uint32_t gpu_id;
1187         struct kfd_topology_device *dev;
1188         struct kfd_cu_info cu_info;
1189         int res = 0;
1190         struct list_head temp_topology_device_list;
1191         void *crat_image = NULL;
1192         size_t image_size = 0;
1193         int proximity_domain;
1194
1195         INIT_LIST_HEAD(&temp_topology_device_list);
1196
1197         gpu_id = kfd_generate_gpu_id(gpu);
1198
1199         pr_debug("Adding new GPU (ID: 0x%x) to topology\n", gpu_id);
1200
1201         proximity_domain = atomic_inc_return(&topology_crat_proximity_domain);
1202
1203         /* Check to see if this gpu device exists in the topology_device_list.
1204          * If so, assign the gpu to that device,
1205          * else create a Virtual CRAT for this gpu device and then parse that
1206          * CRAT to create a new topology device. Once created assign the gpu to
1207          * that topology device
1208          */
1209         dev = kfd_assign_gpu(gpu);
1210         if (!dev) {
1211                 res = kfd_create_crat_image_virtual(&crat_image, &image_size,
1212                                                     COMPUTE_UNIT_GPU, gpu,
1213                                                     proximity_domain);
1214                 if (res) {
1215                         pr_err("Error creating VCRAT for GPU (ID: 0x%x)\n",
1216                                gpu_id);
1217                         return res;
1218                 }
1219                 res = kfd_parse_crat_table(crat_image,
1220                                            &temp_topology_device_list,
1221                                            proximity_domain);
1222                 if (res) {
1223                         pr_err("Error parsing VCRAT for GPU (ID: 0x%x)\n",
1224                                gpu_id);
1225                         goto err;
1226                 }
1227
1228                 down_write(&topology_lock);
1229                 kfd_topology_update_device_list(&temp_topology_device_list,
1230                         &topology_device_list);
1231
1232                 /* Update the SYSFS tree, since we added another topology
1233                  * device
1234                  */
1235                 res = kfd_topology_update_sysfs();
1236                 up_write(&topology_lock);
1237
1238                 if (!res)
1239                         sys_props.generation_count++;
1240                 else
1241                         pr_err("Failed to update GPU (ID: 0x%x) to sysfs topology. res=%d\n",
1242                                                 gpu_id, res);
1243                 dev = kfd_assign_gpu(gpu);
1244                 if (WARN_ON(!dev)) {
1245                         res = -ENODEV;
1246                         goto err;
1247                 }
1248         }
1249
1250         dev->gpu_id = gpu_id;
1251         gpu->id = gpu_id;
1252
1253         /* TODO: Move the following lines to function
1254          *      kfd_add_non_crat_information
1255          */
1256
1257         /* Fill-in additional information that is not available in CRAT but
1258          * needed for the topology
1259          */
1260
1261         amdgpu_amdkfd_get_cu_info(dev->gpu->kgd, &cu_info);
1262         dev->node_props.simd_arrays_per_engine =
1263                 cu_info.num_shader_arrays_per_engine;
1264
1265         dev->node_props.vendor_id = gpu->pdev->vendor;
1266         dev->node_props.device_id = gpu->pdev->device;
1267         dev->node_props.location_id = PCI_DEVID(gpu->pdev->bus->number,
1268                 gpu->pdev->devfn);
1269         dev->node_props.max_engine_clk_fcompute =
1270                 amdgpu_amdkfd_get_max_engine_clock_in_mhz(dev->gpu->kgd);
1271         dev->node_props.max_engine_clk_ccompute =
1272                 cpufreq_quick_get_max(0) / 1000;
1273         dev->node_props.drm_render_minor =
1274                 gpu->shared_resources.drm_render_minor;
1275
1276         dev->node_props.hive_id = gpu->hive_id;
1277
1278         kfd_fill_mem_clk_max_info(dev);
1279         kfd_fill_iolink_non_crat_info(dev);
1280
1281         switch (dev->gpu->device_info->asic_family) {
1282         case CHIP_KAVERI:
1283         case CHIP_HAWAII:
1284         case CHIP_TONGA:
1285                 dev->node_props.capability |= ((HSA_CAP_DOORBELL_TYPE_PRE_1_0 <<
1286                         HSA_CAP_DOORBELL_TYPE_TOTALBITS_SHIFT) &
1287                         HSA_CAP_DOORBELL_TYPE_TOTALBITS_MASK);
1288                 break;
1289         case CHIP_CARRIZO:
1290         case CHIP_FIJI:
1291         case CHIP_POLARIS10:
1292         case CHIP_POLARIS11:
1293         case CHIP_POLARIS12:
1294                 pr_debug("Adding doorbell packet type capability\n");
1295                 dev->node_props.capability |= ((HSA_CAP_DOORBELL_TYPE_1_0 <<
1296                         HSA_CAP_DOORBELL_TYPE_TOTALBITS_SHIFT) &
1297                         HSA_CAP_DOORBELL_TYPE_TOTALBITS_MASK);
1298                 break;
1299         case CHIP_VEGA10:
1300         case CHIP_VEGA12:
1301         case CHIP_VEGA20:
1302         case CHIP_RAVEN:
1303                 dev->node_props.capability |= ((HSA_CAP_DOORBELL_TYPE_2_0 <<
1304                         HSA_CAP_DOORBELL_TYPE_TOTALBITS_SHIFT) &
1305                         HSA_CAP_DOORBELL_TYPE_TOTALBITS_MASK);
1306                 break;
1307         default:
1308                 WARN(1, "Unexpected ASIC family %u",
1309                      dev->gpu->device_info->asic_family);
1310         }
1311
1312         /* Fix errors in CZ CRAT.
1313          * simd_count: Carrizo CRAT reports wrong simd_count, probably
1314          *              because it doesn't consider masked out CUs
1315          * max_waves_per_simd: Carrizo reports wrong max_waves_per_simd
1316          * capability flag: Carrizo CRAT doesn't report IOMMU flags
1317          */
1318         if (dev->gpu->device_info->asic_family == CHIP_CARRIZO) {
1319                 dev->node_props.simd_count =
1320                         cu_info.simd_per_cu * cu_info.cu_active_number;
1321                 dev->node_props.max_waves_per_simd = 10;
1322                 dev->node_props.capability |= HSA_CAP_ATS_PRESENT;
1323         }
1324
1325         kfd_debug_print_topology();
1326
1327         if (!res)
1328                 kfd_notify_gpu_change(gpu_id, 1);
1329 err:
1330         kfd_destroy_crat_image(crat_image);
1331         return res;
1332 }
1333
1334 int kfd_topology_remove_device(struct kfd_dev *gpu)
1335 {
1336         struct kfd_topology_device *dev, *tmp;
1337         uint32_t gpu_id;
1338         int res = -ENODEV;
1339
1340         down_write(&topology_lock);
1341
1342         list_for_each_entry_safe(dev, tmp, &topology_device_list, list)
1343                 if (dev->gpu == gpu) {
1344                         gpu_id = dev->gpu_id;
1345                         kfd_remove_sysfs_node_entry(dev);
1346                         kfd_release_topology_device(dev);
1347                         sys_props.num_devices--;
1348                         res = 0;
1349                         if (kfd_topology_update_sysfs() < 0)
1350                                 kfd_topology_release_sysfs();
1351                         break;
1352                 }
1353
1354         up_write(&topology_lock);
1355
1356         if (!res)
1357                 kfd_notify_gpu_change(gpu_id, 0);
1358
1359         return res;
1360 }
1361
1362 /* kfd_topology_enum_kfd_devices - Enumerate through all devices in KFD
1363  *      topology. If GPU device is found @idx, then valid kfd_dev pointer is
1364  *      returned through @kdev
1365  * Return -     0: On success (@kdev will be NULL for non GPU nodes)
1366  *              -1: If end of list
1367  */
1368 int kfd_topology_enum_kfd_devices(uint8_t idx, struct kfd_dev **kdev)
1369 {
1370
1371         struct kfd_topology_device *top_dev;
1372         uint8_t device_idx = 0;
1373
1374         *kdev = NULL;
1375         down_read(&topology_lock);
1376
1377         list_for_each_entry(top_dev, &topology_device_list, list) {
1378                 if (device_idx == idx) {
1379                         *kdev = top_dev->gpu;
1380                         up_read(&topology_lock);
1381                         return 0;
1382                 }
1383
1384                 device_idx++;
1385         }
1386
1387         up_read(&topology_lock);
1388
1389         return -1;
1390
1391 }
1392
1393 static int kfd_cpumask_to_apic_id(const struct cpumask *cpumask)
1394 {
1395         const struct cpuinfo_x86 *cpuinfo;
1396         int first_cpu_of_numa_node;
1397
1398         if (!cpumask || cpumask == cpu_none_mask)
1399                 return -1;
1400         first_cpu_of_numa_node = cpumask_first(cpumask);
1401         if (first_cpu_of_numa_node >= nr_cpu_ids)
1402                 return -1;
1403         cpuinfo = &cpu_data(first_cpu_of_numa_node);
1404
1405         return cpuinfo->apicid;
1406 }
1407
1408 /* kfd_numa_node_to_apic_id - Returns the APIC ID of the first logical processor
1409  *      of the given NUMA node (numa_node_id)
1410  * Return -1 on failure
1411  */
1412 int kfd_numa_node_to_apic_id(int numa_node_id)
1413 {
1414         if (numa_node_id == -1) {
1415                 pr_warn("Invalid NUMA Node. Use online CPU mask\n");
1416                 return kfd_cpumask_to_apic_id(cpu_online_mask);
1417         }
1418         return kfd_cpumask_to_apic_id(cpumask_of_node(numa_node_id));
1419 }
1420
1421 #if defined(CONFIG_DEBUG_FS)
1422
1423 int kfd_debugfs_hqds_by_device(struct seq_file *m, void *data)
1424 {
1425         struct kfd_topology_device *dev;
1426         unsigned int i = 0;
1427         int r = 0;
1428
1429         down_read(&topology_lock);
1430
1431         list_for_each_entry(dev, &topology_device_list, list) {
1432                 if (!dev->gpu) {
1433                         i++;
1434                         continue;
1435                 }
1436
1437                 seq_printf(m, "Node %u, gpu_id %x:\n", i++, dev->gpu->id);
1438                 r = dqm_debugfs_hqds(m, dev->gpu->dqm);
1439                 if (r)
1440                         break;
1441         }
1442
1443         up_read(&topology_lock);
1444
1445         return r;
1446 }
1447
1448 int kfd_debugfs_rls_by_device(struct seq_file *m, void *data)
1449 {
1450         struct kfd_topology_device *dev;
1451         unsigned int i = 0;
1452         int r = 0;
1453
1454         down_read(&topology_lock);
1455
1456         list_for_each_entry(dev, &topology_device_list, list) {
1457                 if (!dev->gpu) {
1458                         i++;
1459                         continue;
1460                 }
1461
1462                 seq_printf(m, "Node %u, gpu_id %x:\n", i++, dev->gpu->id);
1463                 r = pm_debugfs_runlist(m, &dev->gpu->dqm->packets);
1464                 if (r)
1465                         break;
1466         }
1467
1468         up_read(&topology_lock);
1469
1470         return r;
1471 }
1472
1473 #endif