Merge branch 'x86-alternatives-for-linus' of git://git.kernel.org/pub/scm/linux/kerne...
[sfrench/cifs-2.6.git] / drivers / gpu / drm / amd / amdkfd / kfd_topology.c
1 /*
2  * Copyright 2014 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  */
22
23 #include <linux/types.h>
24 #include <linux/kernel.h>
25 #include <linux/pci.h>
26 #include <linux/errno.h>
27 #include <linux/acpi.h>
28 #include <linux/hash.h>
29 #include <linux/cpufreq.h>
30 #include <linux/log2.h>
31 #include <linux/dmi.h>
32 #include <linux/atomic.h>
33
34 #include "kfd_priv.h"
35 #include "kfd_crat.h"
36 #include "kfd_topology.h"
37 #include "kfd_device_queue_manager.h"
38 #include "kfd_iommu.h"
39 #include "amdgpu_amdkfd.h"
40
/* topology_device_list - Master list of all topology devices */
static struct list_head topology_device_list;
/* System-wide properties plus the kobjects/attributes of the sysfs root */
static struct kfd_system_properties sys_props;

/*
 * Taken for reading by the lookup helpers below while they walk
 * topology_device_list; several helpers document that they must be called
 * with it held for writing.
 */
static DECLARE_RWSEM(topology_lock);
/* NOTE(review): presumably the next CRAT proximity domain to hand out -
 * its users are outside this part of the file, confirm there.
 */
static atomic_t topology_crat_proximity_domain;
47
48 struct kfd_topology_device *kfd_topology_device_by_proximity_domain(
49                                                 uint32_t proximity_domain)
50 {
51         struct kfd_topology_device *top_dev;
52         struct kfd_topology_device *device = NULL;
53
54         down_read(&topology_lock);
55
56         list_for_each_entry(top_dev, &topology_device_list, list)
57                 if (top_dev->proximity_domain == proximity_domain) {
58                         device = top_dev;
59                         break;
60                 }
61
62         up_read(&topology_lock);
63
64         return device;
65 }
66
67 struct kfd_topology_device *kfd_topology_device_by_id(uint32_t gpu_id)
68 {
69         struct kfd_topology_device *top_dev = NULL;
70         struct kfd_topology_device *ret = NULL;
71
72         down_read(&topology_lock);
73
74         list_for_each_entry(top_dev, &topology_device_list, list)
75                 if (top_dev->gpu_id == gpu_id) {
76                         ret = top_dev;
77                         break;
78                 }
79
80         up_read(&topology_lock);
81
82         return ret;
83 }
84
85 struct kfd_dev *kfd_device_by_id(uint32_t gpu_id)
86 {
87         struct kfd_topology_device *top_dev;
88
89         top_dev = kfd_topology_device_by_id(gpu_id);
90         if (!top_dev)
91                 return NULL;
92
93         return top_dev->gpu;
94 }
95
96 struct kfd_dev *kfd_device_by_pci_dev(const struct pci_dev *pdev)
97 {
98         struct kfd_topology_device *top_dev;
99         struct kfd_dev *device = NULL;
100
101         down_read(&topology_lock);
102
103         list_for_each_entry(top_dev, &topology_device_list, list)
104                 if (top_dev->gpu && top_dev->gpu->pdev == pdev) {
105                         device = top_dev->gpu;
106                         break;
107                 }
108
109         up_read(&topology_lock);
110
111         return device;
112 }
113
114 struct kfd_dev *kfd_device_by_kgd(const struct kgd_dev *kgd)
115 {
116         struct kfd_topology_device *top_dev;
117         struct kfd_dev *device = NULL;
118
119         down_read(&topology_lock);
120
121         list_for_each_entry(top_dev, &topology_device_list, list)
122                 if (top_dev->gpu && top_dev->gpu->kgd == kgd) {
123                         device = top_dev->gpu;
124                         break;
125                 }
126
127         up_read(&topology_lock);
128
129         return device;
130 }
131
132 /* Called with write topology_lock acquired */
133 static void kfd_release_topology_device(struct kfd_topology_device *dev)
134 {
135         struct kfd_mem_properties *mem;
136         struct kfd_cache_properties *cache;
137         struct kfd_iolink_properties *iolink;
138         struct kfd_perf_properties *perf;
139
140         list_del(&dev->list);
141
142         while (dev->mem_props.next != &dev->mem_props) {
143                 mem = container_of(dev->mem_props.next,
144                                 struct kfd_mem_properties, list);
145                 list_del(&mem->list);
146                 kfree(mem);
147         }
148
149         while (dev->cache_props.next != &dev->cache_props) {
150                 cache = container_of(dev->cache_props.next,
151                                 struct kfd_cache_properties, list);
152                 list_del(&cache->list);
153                 kfree(cache);
154         }
155
156         while (dev->io_link_props.next != &dev->io_link_props) {
157                 iolink = container_of(dev->io_link_props.next,
158                                 struct kfd_iolink_properties, list);
159                 list_del(&iolink->list);
160                 kfree(iolink);
161         }
162
163         while (dev->perf_props.next != &dev->perf_props) {
164                 perf = container_of(dev->perf_props.next,
165                                 struct kfd_perf_properties, list);
166                 list_del(&perf->list);
167                 kfree(perf);
168         }
169
170         kfree(dev);
171 }
172
173 void kfd_release_topology_device_list(struct list_head *device_list)
174 {
175         struct kfd_topology_device *dev;
176
177         while (!list_empty(device_list)) {
178                 dev = list_first_entry(device_list,
179                                        struct kfd_topology_device, list);
180                 kfd_release_topology_device(dev);
181         }
182 }
183
184 static void kfd_release_live_view(void)
185 {
186         kfd_release_topology_device_list(&topology_device_list);
187         memset(&sys_props, 0, sizeof(sys_props));
188 }
189
190 struct kfd_topology_device *kfd_create_topology_device(
191                                 struct list_head *device_list)
192 {
193         struct kfd_topology_device *dev;
194
195         dev = kfd_alloc_struct(dev);
196         if (!dev) {
197                 pr_err("No memory to allocate a topology device");
198                 return NULL;
199         }
200
201         INIT_LIST_HEAD(&dev->mem_props);
202         INIT_LIST_HEAD(&dev->cache_props);
203         INIT_LIST_HEAD(&dev->io_link_props);
204         INIT_LIST_HEAD(&dev->perf_props);
205
206         list_add_tail(&dev->list, device_list);
207
208         return dev;
209 }
210
211
/*
 * Helpers that append one formatted line to a sysfs show() buffer.
 *
 * @buffer must hold a NUL-terminated string on entry (callers start with
 * buffer[0] = 0) and is treated as PAGE_SIZE bytes long. The new text is
 * written directly behind the existing contents instead of re-printing the
 * buffer into itself through "%s": handing snprintf() its own destination
 * as a source argument is undefined behaviour.
 *
 * Each helper evaluates to the total number of characters now stored in
 * @buffer (excluding the terminating NUL), as an int. Because scnprintf()
 * is used, that value never exceeds PAGE_SIZE - 1 even when the output had
 * to be truncated.
 */
#define sysfs_show_gen_prop(buffer, fmt, ...)				\
({									\
	char *__kfd_buf = (buffer);					\
	size_t __kfd_len = strnlen(__kfd_buf, PAGE_SIZE);		\
									\
	(int)(__kfd_len + scnprintf(__kfd_buf + __kfd_len,		\
				    PAGE_SIZE - __kfd_len,		\
				    fmt, __VA_ARGS__));			\
})
/* "<name> <unsigned 32-bit value>\n" */
#define sysfs_show_32bit_prop(buffer, name, value) \
		sysfs_show_gen_prop(buffer, "%s %u\n", name, value)
/* "<name> <unsigned 64-bit value>\n" */
#define sysfs_show_64bit_prop(buffer, name, value) \
		sysfs_show_gen_prop(buffer, "%s %llu\n", name, value)
/* "<unsigned 32-bit value>\n" */
#define sysfs_show_32bit_val(buffer, value) \
		sysfs_show_gen_prop(buffer, "%u\n", value)
/* "<string>\n" */
#define sysfs_show_str_val(buffer, value) \
		sysfs_show_gen_prop(buffer, "%s\n", value)
222
223 static ssize_t sysprops_show(struct kobject *kobj, struct attribute *attr,
224                 char *buffer)
225 {
226         ssize_t ret;
227
228         /* Making sure that the buffer is an empty string */
229         buffer[0] = 0;
230
231         if (attr == &sys_props.attr_genid) {
232                 ret = sysfs_show_32bit_val(buffer, sys_props.generation_count);
233         } else if (attr == &sys_props.attr_props) {
234                 sysfs_show_64bit_prop(buffer, "platform_oem",
235                                 sys_props.platform_oem);
236                 sysfs_show_64bit_prop(buffer, "platform_id",
237                                 sys_props.platform_id);
238                 ret = sysfs_show_64bit_prop(buffer, "platform_rev",
239                                 sys_props.platform_rev);
240         } else {
241                 ret = -EINVAL;
242         }
243
244         return ret;
245 }
246
/*
 * release() hook shared by the kobj_types in this file: the kobjects are
 * stand-alone allocations, so releasing one just frees it.
 */
static void kfd_topology_kobj_release(struct kobject *kobj)
{
	kfree(kobj);
}
251
252 static const struct sysfs_ops sysprops_ops = {
253         .show = sysprops_show,
254 };
255
256 static struct kobj_type sysprops_type = {
257         .release = kfd_topology_kobj_release,
258         .sysfs_ops = &sysprops_ops,
259 };
260
261 static ssize_t iolink_show(struct kobject *kobj, struct attribute *attr,
262                 char *buffer)
263 {
264         ssize_t ret;
265         struct kfd_iolink_properties *iolink;
266
267         /* Making sure that the buffer is an empty string */
268         buffer[0] = 0;
269
270         iolink = container_of(attr, struct kfd_iolink_properties, attr);
271         sysfs_show_32bit_prop(buffer, "type", iolink->iolink_type);
272         sysfs_show_32bit_prop(buffer, "version_major", iolink->ver_maj);
273         sysfs_show_32bit_prop(buffer, "version_minor", iolink->ver_min);
274         sysfs_show_32bit_prop(buffer, "node_from", iolink->node_from);
275         sysfs_show_32bit_prop(buffer, "node_to", iolink->node_to);
276         sysfs_show_32bit_prop(buffer, "weight", iolink->weight);
277         sysfs_show_32bit_prop(buffer, "min_latency", iolink->min_latency);
278         sysfs_show_32bit_prop(buffer, "max_latency", iolink->max_latency);
279         sysfs_show_32bit_prop(buffer, "min_bandwidth", iolink->min_bandwidth);
280         sysfs_show_32bit_prop(buffer, "max_bandwidth", iolink->max_bandwidth);
281         sysfs_show_32bit_prop(buffer, "recommended_transfer_size",
282                         iolink->rec_transfer_size);
283         ret = sysfs_show_32bit_prop(buffer, "flags", iolink->flags);
284
285         return ret;
286 }
287
288 static const struct sysfs_ops iolink_ops = {
289         .show = iolink_show,
290 };
291
292 static struct kobj_type iolink_type = {
293         .release = kfd_topology_kobj_release,
294         .sysfs_ops = &iolink_ops,
295 };
296
297 static ssize_t mem_show(struct kobject *kobj, struct attribute *attr,
298                 char *buffer)
299 {
300         ssize_t ret;
301         struct kfd_mem_properties *mem;
302
303         /* Making sure that the buffer is an empty string */
304         buffer[0] = 0;
305
306         mem = container_of(attr, struct kfd_mem_properties, attr);
307         sysfs_show_32bit_prop(buffer, "heap_type", mem->heap_type);
308         sysfs_show_64bit_prop(buffer, "size_in_bytes", mem->size_in_bytes);
309         sysfs_show_32bit_prop(buffer, "flags", mem->flags);
310         sysfs_show_32bit_prop(buffer, "width", mem->width);
311         ret = sysfs_show_32bit_prop(buffer, "mem_clk_max", mem->mem_clk_max);
312
313         return ret;
314 }
315
316 static const struct sysfs_ops mem_ops = {
317         .show = mem_show,
318 };
319
320 static struct kobj_type mem_type = {
321         .release = kfd_topology_kobj_release,
322         .sysfs_ops = &mem_ops,
323 };
324
325 static ssize_t kfd_cache_show(struct kobject *kobj, struct attribute *attr,
326                 char *buffer)
327 {
328         ssize_t ret;
329         uint32_t i, j;
330         struct kfd_cache_properties *cache;
331
332         /* Making sure that the buffer is an empty string */
333         buffer[0] = 0;
334
335         cache = container_of(attr, struct kfd_cache_properties, attr);
336         sysfs_show_32bit_prop(buffer, "processor_id_low",
337                         cache->processor_id_low);
338         sysfs_show_32bit_prop(buffer, "level", cache->cache_level);
339         sysfs_show_32bit_prop(buffer, "size", cache->cache_size);
340         sysfs_show_32bit_prop(buffer, "cache_line_size", cache->cacheline_size);
341         sysfs_show_32bit_prop(buffer, "cache_lines_per_tag",
342                         cache->cachelines_per_tag);
343         sysfs_show_32bit_prop(buffer, "association", cache->cache_assoc);
344         sysfs_show_32bit_prop(buffer, "latency", cache->cache_latency);
345         sysfs_show_32bit_prop(buffer, "type", cache->cache_type);
346         snprintf(buffer, PAGE_SIZE, "%ssibling_map ", buffer);
347         for (i = 0; i < CRAT_SIBLINGMAP_SIZE; i++)
348                 for (j = 0; j < sizeof(cache->sibling_map[0])*8; j++) {
349                         /* Check each bit */
350                         if (cache->sibling_map[i] & (1 << j))
351                                 ret = snprintf(buffer, PAGE_SIZE,
352                                          "%s%d%s", buffer, 1, ",");
353                         else
354                                 ret = snprintf(buffer, PAGE_SIZE,
355                                          "%s%d%s", buffer, 0, ",");
356                 }
357         /* Replace the last "," with end of line */
358         *(buffer + strlen(buffer) - 1) = 0xA;
359         return ret;
360 }
361
362 static const struct sysfs_ops cache_ops = {
363         .show = kfd_cache_show,
364 };
365
366 static struct kobj_type cache_type = {
367         .release = kfd_topology_kobj_release,
368         .sysfs_ops = &cache_ops,
369 };
370
371 /****** Sysfs of Performance Counters ******/
372
/*
 * One performance-counter sysfs attribute together with the value it
 * reports. perf_show() recovers this structure from the embedded @attr.
 */
struct kfd_perf_attr {
	/* sysfs attribute; its show hook is perf_show() */
	struct kobj_attribute attr;
	/* value printed by perf_show(); 0 is treated as "no valid data" */
	uint32_t data;
};
377
378 static ssize_t perf_show(struct kobject *kobj, struct kobj_attribute *attrs,
379                         char *buf)
380 {
381         struct kfd_perf_attr *attr;
382
383         buf[0] = 0;
384         attr = container_of(attrs, struct kfd_perf_attr, attr);
385         if (!attr->data) /* invalid data for PMC */
386                 return 0;
387         else
388                 return sysfs_show_32bit_val(buf, attr->data);
389 }
390
391 #define KFD_PERF_DESC(_name, _data)                     \
392 {                                                       \
393         .attr  = __ATTR(_name, 0444, perf_show, NULL),  \
394         .data = _data,                                  \
395 }
396
397 static struct kfd_perf_attr perf_attr_iommu[] = {
398         KFD_PERF_DESC(max_concurrent, 0),
399         KFD_PERF_DESC(num_counters, 0),
400         KFD_PERF_DESC(counter_ids, 0),
401 };
402 /****************************************/
403
404 static ssize_t node_show(struct kobject *kobj, struct attribute *attr,
405                 char *buffer)
406 {
407         struct kfd_topology_device *dev;
408         char public_name[KFD_TOPOLOGY_PUBLIC_NAME_SIZE];
409         uint32_t i;
410         uint32_t log_max_watch_addr;
411
412         /* Making sure that the buffer is an empty string */
413         buffer[0] = 0;
414
415         if (strcmp(attr->name, "gpu_id") == 0) {
416                 dev = container_of(attr, struct kfd_topology_device,
417                                 attr_gpuid);
418                 return sysfs_show_32bit_val(buffer, dev->gpu_id);
419         }
420
421         if (strcmp(attr->name, "name") == 0) {
422                 dev = container_of(attr, struct kfd_topology_device,
423                                 attr_name);
424                 for (i = 0; i < KFD_TOPOLOGY_PUBLIC_NAME_SIZE; i++) {
425                         public_name[i] =
426                                         (char)dev->node_props.marketing_name[i];
427                         if (dev->node_props.marketing_name[i] == 0)
428                                 break;
429                 }
430                 public_name[KFD_TOPOLOGY_PUBLIC_NAME_SIZE-1] = 0x0;
431                 return sysfs_show_str_val(buffer, public_name);
432         }
433
434         dev = container_of(attr, struct kfd_topology_device,
435                         attr_props);
436         sysfs_show_32bit_prop(buffer, "cpu_cores_count",
437                         dev->node_props.cpu_cores_count);
438         sysfs_show_32bit_prop(buffer, "simd_count",
439                         dev->node_props.simd_count);
440         sysfs_show_32bit_prop(buffer, "mem_banks_count",
441                         dev->node_props.mem_banks_count);
442         sysfs_show_32bit_prop(buffer, "caches_count",
443                         dev->node_props.caches_count);
444         sysfs_show_32bit_prop(buffer, "io_links_count",
445                         dev->node_props.io_links_count);
446         sysfs_show_32bit_prop(buffer, "cpu_core_id_base",
447                         dev->node_props.cpu_core_id_base);
448         sysfs_show_32bit_prop(buffer, "simd_id_base",
449                         dev->node_props.simd_id_base);
450         sysfs_show_32bit_prop(buffer, "max_waves_per_simd",
451                         dev->node_props.max_waves_per_simd);
452         sysfs_show_32bit_prop(buffer, "lds_size_in_kb",
453                         dev->node_props.lds_size_in_kb);
454         sysfs_show_32bit_prop(buffer, "gds_size_in_kb",
455                         dev->node_props.gds_size_in_kb);
456         sysfs_show_32bit_prop(buffer, "wave_front_size",
457                         dev->node_props.wave_front_size);
458         sysfs_show_32bit_prop(buffer, "array_count",
459                         dev->node_props.array_count);
460         sysfs_show_32bit_prop(buffer, "simd_arrays_per_engine",
461                         dev->node_props.simd_arrays_per_engine);
462         sysfs_show_32bit_prop(buffer, "cu_per_simd_array",
463                         dev->node_props.cu_per_simd_array);
464         sysfs_show_32bit_prop(buffer, "simd_per_cu",
465                         dev->node_props.simd_per_cu);
466         sysfs_show_32bit_prop(buffer, "max_slots_scratch_cu",
467                         dev->node_props.max_slots_scratch_cu);
468         sysfs_show_32bit_prop(buffer, "vendor_id",
469                         dev->node_props.vendor_id);
470         sysfs_show_32bit_prop(buffer, "device_id",
471                         dev->node_props.device_id);
472         sysfs_show_32bit_prop(buffer, "location_id",
473                         dev->node_props.location_id);
474         sysfs_show_32bit_prop(buffer, "drm_render_minor",
475                         dev->node_props.drm_render_minor);
476         sysfs_show_64bit_prop(buffer, "hive_id",
477                         dev->node_props.hive_id);
478
479         if (dev->gpu) {
480                 log_max_watch_addr =
481                         __ilog2_u32(dev->gpu->device_info->num_of_watch_points);
482
483                 if (log_max_watch_addr) {
484                         dev->node_props.capability |=
485                                         HSA_CAP_WATCH_POINTS_SUPPORTED;
486
487                         dev->node_props.capability |=
488                                 ((log_max_watch_addr <<
489                                         HSA_CAP_WATCH_POINTS_TOTALBITS_SHIFT) &
490                                 HSA_CAP_WATCH_POINTS_TOTALBITS_MASK);
491                 }
492
493                 if (dev->gpu->device_info->asic_family == CHIP_TONGA)
494                         dev->node_props.capability |=
495                                         HSA_CAP_AQL_QUEUE_DOUBLE_MAP;
496
497                 sysfs_show_32bit_prop(buffer, "max_engine_clk_fcompute",
498                         dev->node_props.max_engine_clk_fcompute);
499
500                 sysfs_show_64bit_prop(buffer, "local_mem_size",
501                                 (unsigned long long int) 0);
502
503                 sysfs_show_32bit_prop(buffer, "fw_version",
504                                 dev->gpu->mec_fw_version);
505                 sysfs_show_32bit_prop(buffer, "capability",
506                                 dev->node_props.capability);
507                 sysfs_show_32bit_prop(buffer, "sdma_fw_version",
508                                 dev->gpu->sdma_fw_version);
509         }
510
511         return sysfs_show_32bit_prop(buffer, "max_engine_clk_ccompute",
512                                         cpufreq_quick_get_max(0)/1000);
513 }
514
515 static const struct sysfs_ops node_ops = {
516         .show = node_show,
517 };
518
519 static struct kobj_type node_type = {
520         .release = kfd_topology_kobj_release,
521         .sysfs_ops = &node_ops,
522 };
523
/*
 * Tear down a property object: remove its single attribute file, delete
 * the kobject and drop one reference to it.
 */
static void kfd_remove_sysfs_file(struct kobject *kobj, struct attribute *attr)
{
	sysfs_remove_file(kobj, attr);

	kobject_del(kobj);
	kobject_put(kobj);
}
530
531 static void kfd_remove_sysfs_node_entry(struct kfd_topology_device *dev)
532 {
533         struct kfd_iolink_properties *iolink;
534         struct kfd_cache_properties *cache;
535         struct kfd_mem_properties *mem;
536         struct kfd_perf_properties *perf;
537
538         if (dev->kobj_iolink) {
539                 list_for_each_entry(iolink, &dev->io_link_props, list)
540                         if (iolink->kobj) {
541                                 kfd_remove_sysfs_file(iolink->kobj,
542                                                         &iolink->attr);
543                                 iolink->kobj = NULL;
544                         }
545                 kobject_del(dev->kobj_iolink);
546                 kobject_put(dev->kobj_iolink);
547                 dev->kobj_iolink = NULL;
548         }
549
550         if (dev->kobj_cache) {
551                 list_for_each_entry(cache, &dev->cache_props, list)
552                         if (cache->kobj) {
553                                 kfd_remove_sysfs_file(cache->kobj,
554                                                         &cache->attr);
555                                 cache->kobj = NULL;
556                         }
557                 kobject_del(dev->kobj_cache);
558                 kobject_put(dev->kobj_cache);
559                 dev->kobj_cache = NULL;
560         }
561
562         if (dev->kobj_mem) {
563                 list_for_each_entry(mem, &dev->mem_props, list)
564                         if (mem->kobj) {
565                                 kfd_remove_sysfs_file(mem->kobj, &mem->attr);
566                                 mem->kobj = NULL;
567                         }
568                 kobject_del(dev->kobj_mem);
569                 kobject_put(dev->kobj_mem);
570                 dev->kobj_mem = NULL;
571         }
572
573         if (dev->kobj_perf) {
574                 list_for_each_entry(perf, &dev->perf_props, list) {
575                         kfree(perf->attr_group);
576                         perf->attr_group = NULL;
577                 }
578                 kobject_del(dev->kobj_perf);
579                 kobject_put(dev->kobj_perf);
580                 dev->kobj_perf = NULL;
581         }
582
583         if (dev->kobj_node) {
584                 sysfs_remove_file(dev->kobj_node, &dev->attr_gpuid);
585                 sysfs_remove_file(dev->kobj_node, &dev->attr_name);
586                 sysfs_remove_file(dev->kobj_node, &dev->attr_props);
587                 kobject_del(dev->kobj_node);
588                 kobject_put(dev->kobj_node);
589                 dev->kobj_node = NULL;
590         }
591 }
592
593 static int kfd_build_sysfs_node_entry(struct kfd_topology_device *dev,
594                 uint32_t id)
595 {
596         struct kfd_iolink_properties *iolink;
597         struct kfd_cache_properties *cache;
598         struct kfd_mem_properties *mem;
599         struct kfd_perf_properties *perf;
600         int ret;
601         uint32_t i, num_attrs;
602         struct attribute **attrs;
603
604         if (WARN_ON(dev->kobj_node))
605                 return -EEXIST;
606
607         /*
608          * Creating the sysfs folders
609          */
610         dev->kobj_node = kfd_alloc_struct(dev->kobj_node);
611         if (!dev->kobj_node)
612                 return -ENOMEM;
613
614         ret = kobject_init_and_add(dev->kobj_node, &node_type,
615                         sys_props.kobj_nodes, "%d", id);
616         if (ret < 0)
617                 return ret;
618
619         dev->kobj_mem = kobject_create_and_add("mem_banks", dev->kobj_node);
620         if (!dev->kobj_mem)
621                 return -ENOMEM;
622
623         dev->kobj_cache = kobject_create_and_add("caches", dev->kobj_node);
624         if (!dev->kobj_cache)
625                 return -ENOMEM;
626
627         dev->kobj_iolink = kobject_create_and_add("io_links", dev->kobj_node);
628         if (!dev->kobj_iolink)
629                 return -ENOMEM;
630
631         dev->kobj_perf = kobject_create_and_add("perf", dev->kobj_node);
632         if (!dev->kobj_perf)
633                 return -ENOMEM;
634
635         /*
636          * Creating sysfs files for node properties
637          */
638         dev->attr_gpuid.name = "gpu_id";
639         dev->attr_gpuid.mode = KFD_SYSFS_FILE_MODE;
640         sysfs_attr_init(&dev->attr_gpuid);
641         dev->attr_name.name = "name";
642         dev->attr_name.mode = KFD_SYSFS_FILE_MODE;
643         sysfs_attr_init(&dev->attr_name);
644         dev->attr_props.name = "properties";
645         dev->attr_props.mode = KFD_SYSFS_FILE_MODE;
646         sysfs_attr_init(&dev->attr_props);
647         ret = sysfs_create_file(dev->kobj_node, &dev->attr_gpuid);
648         if (ret < 0)
649                 return ret;
650         ret = sysfs_create_file(dev->kobj_node, &dev->attr_name);
651         if (ret < 0)
652                 return ret;
653         ret = sysfs_create_file(dev->kobj_node, &dev->attr_props);
654         if (ret < 0)
655                 return ret;
656
657         i = 0;
658         list_for_each_entry(mem, &dev->mem_props, list) {
659                 mem->kobj = kzalloc(sizeof(struct kobject), GFP_KERNEL);
660                 if (!mem->kobj)
661                         return -ENOMEM;
662                 ret = kobject_init_and_add(mem->kobj, &mem_type,
663                                 dev->kobj_mem, "%d", i);
664                 if (ret < 0)
665                         return ret;
666
667                 mem->attr.name = "properties";
668                 mem->attr.mode = KFD_SYSFS_FILE_MODE;
669                 sysfs_attr_init(&mem->attr);
670                 ret = sysfs_create_file(mem->kobj, &mem->attr);
671                 if (ret < 0)
672                         return ret;
673                 i++;
674         }
675
676         i = 0;
677         list_for_each_entry(cache, &dev->cache_props, list) {
678                 cache->kobj = kzalloc(sizeof(struct kobject), GFP_KERNEL);
679                 if (!cache->kobj)
680                         return -ENOMEM;
681                 ret = kobject_init_and_add(cache->kobj, &cache_type,
682                                 dev->kobj_cache, "%d", i);
683                 if (ret < 0)
684                         return ret;
685
686                 cache->attr.name = "properties";
687                 cache->attr.mode = KFD_SYSFS_FILE_MODE;
688                 sysfs_attr_init(&cache->attr);
689                 ret = sysfs_create_file(cache->kobj, &cache->attr);
690                 if (ret < 0)
691                         return ret;
692                 i++;
693         }
694
695         i = 0;
696         list_for_each_entry(iolink, &dev->io_link_props, list) {
697                 iolink->kobj = kzalloc(sizeof(struct kobject), GFP_KERNEL);
698                 if (!iolink->kobj)
699                         return -ENOMEM;
700                 ret = kobject_init_and_add(iolink->kobj, &iolink_type,
701                                 dev->kobj_iolink, "%d", i);
702                 if (ret < 0)
703                         return ret;
704
705                 iolink->attr.name = "properties";
706                 iolink->attr.mode = KFD_SYSFS_FILE_MODE;
707                 sysfs_attr_init(&iolink->attr);
708                 ret = sysfs_create_file(iolink->kobj, &iolink->attr);
709                 if (ret < 0)
710                         return ret;
711                 i++;
712         }
713
714         /* All hardware blocks have the same number of attributes. */
715         num_attrs = ARRAY_SIZE(perf_attr_iommu);
716         list_for_each_entry(perf, &dev->perf_props, list) {
717                 perf->attr_group = kzalloc(sizeof(struct kfd_perf_attr)
718                         * num_attrs + sizeof(struct attribute_group),
719                         GFP_KERNEL);
720                 if (!perf->attr_group)
721                         return -ENOMEM;
722
723                 attrs = (struct attribute **)(perf->attr_group + 1);
724                 if (!strcmp(perf->block_name, "iommu")) {
725                 /* Information of IOMMU's num_counters and counter_ids is shown
726                  * under /sys/bus/event_source/devices/amd_iommu. We don't
727                  * duplicate here.
728                  */
729                         perf_attr_iommu[0].data = perf->max_concurrent;
730                         for (i = 0; i < num_attrs; i++)
731                                 attrs[i] = &perf_attr_iommu[i].attr.attr;
732                 }
733                 perf->attr_group->name = perf->block_name;
734                 perf->attr_group->attrs = attrs;
735                 ret = sysfs_create_group(dev->kobj_perf, perf->attr_group);
736                 if (ret < 0)
737                         return ret;
738         }
739
740         return 0;
741 }
742
743 /* Called with write topology lock acquired */
744 static int kfd_build_sysfs_node_tree(void)
745 {
746         struct kfd_topology_device *dev;
747         int ret;
748         uint32_t i = 0;
749
750         list_for_each_entry(dev, &topology_device_list, list) {
751                 ret = kfd_build_sysfs_node_entry(dev, i);
752                 if (ret < 0)
753                         return ret;
754                 i++;
755         }
756
757         return 0;
758 }
759
760 /* Called with write topology lock acquired */
761 static void kfd_remove_sysfs_node_tree(void)
762 {
763         struct kfd_topology_device *dev;
764
765         list_for_each_entry(dev, &topology_device_list, list)
766                 kfd_remove_sysfs_node_entry(dev);
767 }
768
769 static int kfd_topology_update_sysfs(void)
770 {
771         int ret;
772
773         pr_info("Creating topology SYSFS entries\n");
774         if (!sys_props.kobj_topology) {
775                 sys_props.kobj_topology =
776                                 kfd_alloc_struct(sys_props.kobj_topology);
777                 if (!sys_props.kobj_topology)
778                         return -ENOMEM;
779
780                 ret = kobject_init_and_add(sys_props.kobj_topology,
781                                 &sysprops_type,  &kfd_device->kobj,
782                                 "topology");
783                 if (ret < 0)
784                         return ret;
785
786                 sys_props.kobj_nodes = kobject_create_and_add("nodes",
787                                 sys_props.kobj_topology);
788                 if (!sys_props.kobj_nodes)
789                         return -ENOMEM;
790
791                 sys_props.attr_genid.name = "generation_id";
792                 sys_props.attr_genid.mode = KFD_SYSFS_FILE_MODE;
793                 sysfs_attr_init(&sys_props.attr_genid);
794                 ret = sysfs_create_file(sys_props.kobj_topology,
795                                 &sys_props.attr_genid);
796                 if (ret < 0)
797                         return ret;
798
799                 sys_props.attr_props.name = "system_properties";
800                 sys_props.attr_props.mode = KFD_SYSFS_FILE_MODE;
801                 sysfs_attr_init(&sys_props.attr_props);
802                 ret = sysfs_create_file(sys_props.kobj_topology,
803                                 &sys_props.attr_props);
804                 if (ret < 0)
805                         return ret;
806         }
807
808         kfd_remove_sysfs_node_tree();
809
810         return kfd_build_sysfs_node_tree();
811 }
812
813 static void kfd_topology_release_sysfs(void)
814 {
815         kfd_remove_sysfs_node_tree();
816         if (sys_props.kobj_topology) {
817                 sysfs_remove_file(sys_props.kobj_topology,
818                                 &sys_props.attr_genid);
819                 sysfs_remove_file(sys_props.kobj_topology,
820                                 &sys_props.attr_props);
821                 if (sys_props.kobj_nodes) {
822                         kobject_del(sys_props.kobj_nodes);
823                         kobject_put(sys_props.kobj_nodes);
824                         sys_props.kobj_nodes = NULL;
825                 }
826                 kobject_del(sys_props.kobj_topology);
827                 kobject_put(sys_props.kobj_topology);
828                 sys_props.kobj_topology = NULL;
829         }
830 }
831
832 /* Called with write topology_lock acquired */
833 static void kfd_topology_update_device_list(struct list_head *temp_list,
834                                         struct list_head *master_list)
835 {
836         while (!list_empty(temp_list)) {
837                 list_move_tail(temp_list->next, master_list);
838                 sys_props.num_devices++;
839         }
840 }
841
842 static void kfd_debug_print_topology(void)
843 {
844         struct kfd_topology_device *dev;
845
846         down_read(&topology_lock);
847
848         dev = list_last_entry(&topology_device_list,
849                         struct kfd_topology_device, list);
850         if (dev) {
851                 if (dev->node_props.cpu_cores_count &&
852                                 dev->node_props.simd_count) {
853                         pr_info("Topology: Add APU node [0x%0x:0x%0x]\n",
854                                 dev->node_props.device_id,
855                                 dev->node_props.vendor_id);
856                 } else if (dev->node_props.cpu_cores_count)
857                         pr_info("Topology: Add CPU node\n");
858                 else if (dev->node_props.simd_count)
859                         pr_info("Topology: Add dGPU node [0x%0x:0x%0x]\n",
860                                 dev->node_props.device_id,
861                                 dev->node_props.vendor_id);
862         }
863         up_read(&topology_lock);
864 }
865
866 /* Helper function for intializing platform_xx members of
867  * kfd_system_properties. Uses OEM info from the last CPU/APU node.
868  */
869 static void kfd_update_system_properties(void)
870 {
871         struct kfd_topology_device *dev;
872
873         down_read(&topology_lock);
874         dev = list_last_entry(&topology_device_list,
875                         struct kfd_topology_device, list);
876         if (dev) {
877                 sys_props.platform_id =
878                         (*((uint64_t *)dev->oem_id)) & CRAT_OEMID_64BIT_MASK;
879                 sys_props.platform_oem = *((uint64_t *)dev->oem_table_id);
880                 sys_props.platform_rev = dev->oem_revision;
881         }
882         up_read(&topology_lock);
883 }
884
885 static void find_system_memory(const struct dmi_header *dm,
886         void *private)
887 {
888         struct kfd_mem_properties *mem;
889         u16 mem_width, mem_clock;
890         struct kfd_topology_device *kdev =
891                 (struct kfd_topology_device *)private;
892         const u8 *dmi_data = (const u8 *)(dm + 1);
893
894         if (dm->type == DMI_ENTRY_MEM_DEVICE && dm->length >= 0x15) {
895                 mem_width = (u16)(*(const u16 *)(dmi_data + 0x6));
896                 mem_clock = (u16)(*(const u16 *)(dmi_data + 0x11));
897                 list_for_each_entry(mem, &kdev->mem_props, list) {
898                         if (mem_width != 0xFFFF && mem_width != 0)
899                                 mem->width = mem_width;
900                         if (mem_clock != 0)
901                                 mem->mem_clk_max = mem_clock;
902                 }
903         }
904 }
905
/*
 * CRAT carries no performance counter information, yet Thunk expects to
 * find it in sysfs below the topology directory. Attach it to @kdev here,
 * before the sysfs tree is updated.
 *
 * Returns whatever kfd_iommu_add_perf_counters() returns.
 */
static int kfd_add_perf_to_topology(struct kfd_topology_device *kdev)
{
	int ret;

	/* IOMMU counters are the only ones supported so far */
	ret = kfd_iommu_add_perf_counters(kdev);

	return ret;
}
916
917 /* kfd_add_non_crat_information - Add information that is not currently
918  *      defined in CRAT but is necessary for KFD topology
919  * @dev - topology device to which addition info is added
920  */
921 static void kfd_add_non_crat_information(struct kfd_topology_device *kdev)
922 {
923         /* Check if CPU only node. */
924         if (!kdev->gpu) {
925                 /* Add system memory information */
926                 dmi_walk(find_system_memory, kdev);
927         }
928         /* TODO: For GPU node, rearrange code from kfd_topology_add_device */
929 }
930
931 /* kfd_is_acpi_crat_invalid - CRAT from ACPI is valid only for AMD APU devices.
932  *      Ignore CRAT for all other devices. AMD APU is identified if both CPU
933  *      and GPU cores are present.
934  * @device_list - topology device list created by parsing ACPI CRAT table.
935  * @return - TRUE if invalid, FALSE is valid.
936  */
937 static bool kfd_is_acpi_crat_invalid(struct list_head *device_list)
938 {
939         struct kfd_topology_device *dev;
940
941         list_for_each_entry(dev, device_list, list) {
942                 if (dev->node_props.cpu_cores_count &&
943                         dev->node_props.simd_count)
944                         return false;
945         }
946         pr_info("Ignoring ACPI CRAT on non-APU system\n");
947         return true;
948 }
949
950 int kfd_topology_init(void)
951 {
952         void *crat_image = NULL;
953         size_t image_size = 0;
954         int ret;
955         struct list_head temp_topology_device_list;
956         int cpu_only_node = 0;
957         struct kfd_topology_device *kdev;
958         int proximity_domain;
959
960         /* topology_device_list - Master list of all topology devices
961          * temp_topology_device_list - temporary list created while parsing CRAT
962          * or VCRAT. Once parsing is complete the contents of list is moved to
963          * topology_device_list
964          */
965
966         /* Initialize the head for the both the lists */
967         INIT_LIST_HEAD(&topology_device_list);
968         INIT_LIST_HEAD(&temp_topology_device_list);
969         init_rwsem(&topology_lock);
970
971         memset(&sys_props, 0, sizeof(sys_props));
972
973         /* Proximity domains in ACPI CRAT tables start counting at
974          * 0. The same should be true for virtual CRAT tables created
975          * at this stage. GPUs added later in kfd_topology_add_device
976          * use a counter.
977          */
978         proximity_domain = 0;
979
980         /*
981          * Get the CRAT image from the ACPI. If ACPI doesn't have one
982          * or if ACPI CRAT is invalid create a virtual CRAT.
983          * NOTE: The current implementation expects all AMD APUs to have
984          *      CRAT. If no CRAT is available, it is assumed to be a CPU
985          */
986         ret = kfd_create_crat_image_acpi(&crat_image, &image_size);
987         if (!ret) {
988                 ret = kfd_parse_crat_table(crat_image,
989                                            &temp_topology_device_list,
990                                            proximity_domain);
991                 if (ret ||
992                     kfd_is_acpi_crat_invalid(&temp_topology_device_list)) {
993                         kfd_release_topology_device_list(
994                                 &temp_topology_device_list);
995                         kfd_destroy_crat_image(crat_image);
996                         crat_image = NULL;
997                 }
998         }
999
1000         if (!crat_image) {
1001                 ret = kfd_create_crat_image_virtual(&crat_image, &image_size,
1002                                                     COMPUTE_UNIT_CPU, NULL,
1003                                                     proximity_domain);
1004                 cpu_only_node = 1;
1005                 if (ret) {
1006                         pr_err("Error creating VCRAT table for CPU\n");
1007                         return ret;
1008                 }
1009
1010                 ret = kfd_parse_crat_table(crat_image,
1011                                            &temp_topology_device_list,
1012                                            proximity_domain);
1013                 if (ret) {
1014                         pr_err("Error parsing VCRAT table for CPU\n");
1015                         goto err;
1016                 }
1017         }
1018
1019         kdev = list_first_entry(&temp_topology_device_list,
1020                                 struct kfd_topology_device, list);
1021         kfd_add_perf_to_topology(kdev);
1022
1023         down_write(&topology_lock);
1024         kfd_topology_update_device_list(&temp_topology_device_list,
1025                                         &topology_device_list);
1026         atomic_set(&topology_crat_proximity_domain, sys_props.num_devices-1);
1027         ret = kfd_topology_update_sysfs();
1028         up_write(&topology_lock);
1029
1030         if (!ret) {
1031                 sys_props.generation_count++;
1032                 kfd_update_system_properties();
1033                 kfd_debug_print_topology();
1034                 pr_info("Finished initializing topology\n");
1035         } else
1036                 pr_err("Failed to update topology in sysfs ret=%d\n", ret);
1037
1038         /* For nodes with GPU, this information gets added
1039          * when GPU is detected (kfd_topology_add_device).
1040          */
1041         if (cpu_only_node) {
1042                 /* Add additional information to CPU only node created above */
1043                 down_write(&topology_lock);
1044                 kdev = list_first_entry(&topology_device_list,
1045                                 struct kfd_topology_device, list);
1046                 up_write(&topology_lock);
1047                 kfd_add_non_crat_information(kdev);
1048         }
1049
1050 err:
1051         kfd_destroy_crat_image(crat_image);
1052         return ret;
1053 }
1054
1055 void kfd_topology_shutdown(void)
1056 {
1057         down_write(&topology_lock);
1058         kfd_topology_release_sysfs();
1059         kfd_release_live_view();
1060         up_write(&topology_lock);
1061 }
1062
1063 static uint32_t kfd_generate_gpu_id(struct kfd_dev *gpu)
1064 {
1065         uint32_t hashout;
1066         uint32_t buf[7];
1067         uint64_t local_mem_size;
1068         int i;
1069         struct kfd_local_mem_info local_mem_info;
1070
1071         if (!gpu)
1072                 return 0;
1073
1074         amdgpu_amdkfd_get_local_mem_info(gpu->kgd, &local_mem_info);
1075
1076         local_mem_size = local_mem_info.local_mem_size_private +
1077                         local_mem_info.local_mem_size_public;
1078
1079         buf[0] = gpu->pdev->devfn;
1080         buf[1] = gpu->pdev->subsystem_vendor;
1081         buf[2] = gpu->pdev->subsystem_device;
1082         buf[3] = gpu->pdev->device;
1083         buf[4] = gpu->pdev->bus->number;
1084         buf[5] = lower_32_bits(local_mem_size);
1085         buf[6] = upper_32_bits(local_mem_size);
1086
1087         for (i = 0, hashout = 0; i < 7; i++)
1088                 hashout ^= hash_32(buf[i], KFD_GPU_ID_HASH_WIDTH);
1089
1090         return hashout;
1091 }
1092 /* kfd_assign_gpu - Attach @gpu to the correct kfd topology device. If
1093  *              the GPU device is not already present in the topology device
1094  *              list then return NULL. This means a new topology device has to
1095  *              be created for this GPU.
1096  */
1097 static struct kfd_topology_device *kfd_assign_gpu(struct kfd_dev *gpu)
1098 {
1099         struct kfd_topology_device *dev;
1100         struct kfd_topology_device *out_dev = NULL;
1101
1102         down_write(&topology_lock);
1103         list_for_each_entry(dev, &topology_device_list, list) {
1104                 /* Discrete GPUs need their own topology device list
1105                  * entries. Don't assign them to CPU/APU nodes.
1106                  */
1107                 if (!gpu->device_info->needs_iommu_device &&
1108                     dev->node_props.cpu_cores_count)
1109                         continue;
1110
1111                 if (!dev->gpu && (dev->node_props.simd_count > 0)) {
1112                         dev->gpu = gpu;
1113                         out_dev = dev;
1114                         break;
1115                 }
1116         }
1117         up_write(&topology_lock);
1118         return out_dev;
1119 }
1120
1121 static void kfd_notify_gpu_change(uint32_t gpu_id, int arrival)
1122 {
1123         /*
1124          * TODO: Generate an event for thunk about the arrival/removal
1125          * of the GPU
1126          */
1127 }
1128
1129 /* kfd_fill_mem_clk_max_info - Since CRAT doesn't have memory clock info,
1130  *              patch this after CRAT parsing.
1131  */
1132 static void kfd_fill_mem_clk_max_info(struct kfd_topology_device *dev)
1133 {
1134         struct kfd_mem_properties *mem;
1135         struct kfd_local_mem_info local_mem_info;
1136
1137         if (!dev)
1138                 return;
1139
1140         /* Currently, amdgpu driver (amdgpu_mc) deals only with GPUs with
1141          * single bank of VRAM local memory.
1142          * for dGPUs - VCRAT reports only one bank of Local Memory
1143          * for APUs - If CRAT from ACPI reports more than one bank, then
1144          *      all the banks will report the same mem_clk_max information
1145          */
1146         amdgpu_amdkfd_get_local_mem_info(dev->gpu->kgd, &local_mem_info);
1147
1148         list_for_each_entry(mem, &dev->mem_props, list)
1149                 mem->mem_clk_max = local_mem_info.mem_clk_max;
1150 }
1151
1152 static void kfd_fill_iolink_non_crat_info(struct kfd_topology_device *dev)
1153 {
1154         struct kfd_iolink_properties *link, *cpu_link;
1155         struct kfd_topology_device *cpu_dev;
1156         uint32_t cap;
1157         uint32_t cpu_flag = CRAT_IOLINK_FLAGS_ENABLED;
1158         uint32_t flag = CRAT_IOLINK_FLAGS_ENABLED;
1159
1160         if (!dev || !dev->gpu)
1161                 return;
1162
1163         pcie_capability_read_dword(dev->gpu->pdev,
1164                         PCI_EXP_DEVCAP2, &cap);
1165
1166         if (!(cap & (PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
1167                      PCI_EXP_DEVCAP2_ATOMIC_COMP64)))
1168                 cpu_flag |= CRAT_IOLINK_FLAGS_NO_ATOMICS_32_BIT |
1169                         CRAT_IOLINK_FLAGS_NO_ATOMICS_64_BIT;
1170
1171         if (!dev->gpu->pci_atomic_requested ||
1172             dev->gpu->device_info->asic_family == CHIP_HAWAII)
1173                 flag |= CRAT_IOLINK_FLAGS_NO_ATOMICS_32_BIT |
1174                         CRAT_IOLINK_FLAGS_NO_ATOMICS_64_BIT;
1175
1176         /* GPU only creates direct links so apply flags setting to all */
1177         list_for_each_entry(link, &dev->io_link_props, list) {
1178                 link->flags = flag;
1179                 cpu_dev = kfd_topology_device_by_proximity_domain(
1180                                 link->node_to);
1181                 if (cpu_dev) {
1182                         list_for_each_entry(cpu_link,
1183                                             &cpu_dev->io_link_props, list)
1184                                 if (cpu_link->node_to == link->node_from)
1185                                         cpu_link->flags = cpu_flag;
1186                 }
1187         }
1188 }
1189
1190 int kfd_topology_add_device(struct kfd_dev *gpu)
1191 {
1192         uint32_t gpu_id;
1193         struct kfd_topology_device *dev;
1194         struct kfd_cu_info cu_info;
1195         int res = 0;
1196         struct list_head temp_topology_device_list;
1197         void *crat_image = NULL;
1198         size_t image_size = 0;
1199         int proximity_domain;
1200
1201         INIT_LIST_HEAD(&temp_topology_device_list);
1202
1203         gpu_id = kfd_generate_gpu_id(gpu);
1204
1205         pr_debug("Adding new GPU (ID: 0x%x) to topology\n", gpu_id);
1206
1207         proximity_domain = atomic_inc_return(&topology_crat_proximity_domain);
1208
1209         /* Check to see if this gpu device exists in the topology_device_list.
1210          * If so, assign the gpu to that device,
1211          * else create a Virtual CRAT for this gpu device and then parse that
1212          * CRAT to create a new topology device. Once created assign the gpu to
1213          * that topology device
1214          */
1215         dev = kfd_assign_gpu(gpu);
1216         if (!dev) {
1217                 res = kfd_create_crat_image_virtual(&crat_image, &image_size,
1218                                                     COMPUTE_UNIT_GPU, gpu,
1219                                                     proximity_domain);
1220                 if (res) {
1221                         pr_err("Error creating VCRAT for GPU (ID: 0x%x)\n",
1222                                gpu_id);
1223                         return res;
1224                 }
1225                 res = kfd_parse_crat_table(crat_image,
1226                                            &temp_topology_device_list,
1227                                            proximity_domain);
1228                 if (res) {
1229                         pr_err("Error parsing VCRAT for GPU (ID: 0x%x)\n",
1230                                gpu_id);
1231                         goto err;
1232                 }
1233
1234                 down_write(&topology_lock);
1235                 kfd_topology_update_device_list(&temp_topology_device_list,
1236                         &topology_device_list);
1237
1238                 /* Update the SYSFS tree, since we added another topology
1239                  * device
1240                  */
1241                 res = kfd_topology_update_sysfs();
1242                 up_write(&topology_lock);
1243
1244                 if (!res)
1245                         sys_props.generation_count++;
1246                 else
1247                         pr_err("Failed to update GPU (ID: 0x%x) to sysfs topology. res=%d\n",
1248                                                 gpu_id, res);
1249                 dev = kfd_assign_gpu(gpu);
1250                 if (WARN_ON(!dev)) {
1251                         res = -ENODEV;
1252                         goto err;
1253                 }
1254         }
1255
1256         dev->gpu_id = gpu_id;
1257         gpu->id = gpu_id;
1258
1259         /* TODO: Move the following lines to function
1260          *      kfd_add_non_crat_information
1261          */
1262
1263         /* Fill-in additional information that is not available in CRAT but
1264          * needed for the topology
1265          */
1266
1267         amdgpu_amdkfd_get_cu_info(dev->gpu->kgd, &cu_info);
1268         dev->node_props.simd_arrays_per_engine =
1269                 cu_info.num_shader_arrays_per_engine;
1270
1271         dev->node_props.vendor_id = gpu->pdev->vendor;
1272         dev->node_props.device_id = gpu->pdev->device;
1273         dev->node_props.location_id = PCI_DEVID(gpu->pdev->bus->number,
1274                 gpu->pdev->devfn);
1275         dev->node_props.max_engine_clk_fcompute =
1276                 amdgpu_amdkfd_get_max_engine_clock_in_mhz(dev->gpu->kgd);
1277         dev->node_props.max_engine_clk_ccompute =
1278                 cpufreq_quick_get_max(0) / 1000;
1279         dev->node_props.drm_render_minor =
1280                 gpu->shared_resources.drm_render_minor;
1281
1282         dev->node_props.hive_id = gpu->hive_id;
1283
1284         kfd_fill_mem_clk_max_info(dev);
1285         kfd_fill_iolink_non_crat_info(dev);
1286
1287         switch (dev->gpu->device_info->asic_family) {
1288         case CHIP_KAVERI:
1289         case CHIP_HAWAII:
1290         case CHIP_TONGA:
1291                 dev->node_props.capability |= ((HSA_CAP_DOORBELL_TYPE_PRE_1_0 <<
1292                         HSA_CAP_DOORBELL_TYPE_TOTALBITS_SHIFT) &
1293                         HSA_CAP_DOORBELL_TYPE_TOTALBITS_MASK);
1294                 break;
1295         case CHIP_CARRIZO:
1296         case CHIP_FIJI:
1297         case CHIP_POLARIS10:
1298         case CHIP_POLARIS11:
1299         case CHIP_POLARIS12:
1300                 pr_debug("Adding doorbell packet type capability\n");
1301                 dev->node_props.capability |= ((HSA_CAP_DOORBELL_TYPE_1_0 <<
1302                         HSA_CAP_DOORBELL_TYPE_TOTALBITS_SHIFT) &
1303                         HSA_CAP_DOORBELL_TYPE_TOTALBITS_MASK);
1304                 break;
1305         case CHIP_VEGA10:
1306         case CHIP_VEGA12:
1307         case CHIP_VEGA20:
1308         case CHIP_RAVEN:
1309                 dev->node_props.capability |= ((HSA_CAP_DOORBELL_TYPE_2_0 <<
1310                         HSA_CAP_DOORBELL_TYPE_TOTALBITS_SHIFT) &
1311                         HSA_CAP_DOORBELL_TYPE_TOTALBITS_MASK);
1312                 break;
1313         default:
1314                 WARN(1, "Unexpected ASIC family %u",
1315                      dev->gpu->device_info->asic_family);
1316         }
1317
1318         /* Fix errors in CZ CRAT.
1319          * simd_count: Carrizo CRAT reports wrong simd_count, probably
1320          *              because it doesn't consider masked out CUs
1321          * max_waves_per_simd: Carrizo reports wrong max_waves_per_simd
1322          * capability flag: Carrizo CRAT doesn't report IOMMU flags
1323          */
1324         if (dev->gpu->device_info->asic_family == CHIP_CARRIZO) {
1325                 dev->node_props.simd_count =
1326                         cu_info.simd_per_cu * cu_info.cu_active_number;
1327                 dev->node_props.max_waves_per_simd = 10;
1328                 dev->node_props.capability |= HSA_CAP_ATS_PRESENT;
1329         }
1330
1331         kfd_debug_print_topology();
1332
1333         if (!res)
1334                 kfd_notify_gpu_change(gpu_id, 1);
1335 err:
1336         kfd_destroy_crat_image(crat_image);
1337         return res;
1338 }
1339
1340 int kfd_topology_remove_device(struct kfd_dev *gpu)
1341 {
1342         struct kfd_topology_device *dev, *tmp;
1343         uint32_t gpu_id;
1344         int res = -ENODEV;
1345
1346         down_write(&topology_lock);
1347
1348         list_for_each_entry_safe(dev, tmp, &topology_device_list, list)
1349                 if (dev->gpu == gpu) {
1350                         gpu_id = dev->gpu_id;
1351                         kfd_remove_sysfs_node_entry(dev);
1352                         kfd_release_topology_device(dev);
1353                         sys_props.num_devices--;
1354                         res = 0;
1355                         if (kfd_topology_update_sysfs() < 0)
1356                                 kfd_topology_release_sysfs();
1357                         break;
1358                 }
1359
1360         up_write(&topology_lock);
1361
1362         if (!res)
1363                 kfd_notify_gpu_change(gpu_id, 0);
1364
1365         return res;
1366 }
1367
1368 /* kfd_topology_enum_kfd_devices - Enumerate through all devices in KFD
1369  *      topology. If GPU device is found @idx, then valid kfd_dev pointer is
1370  *      returned through @kdev
1371  * Return -     0: On success (@kdev will be NULL for non GPU nodes)
1372  *              -1: If end of list
1373  */
1374 int kfd_topology_enum_kfd_devices(uint8_t idx, struct kfd_dev **kdev)
1375 {
1376
1377         struct kfd_topology_device *top_dev;
1378         uint8_t device_idx = 0;
1379
1380         *kdev = NULL;
1381         down_read(&topology_lock);
1382
1383         list_for_each_entry(top_dev, &topology_device_list, list) {
1384                 if (device_idx == idx) {
1385                         *kdev = top_dev->gpu;
1386                         up_read(&topology_lock);
1387                         return 0;
1388                 }
1389
1390                 device_idx++;
1391         }
1392
1393         up_read(&topology_lock);
1394
1395         return -1;
1396
1397 }
1398
1399 static int kfd_cpumask_to_apic_id(const struct cpumask *cpumask)
1400 {
1401         int first_cpu_of_numa_node;
1402
1403         if (!cpumask || cpumask == cpu_none_mask)
1404                 return -1;
1405         first_cpu_of_numa_node = cpumask_first(cpumask);
1406         if (first_cpu_of_numa_node >= nr_cpu_ids)
1407                 return -1;
1408 #ifdef CONFIG_X86_64
1409         return cpu_data(first_cpu_of_numa_node).apicid;
1410 #else
1411         return first_cpu_of_numa_node;
1412 #endif
1413 }
1414
1415 /* kfd_numa_node_to_apic_id - Returns the APIC ID of the first logical processor
1416  *      of the given NUMA node (numa_node_id)
1417  * Return -1 on failure
1418  */
1419 int kfd_numa_node_to_apic_id(int numa_node_id)
1420 {
1421         if (numa_node_id == -1) {
1422                 pr_warn("Invalid NUMA Node. Use online CPU mask\n");
1423                 return kfd_cpumask_to_apic_id(cpu_online_mask);
1424         }
1425         return kfd_cpumask_to_apic_id(cpumask_of_node(numa_node_id));
1426 }
1427
1428 #if defined(CONFIG_DEBUG_FS)
1429
1430 int kfd_debugfs_hqds_by_device(struct seq_file *m, void *data)
1431 {
1432         struct kfd_topology_device *dev;
1433         unsigned int i = 0;
1434         int r = 0;
1435
1436         down_read(&topology_lock);
1437
1438         list_for_each_entry(dev, &topology_device_list, list) {
1439                 if (!dev->gpu) {
1440                         i++;
1441                         continue;
1442                 }
1443
1444                 seq_printf(m, "Node %u, gpu_id %x:\n", i++, dev->gpu->id);
1445                 r = dqm_debugfs_hqds(m, dev->gpu->dqm);
1446                 if (r)
1447                         break;
1448         }
1449
1450         up_read(&topology_lock);
1451
1452         return r;
1453 }
1454
1455 int kfd_debugfs_rls_by_device(struct seq_file *m, void *data)
1456 {
1457         struct kfd_topology_device *dev;
1458         unsigned int i = 0;
1459         int r = 0;
1460
1461         down_read(&topology_lock);
1462
1463         list_for_each_entry(dev, &topology_device_list, list) {
1464                 if (!dev->gpu) {
1465                         i++;
1466                         continue;
1467                 }
1468
1469                 seq_printf(m, "Node %u, gpu_id %x:\n", i++, dev->gpu->id);
1470                 r = pm_debugfs_runlist(m, &dev->gpu->dqm->packets);
1471                 if (r)
1472                         break;
1473         }
1474
1475         up_read(&topology_lock);
1476
1477         return r;
1478 }
1479
1480 #endif