// SPDX-License-Identifier: MIT
/*
 * Copyright © 2022 Intel Corporation
 */

#include "xe_query.h"

#include <linux/nospec.h>
#include <linux/sched/clock.h>

#include <drm/ttm/ttm_placement.h>
#include <drm/xe_drm.h>

#include "regs/xe_engine_regs.h"
#include "xe_bo.h"
#include "xe_device.h"
#include "xe_exec_queue.h"
#include "xe_gt.h"
#include "xe_guc_hwconfig.h"
#include "xe_macros.h"
#include "xe_mmio.h"
#include "xe_ttm_vram_mgr.h"

static const u16 xe_to_user_engine_class[] = {
	[XE_ENGINE_CLASS_RENDER] = DRM_XE_ENGINE_CLASS_RENDER,
	[XE_ENGINE_CLASS_COPY] = DRM_XE_ENGINE_CLASS_COPY,
	[XE_ENGINE_CLASS_VIDEO_DECODE] = DRM_XE_ENGINE_CLASS_VIDEO_DECODE,
	[XE_ENGINE_CLASS_VIDEO_ENHANCE] = DRM_XE_ENGINE_CLASS_VIDEO_ENHANCE,
	[XE_ENGINE_CLASS_COMPUTE] = DRM_XE_ENGINE_CLASS_COMPUTE,
};

static const enum xe_engine_class user_to_xe_engine_class[] = {
	[DRM_XE_ENGINE_CLASS_RENDER] = XE_ENGINE_CLASS_RENDER,
	[DRM_XE_ENGINE_CLASS_COPY] = XE_ENGINE_CLASS_COPY,
	[DRM_XE_ENGINE_CLASS_VIDEO_DECODE] = XE_ENGINE_CLASS_VIDEO_DECODE,
	[DRM_XE_ENGINE_CLASS_VIDEO_ENHANCE] = XE_ENGINE_CLASS_VIDEO_ENHANCE,
	[DRM_XE_ENGINE_CLASS_COMPUTE] = XE_ENGINE_CLASS_COMPUTE,
};

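/*
 * These tables translate between the uAPI engine-class values and the
 * driver-internal enum; keep both directions in sync with the
 * DRM_XE_ENGINE_CLASS_* defines in xe_drm.h.
 */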
static size_t calc_hw_engine_info_size(struct xe_device *xe)
{
	struct xe_hw_engine *hwe;
	enum xe_hw_engine_id id;
	struct xe_gt *gt;
	u8 gt_id;
	int i = 0;

	for_each_gt(gt, xe, gt_id)
		for_each_hw_engine(hwe, gt, id) {
			if (xe_hw_engine_is_reserved(hwe))
				continue;
			i++;
		}

	return sizeof(struct drm_xe_query_engines) +
		i * sizeof(struct drm_xe_engine);
}

typedef u64 (*__ktime_func_t)(void);

static __ktime_func_t __clock_id_to_func(clockid_t clk_id)
{
	/*
	 * Use the same logic as the perf subsystem to allow user to select
	 * the reference clock id to be used for timestamps.
	 */
	switch (clk_id) {
	case CLOCK_MONOTONIC:
		return &ktime_get_ns;
	case CLOCK_MONOTONIC_RAW:
		return &ktime_get_raw_ns;
	case CLOCK_REALTIME:
		return &ktime_get_real_ns;
	case CLOCK_BOOTTIME:
		return &ktime_get_boottime_ns;
	case CLOCK_TAI:
		return &ktime_get_clocktai_ns;
	default:
		return NULL;
	}
}

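/*
 * Sample a 64-bit engine timestamp exposed as a lower/upper register pair,
 * bracketing the read with CPU clock samples. If the upper dword changed
 * while reading, the lower dword wrapped mid-sample, so retry to get a
 * consistent 64-bit value. cpu_delta ends up holding the width of the
 * CPU-side sampling window.
 */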
static void
__read_timestamps(struct xe_gt *gt,
		  struct xe_reg lower_reg,
		  struct xe_reg upper_reg,
		  u64 *engine_ts,
		  u64 *cpu_ts,
		  u64 *cpu_delta,
		  __ktime_func_t cpu_clock)
{
	u32 upper, lower, old_upper, loop = 0;

	upper = xe_mmio_read32(gt, upper_reg);
	do {
		*cpu_delta = local_clock();
		*cpu_ts = cpu_clock();
		lower = xe_mmio_read32(gt, lower_reg);
		*cpu_delta = local_clock() - *cpu_delta;
		old_upper = upper;
		upper = xe_mmio_read32(gt, upper_reg);
	} while (upper != old_upper && loop++ < 2);

	*engine_ts = (u64)upper << 32 | lower;
}

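/*
 * DRM_XE_DEVICE_QUERY_ENGINE_CYCLES: sample an engine's RING_TIMESTAMP
 * together with a userspace-selected CPU clock, so GPU and CPU timelines
 * can be correlated. Requires the device awake and forcewake held around
 * the register reads.
 */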
static int
query_engine_cycles(struct xe_device *xe,
		    struct drm_xe_device_query *query)
{
	struct drm_xe_query_engine_cycles __user *query_ptr;
	struct drm_xe_engine_class_instance *eci;
	struct drm_xe_query_engine_cycles resp;
	size_t size = sizeof(resp);
	__ktime_func_t cpu_clock;
	struct xe_hw_engine *hwe;
	struct xe_gt *gt;

	if (query->size == 0) {
		query->size = size;
		return 0;
	} else if (XE_IOCTL_DBG(xe, query->size != size)) {
		return -EINVAL;
	}

	query_ptr = u64_to_user_ptr(query->data);
	if (copy_from_user(&resp, query_ptr, size))
		return -EFAULT;

	cpu_clock = __clock_id_to_func(resp.clockid);
	if (!cpu_clock)
		return -EINVAL;

	eci = &resp.eci;
	if (eci->gt_id > XE_MAX_GT_PER_TILE)
		return -EINVAL;

	gt = xe_device_get_gt(xe, eci->gt_id);
	if (!gt)
		return -EINVAL;

	if (eci->engine_class >= ARRAY_SIZE(user_to_xe_engine_class))
		return -EINVAL;

	hwe = xe_gt_hw_engine(gt, user_to_xe_engine_class[eci->engine_class],
			      eci->engine_instance, true);
	if (!hwe)
		return -EINVAL;

	xe_device_mem_access_get(xe);
	xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);

	__read_timestamps(gt,
			  RING_TIMESTAMP(hwe->mmio_base),
			  RING_TIMESTAMP_UDW(hwe->mmio_base),
			  &resp.engine_cycles,
			  &resp.cpu_timestamp,
			  &resp.cpu_delta,
			  cpu_clock);

	xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL);
	xe_device_mem_access_put(xe);

	/* RING_TIMESTAMP counters are 36 bits wide */
	resp.width = 36;

	/* Only write to the output fields of user query */
	if (put_user(resp.cpu_timestamp, &query_ptr->cpu_timestamp))
		return -EFAULT;

	if (put_user(resp.cpu_delta, &query_ptr->cpu_delta))
		return -EFAULT;

	if (put_user(resp.engine_cycles, &query_ptr->engine_cycles))
		return -EFAULT;

	if (put_user(resp.width, &query_ptr->width))
		return -EFAULT;

	return 0;
}

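/*
 * DRM_XE_DEVICE_QUERY_ENGINES: list every non-reserved hardware engine as
 * a (class, logical instance, gt_id) tuple in uAPI terms.
 */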
static int query_engines(struct xe_device *xe,
			 struct drm_xe_device_query *query)
{
	size_t size = calc_hw_engine_info_size(xe);
	struct drm_xe_query_engines __user *query_ptr =
		u64_to_user_ptr(query->data);
	struct drm_xe_query_engines *engines;
	struct xe_hw_engine *hwe;
	enum xe_hw_engine_id id;
	struct xe_gt *gt;
	u8 gt_id;
	int i = 0;

	if (query->size == 0) {
		query->size = size;
		return 0;
	} else if (XE_IOCTL_DBG(xe, query->size != size)) {
		return -EINVAL;
	}

	engines = kzalloc(size, GFP_KERNEL);
	if (XE_IOCTL_DBG(xe, !engines))
		return -ENOMEM;

	for_each_gt(gt, xe, gt_id)
		for_each_hw_engine(hwe, gt, id) {
			if (xe_hw_engine_is_reserved(hwe))
				continue;

			engines->engines[i].instance.engine_class =
				xe_to_user_engine_class[hwe->class];
			engines->engines[i].instance.engine_instance =
				hwe->logical_instance;
			engines->engines[i].instance.gt_id = gt->info.id;

			i++;
		}

	engines->num_engines = i;

	if (copy_to_user(query_ptr, engines, size)) {
		kfree(engines);
		return -EFAULT;
	}
	kfree(engines);

	return 0;
}

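/*
 * DRM_XE_DEVICE_QUERY_MEM_REGIONS: one region for system memory (instance
 * 0) plus one per enabled VRAM manager. Usage counters are reported only
 * to perfmon_capable() callers, as they expose allocations made by other
 * clients.
 */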
static size_t calc_mem_regions_size(struct xe_device *xe)
{
	u32 num_managers = 1;
	int i;

	for (i = XE_PL_VRAM0; i <= XE_PL_VRAM1; ++i)
		if (ttm_manager_type(&xe->ttm, i))
			num_managers++;

	return offsetof(struct drm_xe_query_mem_regions, mem_regions[num_managers]);
}

static int query_mem_regions(struct xe_device *xe,
			    struct drm_xe_device_query *query)
{
	size_t size = calc_mem_regions_size(xe);
	struct drm_xe_query_mem_regions *mem_regions;
	struct drm_xe_query_mem_regions __user *query_ptr =
		u64_to_user_ptr(query->data);
	struct ttm_resource_manager *man;
	int ret, i;

	if (query->size == 0) {
		query->size = size;
		return 0;
	} else if (XE_IOCTL_DBG(xe, query->size != size)) {
		return -EINVAL;
	}

	mem_regions = kzalloc(size, GFP_KERNEL);
	if (XE_IOCTL_DBG(xe, !mem_regions))
		return -ENOMEM;

	man = ttm_manager_type(&xe->ttm, XE_PL_TT);
	mem_regions->mem_regions[0].mem_class = DRM_XE_MEM_REGION_CLASS_SYSMEM;
	/*
	 * The instance needs to be a unique number that represents the index
	 * in the placement mask used at xe_gem_create_ioctl() for the
	 * xe_bo_create() placement.
	 */
	mem_regions->mem_regions[0].instance = 0;
	mem_regions->mem_regions[0].min_page_size = PAGE_SIZE;
	mem_regions->mem_regions[0].total_size = man->size << PAGE_SHIFT;
	if (perfmon_capable())
		mem_regions->mem_regions[0].used = ttm_resource_manager_usage(man);
	mem_regions->num_mem_regions = 1;

	for (i = XE_PL_VRAM0; i <= XE_PL_VRAM1; ++i) {
		man = ttm_manager_type(&xe->ttm, i);
		if (!man)
			continue;

		mem_regions->mem_regions[mem_regions->num_mem_regions].mem_class =
			DRM_XE_MEM_REGION_CLASS_VRAM;
		mem_regions->mem_regions[mem_regions->num_mem_regions].instance =
			mem_regions->num_mem_regions;
		mem_regions->mem_regions[mem_regions->num_mem_regions].min_page_size =
			xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K ?
			SZ_64K : PAGE_SIZE;
		mem_regions->mem_regions[mem_regions->num_mem_regions].total_size =
			man->size;

		if (perfmon_capable()) {
			xe_ttm_vram_get_used(man,
				&mem_regions->mem_regions
				[mem_regions->num_mem_regions].used,
				&mem_regions->mem_regions
				[mem_regions->num_mem_regions].cpu_visible_used);
		}

		mem_regions->mem_regions[mem_regions->num_mem_regions].cpu_visible_size =
			xe_ttm_vram_get_cpu_visible_size(man);
		mem_regions->num_mem_regions++;
	}

	if (!copy_to_user(query_ptr, mem_regions, size))
		ret = 0;
	else
		ret = -ENOSPC;

	kfree(mem_regions);
	return ret;
}

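/*
 * DRM_XE_DEVICE_QUERY_CONFIG: fixed array of device-wide parameters,
 * indexed by the DRM_XE_QUERY_CONFIG_* defines.
 */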
static int query_config(struct xe_device *xe, struct drm_xe_device_query *query)
{
	const u32 num_params = DRM_XE_QUERY_CONFIG_MAX_EXEC_QUEUE_PRIORITY + 1;
	size_t size =
		sizeof(struct drm_xe_query_config) + num_params * sizeof(u64);
	struct drm_xe_query_config __user *query_ptr =
		u64_to_user_ptr(query->data);
	struct drm_xe_query_config *config;

	if (query->size == 0) {
		query->size = size;
		return 0;
	} else if (XE_IOCTL_DBG(xe, query->size != size)) {
		return -EINVAL;
	}

	config = kzalloc(size, GFP_KERNEL);
	if (!config)
		return -ENOMEM;

	config->num_params = num_params;
	config->info[DRM_XE_QUERY_CONFIG_REV_AND_DEVICE_ID] =
		xe->info.devid | (xe->info.revid << 16);
	if (xe_device_get_root_tile(xe)->mem.vram.usable_size)
		config->info[DRM_XE_QUERY_CONFIG_FLAGS] =
			DRM_XE_QUERY_CONFIG_FLAG_HAS_VRAM;
	config->info[DRM_XE_QUERY_CONFIG_MIN_ALIGNMENT] =
		xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K ? SZ_64K : SZ_4K;
	config->info[DRM_XE_QUERY_CONFIG_VA_BITS] = xe->info.va_bits;
	config->info[DRM_XE_QUERY_CONFIG_MAX_EXEC_QUEUE_PRIORITY] =
		xe_exec_queue_device_get_max_priority(xe);

	if (copy_to_user(query_ptr, config, size)) {
		kfree(config);
		return -EFAULT;
	}
	kfree(config);

	return 0;
}

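/*
 * DRM_XE_DEVICE_QUERY_GT_LIST: one entry per GT with its type, tile and GT
 * ids, reference clock and near/far memory-region masks.
 */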
static int query_gt_list(struct xe_device *xe, struct drm_xe_device_query *query)
{
	struct xe_gt *gt;
	size_t size = sizeof(struct drm_xe_query_gt_list) +
		xe->info.gt_count * sizeof(struct drm_xe_gt);
	struct drm_xe_query_gt_list __user *query_ptr =
		u64_to_user_ptr(query->data);
	struct drm_xe_query_gt_list *gt_list;
	u8 id;

	if (query->size == 0) {
		query->size = size;
		return 0;
	} else if (XE_IOCTL_DBG(xe, query->size != size)) {
		return -EINVAL;
	}

	gt_list = kzalloc(size, GFP_KERNEL);
	if (!gt_list)
		return -ENOMEM;

	gt_list->num_gt = xe->info.gt_count;

	for_each_gt(gt, xe, id) {
		if (xe_gt_is_media_type(gt))
			gt_list->gt_list[id].type = DRM_XE_QUERY_GT_TYPE_MEDIA;
		else
			gt_list->gt_list[id].type = DRM_XE_QUERY_GT_TYPE_MAIN;
		gt_list->gt_list[id].tile_id = gt_to_tile(gt)->id;
		gt_list->gt_list[id].gt_id = gt->info.id;
		gt_list->gt_list[id].reference_clock = gt->info.reference_clock;
		/*
		 * The mem_regions indexes in the mask below need to
		 * directly identify the struct
		 * drm_xe_query_mem_regions' instance constructed at
		 * query_mem_regions()
		 *
		 * For our current platforms:
		 * Bit 0 -> System Memory
		 * Bit 1 -> VRAM0 on Tile0
		 * Bit 2 -> VRAM1 on Tile1
		 * However the uAPI is generic and it's userspace's
		 * responsibility to check the mem_class, without any
		 * assumption made on the instance number.
		 */
		if (!IS_DGFX(xe))
			gt_list->gt_list[id].near_mem_regions = 0x1;
		else
			gt_list->gt_list[id].near_mem_regions =
				BIT(gt_to_tile(gt)->id) << 1;
		gt_list->gt_list[id].far_mem_regions = xe->info.mem_region_mask ^
			gt_list->gt_list[id].near_mem_regions;
	}

	if (copy_to_user(query_ptr, gt_list, size)) {
		kfree(gt_list);
		return -EFAULT;
	}
	kfree(gt_list);

	return 0;
}

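/*
 * DRM_XE_DEVICE_QUERY_HWCONFIG: copy out the opaque hardware-configuration
 * blob provided by the GuC on the root GT.
 */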
static int query_hwconfig(struct xe_device *xe,
			  struct drm_xe_device_query *query)
{
	struct xe_gt *gt = xe_root_mmio_gt(xe);
	size_t size = xe_guc_hwconfig_size(&gt->uc.guc);
	void __user *query_ptr = u64_to_user_ptr(query->data);
	void *hwconfig;

	if (query->size == 0) {
		query->size = size;
		return 0;
	} else if (XE_IOCTL_DBG(xe, query->size != size)) {
		return -EINVAL;
	}

	hwconfig = kzalloc(size, GFP_KERNEL);
	if (!hwconfig)
		return -ENOMEM;

	xe_device_mem_access_get(xe);
	xe_guc_hwconfig_copy(&gt->uc.guc, hwconfig);
	xe_device_mem_access_put(xe);

	if (copy_to_user(query_ptr, hwconfig, size)) {
		kfree(hwconfig);
		return -EFAULT;
	}
	kfree(hwconfig);

	return 0;
}

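/*
 * DRM_XE_DEVICE_QUERY_GT_TOPOLOGY: for each GT, three records are emitted
 * back to back, each a struct drm_xe_query_topology_mask header followed
 * by the raw mask bytes: geometry DSS, compute DSS and EU-per-DSS.
 */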
static size_t calc_topo_query_size(struct xe_device *xe)
{
	return xe->info.gt_count *
		(3 * sizeof(struct drm_xe_query_topology_mask) +
		 sizeof_field(struct xe_gt, fuse_topo.g_dss_mask) +
		 sizeof_field(struct xe_gt, fuse_topo.c_dss_mask) +
		 sizeof_field(struct xe_gt, fuse_topo.eu_mask_per_dss));
}

static int copy_mask(void __user **ptr,
		     struct drm_xe_query_topology_mask *topo,
		     void *mask, size_t mask_size)
{
	topo->num_bytes = mask_size;

	if (copy_to_user(*ptr, topo, sizeof(*topo)))
		return -EFAULT;
	*ptr += sizeof(*topo);	/* advance past the header just copied */

	if (copy_to_user(*ptr, mask, mask_size))
		return -EFAULT;
	*ptr += mask_size;

	return 0;
}

static int query_gt_topology(struct xe_device *xe,
			     struct drm_xe_device_query *query)
{
	void __user *query_ptr = u64_to_user_ptr(query->data);
	size_t size = calc_topo_query_size(xe);
	struct drm_xe_query_topology_mask topo;
	struct xe_gt *gt;
	int id;

	if (query->size == 0) {
		query->size = size;
		return 0;
	} else if (XE_IOCTL_DBG(xe, query->size != size)) {
		return -EINVAL;
	}

	for_each_gt(gt, xe, id) {
		int err;

		topo.gt_id = id;

		topo.type = DRM_XE_TOPO_DSS_GEOMETRY;
		err = copy_mask(&query_ptr, &topo, gt->fuse_topo.g_dss_mask,
				sizeof(gt->fuse_topo.g_dss_mask));
		if (err)
			return err;

		topo.type = DRM_XE_TOPO_DSS_COMPUTE;
		err = copy_mask(&query_ptr, &topo, gt->fuse_topo.c_dss_mask,
				sizeof(gt->fuse_topo.c_dss_mask));
		if (err)
			return err;

		topo.type = DRM_XE_TOPO_EU_PER_DSS;
		err = copy_mask(&query_ptr, &topo,
				gt->fuse_topo.eu_mask_per_dss,
				sizeof(gt->fuse_topo.eu_mask_per_dss));
		if (err)
			return err;
	}

	return 0;
}

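/*
 * DRM_XE_DEVICE_QUERY_UC_FW_VERSION: report a microcontroller firmware
 * version; currently only the GuC submission (compatibility) interface
 * version is supported.
 */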
static int
query_uc_fw_version(struct xe_device *xe, struct drm_xe_device_query *query)
{
	struct drm_xe_query_uc_fw_version __user *query_ptr = u64_to_user_ptr(query->data);
	size_t size = sizeof(struct drm_xe_query_uc_fw_version);
	struct drm_xe_query_uc_fw_version resp;
	struct xe_uc_fw_version *version = NULL;

	if (query->size == 0) {
		query->size = size;
		return 0;
	} else if (XE_IOCTL_DBG(xe, query->size != size)) {
		return -EINVAL;
	}

	if (copy_from_user(&resp, query_ptr, size))
		return -EFAULT;

	if (XE_IOCTL_DBG(xe, resp.pad || resp.pad2 || resp.reserved))
		return -EINVAL;

	switch (resp.uc_type) {
	case XE_QUERY_UC_TYPE_GUC_SUBMISSION: {
		struct xe_guc *guc = &xe->tiles[0].primary_gt->uc.guc;

		version = &guc->fw.versions.found[XE_UC_FW_VER_COMPATIBILITY];
		break;
	}
	default:
		return -EINVAL;
	}

	resp.branch_ver = 0;
	resp.major_ver = version->major;
	resp.minor_ver = version->minor;
	resp.patch_ver = version->patch;

	if (copy_to_user(query_ptr, &resp, size))
		return -EFAULT;

	return 0;
}

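/*
 * Dispatch table for DRM_IOCTL_XE_DEVICE_QUERY; entries must stay in the
 * same order as the DRM_XE_DEVICE_QUERY_* values used to index it.
 *
 * Every query follows the same two-call convention. A minimal userspace
 * sketch (illustrative only; error handling elided, "fd" assumed to be an
 * open xe render node):
 *
 *	struct drm_xe_device_query q = {
 *		.query = DRM_XE_DEVICE_QUERY_ENGINES,
 *	};
 *
 *	ioctl(fd, DRM_IOCTL_XE_DEVICE_QUERY, &q);  // 1st call fills q.size
 *	void *buf = calloc(1, q.size);
 *	q.data = (uintptr_t)buf;
 *	ioctl(fd, DRM_IOCTL_XE_DEVICE_QUERY, &q);  // 2nd call fills buf
 */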
static int (* const xe_query_funcs[])(struct xe_device *xe,
				      struct drm_xe_device_query *query) = {
	query_engines,
	query_mem_regions,
	query_config,
	query_gt_list,
	query_hwconfig,
	query_gt_topology,
	query_engine_cycles,
	query_uc_fw_version,
};

int xe_query_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
{
	struct xe_device *xe = to_xe_device(dev);
	struct drm_xe_device_query *query = data;
	u32 idx;

	if (XE_IOCTL_DBG(xe, query->extensions) ||
	    XE_IOCTL_DBG(xe, query->reserved[0] || query->reserved[1]))
		return -EINVAL;

	if (XE_IOCTL_DBG(xe, query->query >= ARRAY_SIZE(xe_query_funcs)))
		return -EINVAL;

	/* Clamp the index under speculation (Spectre-v1 hardening) */
	idx = array_index_nospec(query->query, ARRAY_SIZE(xe_query_funcs));
	if (XE_IOCTL_DBG(xe, !xe_query_funcs[idx]))
		return -EINVAL;

	return xe_query_funcs[idx](xe, query);
}