Merge tag 'amd-drm-next-6.7-2023-10-13' of https://gitlab.freedesktop.org/agd5f/linux...
[sfrench/cifs-2.6.git] / drivers / gpu / drm / amd / amdgpu / mes_v10_1.c
1 /*
2  * Copyright 2019 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  */
23
24 #include <linux/firmware.h>
25 #include <linux/module.h>
26 #include "amdgpu.h"
27 #include "soc15_common.h"
28 #include "nv.h"
29 #include "gc/gc_10_1_0_offset.h"
30 #include "gc/gc_10_1_0_sh_mask.h"
31 #include "gc/gc_10_1_0_default.h"
32 #include "v10_structs.h"
33 #include "mes_api_def.h"
34
/*
 * Sienna Cichlid specific register offsets and base indices.
 * mmCP_MES_IC_OP_CNTL_Sienna_Cichlid is selected by the microcode loader
 * in this file when the GC IP version is 10.3.0.
 */
35 #define mmCP_MES_IC_OP_CNTL_Sienna_Cichlid               0x2820
36 #define mmCP_MES_IC_OP_CNTL_Sienna_Cichlid_BASE_IDX      1
37 #define mmRLC_CP_SCHEDULERS_Sienna_Cichlid              0x4ca1
38 #define mmRLC_CP_SCHEDULERS_Sienna_Cichlid_BASE_IDX     1
39
/* Firmware images declared for this module. */
40 MODULE_FIRMWARE("amdgpu/navi10_mes.bin");
41 MODULE_FIRMWARE("amdgpu/sienna_cichlid_mes.bin");
42 MODULE_FIRMWARE("amdgpu/sienna_cichlid_mes1.bin");
43
/* Forward declarations. */
44 static int mes_v10_1_hw_fini(void *handle);
45 static int mes_v10_1_kiq_hw_init(struct amdgpu_device *adev);
46
/*
 * Size of the per-pipe EOP buffer object; the MQD setup converts it to
 * dwords (MES_EOP_SIZE / 4) when programming the EOP_SIZE field.
 */
47 #define MES_EOP_SIZE   2048
48
49 static void mes_v10_1_ring_set_wptr(struct amdgpu_ring *ring)
50 {
51         struct amdgpu_device *adev = ring->adev;
52
53         if (ring->use_doorbell) {
54                 atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
55                              ring->wptr);
56                 WDOORBELL64(ring->doorbell_index, ring->wptr);
57         } else {
58                 BUG();
59         }
60 }
61
62 static u64 mes_v10_1_ring_get_rptr(struct amdgpu_ring *ring)
63 {
64         return *ring->rptr_cpu_addr;
65 }
66
67 static u64 mes_v10_1_ring_get_wptr(struct amdgpu_ring *ring)
68 {
69         u64 wptr;
70
71         if (ring->use_doorbell)
72                 wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
73         else
74                 BUG();
75         return wptr;
76 }
77
78 static const struct amdgpu_ring_funcs mes_v10_1_ring_funcs = {
79         .type = AMDGPU_RING_TYPE_MES,
80         .align_mask = 1,
81         .nop = 0,
82         .support_64bit_ptrs = true,
83         .get_rptr = mes_v10_1_ring_get_rptr,
84         .get_wptr = mes_v10_1_ring_get_wptr,
85         .set_wptr = mes_v10_1_ring_set_wptr,
86         .insert_nop = amdgpu_ring_insert_nop,
87 };
88
89 static int mes_v10_1_submit_pkt_and_poll_completion(struct amdgpu_mes *mes,
90                                                     void *pkt, int size,
91                                                     int api_status_off)
92 {
93         int ndw = size / 4;
94         signed long r;
95         union MESAPI__ADD_QUEUE *x_pkt = pkt;
96         struct MES_API_STATUS *api_status;
97         struct amdgpu_device *adev = mes->adev;
98         struct amdgpu_ring *ring = &mes->ring;
99         unsigned long flags;
100
101         BUG_ON(size % 4 != 0);
102
103         spin_lock_irqsave(&mes->ring_lock, flags);
104         if (amdgpu_ring_alloc(ring, ndw)) {
105                 spin_unlock_irqrestore(&mes->ring_lock, flags);
106                 return -ENOMEM;
107         }
108
109         api_status = (struct MES_API_STATUS *)((char *)pkt + api_status_off);
110         api_status->api_completion_fence_addr = mes->ring.fence_drv.gpu_addr;
111         api_status->api_completion_fence_value = ++mes->ring.fence_drv.sync_seq;
112
113         amdgpu_ring_write_multiple(ring, pkt, ndw);
114         amdgpu_ring_commit(ring);
115         spin_unlock_irqrestore(&mes->ring_lock, flags);
116
117         DRM_DEBUG("MES msg=%d was emitted\n", x_pkt->header.opcode);
118
119         r = amdgpu_fence_wait_polling(ring, ring->fence_drv.sync_seq,
120                                       adev->usec_timeout);
121         if (r < 1) {
122                 DRM_ERROR("MES failed to response msg=%d\n",
123                           x_pkt->header.opcode);
124
125                 while (halt_if_hws_hang)
126                         schedule();
127
128                 return -ETIMEDOUT;
129         }
130
131         return 0;
132 }
133
134 static int convert_to_mes_queue_type(int queue_type)
135 {
136         if (queue_type == AMDGPU_RING_TYPE_GFX)
137                 return MES_QUEUE_TYPE_GFX;
138         else if (queue_type == AMDGPU_RING_TYPE_COMPUTE)
139                 return MES_QUEUE_TYPE_COMPUTE;
140         else if (queue_type == AMDGPU_RING_TYPE_SDMA)
141                 return MES_QUEUE_TYPE_SDMA;
142         else
143                 BUG();
144         return -1;
145 }
146
147 static int mes_v10_1_add_hw_queue(struct amdgpu_mes *mes,
148                                   struct mes_add_queue_input *input)
149 {
150         struct amdgpu_device *adev = mes->adev;
151         union MESAPI__ADD_QUEUE mes_add_queue_pkt;
152         struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)];
153         uint32_t vm_cntx_cntl = hub->vm_cntx_cntl;
154
155         memset(&mes_add_queue_pkt, 0, sizeof(mes_add_queue_pkt));
156
157         mes_add_queue_pkt.header.type = MES_API_TYPE_SCHEDULER;
158         mes_add_queue_pkt.header.opcode = MES_SCH_API_ADD_QUEUE;
159         mes_add_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
160
161         mes_add_queue_pkt.process_id = input->process_id;
162         mes_add_queue_pkt.page_table_base_addr = input->page_table_base_addr;
163         mes_add_queue_pkt.process_va_start = input->process_va_start;
164         mes_add_queue_pkt.process_va_end = input->process_va_end;
165         mes_add_queue_pkt.process_quantum = input->process_quantum;
166         mes_add_queue_pkt.process_context_addr = input->process_context_addr;
167         mes_add_queue_pkt.gang_quantum = input->gang_quantum;
168         mes_add_queue_pkt.gang_context_addr = input->gang_context_addr;
169         mes_add_queue_pkt.inprocess_gang_priority =
170                 input->inprocess_gang_priority;
171         mes_add_queue_pkt.gang_global_priority_level =
172                 input->gang_global_priority_level;
173         mes_add_queue_pkt.doorbell_offset = input->doorbell_offset;
174         mes_add_queue_pkt.mqd_addr = input->mqd_addr;
175         mes_add_queue_pkt.wptr_addr = input->wptr_addr;
176         mes_add_queue_pkt.queue_type =
177                 convert_to_mes_queue_type(input->queue_type);
178         mes_add_queue_pkt.paging = input->paging;
179         mes_add_queue_pkt.vm_context_cntl = vm_cntx_cntl;
180         mes_add_queue_pkt.gws_base = input->gws_base;
181         mes_add_queue_pkt.gws_size = input->gws_size;
182         mes_add_queue_pkt.trap_handler_addr = input->tba_addr;
183
184         return mes_v10_1_submit_pkt_and_poll_completion(mes,
185                         &mes_add_queue_pkt, sizeof(mes_add_queue_pkt),
186                         offsetof(union MESAPI__ADD_QUEUE, api_status));
187 }
188
189 static int mes_v10_1_remove_hw_queue(struct amdgpu_mes *mes,
190                                      struct mes_remove_queue_input *input)
191 {
192         union MESAPI__REMOVE_QUEUE mes_remove_queue_pkt;
193
194         memset(&mes_remove_queue_pkt, 0, sizeof(mes_remove_queue_pkt));
195
196         mes_remove_queue_pkt.header.type = MES_API_TYPE_SCHEDULER;
197         mes_remove_queue_pkt.header.opcode = MES_SCH_API_REMOVE_QUEUE;
198         mes_remove_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
199
200         mes_remove_queue_pkt.doorbell_offset = input->doorbell_offset;
201         mes_remove_queue_pkt.gang_context_addr = input->gang_context_addr;
202
203         return mes_v10_1_submit_pkt_and_poll_completion(mes,
204                         &mes_remove_queue_pkt, sizeof(mes_remove_queue_pkt),
205                         offsetof(union MESAPI__REMOVE_QUEUE, api_status));
206 }
207
208 static int mes_v10_1_unmap_legacy_queue(struct amdgpu_mes *mes,
209                                  struct mes_unmap_legacy_queue_input *input)
210 {
211         union MESAPI__REMOVE_QUEUE mes_remove_queue_pkt;
212
213         memset(&mes_remove_queue_pkt, 0, sizeof(mes_remove_queue_pkt));
214
215         mes_remove_queue_pkt.header.type = MES_API_TYPE_SCHEDULER;
216         mes_remove_queue_pkt.header.opcode = MES_SCH_API_REMOVE_QUEUE;
217         mes_remove_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
218
219         mes_remove_queue_pkt.doorbell_offset = input->doorbell_offset;
220         mes_remove_queue_pkt.gang_context_addr = 0;
221
222         mes_remove_queue_pkt.pipe_id = input->pipe_id;
223         mes_remove_queue_pkt.queue_id = input->queue_id;
224
225         if (input->action == PREEMPT_QUEUES_NO_UNMAP) {
226                 mes_remove_queue_pkt.preempt_legacy_gfx_queue = 1;
227                 mes_remove_queue_pkt.tf_addr = input->trail_fence_addr;
228                 mes_remove_queue_pkt.tf_data =
229                         lower_32_bits(input->trail_fence_data);
230         } else {
231                 if (input->queue_type == AMDGPU_RING_TYPE_GFX)
232                         mes_remove_queue_pkt.unmap_legacy_gfx_queue = 1;
233                 else
234                         mes_remove_queue_pkt.unmap_kiq_utility_queue = 1;
235         }
236
237         return mes_v10_1_submit_pkt_and_poll_completion(mes,
238                         &mes_remove_queue_pkt, sizeof(mes_remove_queue_pkt),
239                         offsetof(union MESAPI__REMOVE_QUEUE, api_status));
240 }
241
/*
 * Gang suspend callback. This implementation performs no work: both
 * arguments are ignored and 0 is returned unconditionally.
 */
static int mes_v10_1_suspend_gang(struct amdgpu_mes *mes,
				  struct mes_suspend_gang_input *input)
{
	/* Nothing to do. */
	return 0;
}
247
/*
 * Gang resume callback. This implementation performs no work: both
 * arguments are ignored and 0 is returned unconditionally.
 */
static int mes_v10_1_resume_gang(struct amdgpu_mes *mes,
				 struct mes_resume_gang_input *input)
{
	/* Nothing to do. */
	return 0;
}
253
254 static int mes_v10_1_query_sched_status(struct amdgpu_mes *mes)
255 {
256         union MESAPI__QUERY_MES_STATUS mes_status_pkt;
257
258         memset(&mes_status_pkt, 0, sizeof(mes_status_pkt));
259
260         mes_status_pkt.header.type = MES_API_TYPE_SCHEDULER;
261         mes_status_pkt.header.opcode = MES_SCH_API_QUERY_SCHEDULER_STATUS;
262         mes_status_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
263
264         return mes_v10_1_submit_pkt_and_poll_completion(mes,
265                         &mes_status_pkt, sizeof(mes_status_pkt),
266                         offsetof(union MESAPI__QUERY_MES_STATUS, api_status));
267 }
268
269 static int mes_v10_1_set_hw_resources(struct amdgpu_mes *mes)
270 {
271         int i;
272         struct amdgpu_device *adev = mes->adev;
273         union MESAPI_SET_HW_RESOURCES mes_set_hw_res_pkt;
274
275         memset(&mes_set_hw_res_pkt, 0, sizeof(mes_set_hw_res_pkt));
276
277         mes_set_hw_res_pkt.header.type = MES_API_TYPE_SCHEDULER;
278         mes_set_hw_res_pkt.header.opcode = MES_SCH_API_SET_HW_RSRC;
279         mes_set_hw_res_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
280
281         mes_set_hw_res_pkt.vmid_mask_mmhub = mes->vmid_mask_mmhub;
282         mes_set_hw_res_pkt.vmid_mask_gfxhub = mes->vmid_mask_gfxhub;
283         mes_set_hw_res_pkt.gds_size = adev->gds.gds_size;
284         mes_set_hw_res_pkt.paging_vmid = 0;
285         mes_set_hw_res_pkt.g_sch_ctx_gpu_mc_ptr = mes->sch_ctx_gpu_addr;
286         mes_set_hw_res_pkt.query_status_fence_gpu_mc_ptr =
287                 mes->query_status_fence_gpu_addr;
288
289         for (i = 0; i < MAX_COMPUTE_PIPES; i++)
290                 mes_set_hw_res_pkt.compute_hqd_mask[i] =
291                         mes->compute_hqd_mask[i];
292
293         for (i = 0; i < MAX_GFX_PIPES; i++)
294                 mes_set_hw_res_pkt.gfx_hqd_mask[i] = mes->gfx_hqd_mask[i];
295
296         for (i = 0; i < MAX_SDMA_PIPES; i++)
297                 mes_set_hw_res_pkt.sdma_hqd_mask[i] = mes->sdma_hqd_mask[i];
298
299         for (i = 0; i < AMD_PRIORITY_NUM_LEVELS; i++)
300                 mes_set_hw_res_pkt.aggregated_doorbells[i] =
301                         mes->aggregated_doorbells[i];
302
303         for (i = 0; i < 5; i++) {
304                 mes_set_hw_res_pkt.gc_base[i] = adev->reg_offset[GC_HWIP][0][i];
305                 mes_set_hw_res_pkt.mmhub_base[i] =
306                         adev->reg_offset[MMHUB_HWIP][0][i];
307                 mes_set_hw_res_pkt.osssys_base[i] =
308                         adev->reg_offset[OSSSYS_HWIP][0][i];
309         }
310
311         mes_set_hw_res_pkt.disable_reset = 1;
312         mes_set_hw_res_pkt.disable_mes_log = 1;
313         mes_set_hw_res_pkt.use_different_vmid_compute = 1;
314
315         return mes_v10_1_submit_pkt_and_poll_completion(mes,
316                         &mes_set_hw_res_pkt, sizeof(mes_set_hw_res_pkt),
317                         offsetof(union MESAPI_SET_HW_RESOURCES, api_status));
318 }
319
320 static void mes_v10_1_init_aggregated_doorbell(struct amdgpu_mes *mes)
321 {
322         struct amdgpu_device *adev = mes->adev;
323         uint32_t data;
324
325         data = RREG32_SOC15(GC, 0, mmCP_MES_DOORBELL_CONTROL1);
326         data &= ~(CP_MES_DOORBELL_CONTROL1__DOORBELL_OFFSET_MASK |
327                   CP_MES_DOORBELL_CONTROL1__DOORBELL_EN_MASK |
328                   CP_MES_DOORBELL_CONTROL1__DOORBELL_HIT_MASK);
329         data |= mes->aggregated_doorbells[AMDGPU_MES_PRIORITY_LEVEL_LOW] <<
330                 CP_MES_DOORBELL_CONTROL1__DOORBELL_OFFSET__SHIFT;
331         data |= 1 << CP_MES_DOORBELL_CONTROL1__DOORBELL_EN__SHIFT;
332         WREG32_SOC15(GC, 0, mmCP_MES_DOORBELL_CONTROL1, data);
333
334         data = RREG32_SOC15(GC, 0, mmCP_MES_DOORBELL_CONTROL2);
335         data &= ~(CP_MES_DOORBELL_CONTROL2__DOORBELL_OFFSET_MASK |
336                   CP_MES_DOORBELL_CONTROL2__DOORBELL_EN_MASK |
337                   CP_MES_DOORBELL_CONTROL2__DOORBELL_HIT_MASK);
338         data |= mes->aggregated_doorbells[AMDGPU_MES_PRIORITY_LEVEL_NORMAL] <<
339                 CP_MES_DOORBELL_CONTROL2__DOORBELL_OFFSET__SHIFT;
340         data |= 1 << CP_MES_DOORBELL_CONTROL2__DOORBELL_EN__SHIFT;
341         WREG32_SOC15(GC, 0, mmCP_MES_DOORBELL_CONTROL2, data);
342
343         data = RREG32_SOC15(GC, 0, mmCP_MES_DOORBELL_CONTROL3);
344         data &= ~(CP_MES_DOORBELL_CONTROL3__DOORBELL_OFFSET_MASK |
345                   CP_MES_DOORBELL_CONTROL3__DOORBELL_EN_MASK |
346                   CP_MES_DOORBELL_CONTROL3__DOORBELL_HIT_MASK);
347         data |= mes->aggregated_doorbells[AMDGPU_MES_PRIORITY_LEVEL_MEDIUM] <<
348                 CP_MES_DOORBELL_CONTROL3__DOORBELL_OFFSET__SHIFT;
349         data |= 1 << CP_MES_DOORBELL_CONTROL3__DOORBELL_EN__SHIFT;
350         WREG32_SOC15(GC, 0, mmCP_MES_DOORBELL_CONTROL3, data);
351
352         data = RREG32_SOC15(GC, 0, mmCP_MES_DOORBELL_CONTROL4);
353         data &= ~(CP_MES_DOORBELL_CONTROL4__DOORBELL_OFFSET_MASK |
354                   CP_MES_DOORBELL_CONTROL4__DOORBELL_EN_MASK |
355                   CP_MES_DOORBELL_CONTROL4__DOORBELL_HIT_MASK);
356         data |= mes->aggregated_doorbells[AMDGPU_MES_PRIORITY_LEVEL_HIGH] <<
357                 CP_MES_DOORBELL_CONTROL4__DOORBELL_OFFSET__SHIFT;
358         data |= 1 << CP_MES_DOORBELL_CONTROL4__DOORBELL_EN__SHIFT;
359         WREG32_SOC15(GC, 0, mmCP_MES_DOORBELL_CONTROL4, data);
360
361         data = RREG32_SOC15(GC, 0, mmCP_MES_DOORBELL_CONTROL5);
362         data &= ~(CP_MES_DOORBELL_CONTROL5__DOORBELL_OFFSET_MASK |
363                   CP_MES_DOORBELL_CONTROL5__DOORBELL_EN_MASK |
364                   CP_MES_DOORBELL_CONTROL5__DOORBELL_HIT_MASK);
365         data |= mes->aggregated_doorbells[AMDGPU_MES_PRIORITY_LEVEL_REALTIME] <<
366                 CP_MES_DOORBELL_CONTROL5__DOORBELL_OFFSET__SHIFT;
367         data |= 1 << CP_MES_DOORBELL_CONTROL5__DOORBELL_EN__SHIFT;
368         WREG32_SOC15(GC, 0, mmCP_MES_DOORBELL_CONTROL5, data);
369
370         data = 1 << CP_HQD_GFX_CONTROL__DB_UPDATED_MSG_EN__SHIFT;
371         WREG32_SOC15(GC, 0, mmCP_HQD_GFX_CONTROL, data);
372 }
373
374 static const struct amdgpu_mes_funcs mes_v10_1_funcs = {
375         .add_hw_queue = mes_v10_1_add_hw_queue,
376         .remove_hw_queue = mes_v10_1_remove_hw_queue,
377         .unmap_legacy_queue = mes_v10_1_unmap_legacy_queue,
378         .suspend_gang = mes_v10_1_suspend_gang,
379         .resume_gang = mes_v10_1_resume_gang,
380 };
381
382 static int mes_v10_1_allocate_ucode_buffer(struct amdgpu_device *adev,
383                                            enum admgpu_mes_pipe pipe)
384 {
385         int r;
386         const struct mes_firmware_header_v1_0 *mes_hdr;
387         const __le32 *fw_data;
388         unsigned fw_size;
389
390         mes_hdr = (const struct mes_firmware_header_v1_0 *)
391                 adev->mes.fw[pipe]->data;
392
393         fw_data = (const __le32 *)(adev->mes.fw[pipe]->data +
394                    le32_to_cpu(mes_hdr->mes_ucode_offset_bytes));
395         fw_size = le32_to_cpu(mes_hdr->mes_ucode_size_bytes);
396
397         r = amdgpu_bo_create_reserved(adev, fw_size,
398                                       PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
399                                       &adev->mes.ucode_fw_obj[pipe],
400                                       &adev->mes.ucode_fw_gpu_addr[pipe],
401                                       (void **)&adev->mes.ucode_fw_ptr[pipe]);
402         if (r) {
403                 dev_err(adev->dev, "(%d) failed to create mes fw bo\n", r);
404                 return r;
405         }
406
407         memcpy(adev->mes.ucode_fw_ptr[pipe], fw_data, fw_size);
408
409         amdgpu_bo_kunmap(adev->mes.ucode_fw_obj[pipe]);
410         amdgpu_bo_unreserve(adev->mes.ucode_fw_obj[pipe]);
411
412         return 0;
413 }
414
415 static int mes_v10_1_allocate_ucode_data_buffer(struct amdgpu_device *adev,
416                                                 enum admgpu_mes_pipe pipe)
417 {
418         int r;
419         const struct mes_firmware_header_v1_0 *mes_hdr;
420         const __le32 *fw_data;
421         unsigned fw_size;
422
423         mes_hdr = (const struct mes_firmware_header_v1_0 *)
424                 adev->mes.fw[pipe]->data;
425
426         fw_data = (const __le32 *)(adev->mes.fw[pipe]->data +
427                    le32_to_cpu(mes_hdr->mes_ucode_data_offset_bytes));
428         fw_size = le32_to_cpu(mes_hdr->mes_ucode_data_size_bytes);
429
430         r = amdgpu_bo_create_reserved(adev, fw_size,
431                                       64 * 1024, AMDGPU_GEM_DOMAIN_GTT,
432                                       &adev->mes.data_fw_obj[pipe],
433                                       &adev->mes.data_fw_gpu_addr[pipe],
434                                       (void **)&adev->mes.data_fw_ptr[pipe]);
435         if (r) {
436                 dev_err(adev->dev, "(%d) failed to create mes data fw bo\n", r);
437                 return r;
438         }
439
440         memcpy(adev->mes.data_fw_ptr[pipe], fw_data, fw_size);
441
442         amdgpu_bo_kunmap(adev->mes.data_fw_obj[pipe]);
443         amdgpu_bo_unreserve(adev->mes.data_fw_obj[pipe]);
444
445         return 0;
446 }
447
448 static void mes_v10_1_free_ucode_buffers(struct amdgpu_device *adev,
449                                          enum admgpu_mes_pipe pipe)
450 {
451         amdgpu_bo_free_kernel(&adev->mes.data_fw_obj[pipe],
452                               &adev->mes.data_fw_gpu_addr[pipe],
453                               (void **)&adev->mes.data_fw_ptr[pipe]);
454
455         amdgpu_bo_free_kernel(&adev->mes.ucode_fw_obj[pipe],
456                               &adev->mes.ucode_fw_gpu_addr[pipe],
457                               (void **)&adev->mes.ucode_fw_ptr[pipe]);
458 }
459
460 static void mes_v10_1_enable(struct amdgpu_device *adev, bool enable)
461 {
462         uint32_t pipe, data = 0;
463
464         if (enable) {
465                 data = RREG32_SOC15(GC, 0, mmCP_MES_CNTL);
466                 data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE0_RESET, 1);
467                 data = REG_SET_FIELD(data, CP_MES_CNTL,
468                              MES_PIPE1_RESET, adev->enable_mes_kiq ? 1 : 0);
469                 WREG32_SOC15(GC, 0, mmCP_MES_CNTL, data);
470
471                 mutex_lock(&adev->srbm_mutex);
472                 for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) {
473                         if (!adev->enable_mes_kiq &&
474                             pipe == AMDGPU_MES_KIQ_PIPE)
475                                 continue;
476
477                         nv_grbm_select(adev, 3, pipe, 0, 0);
478                         WREG32_SOC15(GC, 0, mmCP_MES_PRGRM_CNTR_START,
479                              (uint32_t)(adev->mes.uc_start_addr[pipe]) >> 2);
480                 }
481                 nv_grbm_select(adev, 0, 0, 0, 0);
482                 mutex_unlock(&adev->srbm_mutex);
483
484                 /* clear BYPASS_UNCACHED to avoid hangs after interrupt. */
485                 data = RREG32_SOC15(GC, 0, mmCP_MES_DC_OP_CNTL);
486                 data = REG_SET_FIELD(data, CP_MES_DC_OP_CNTL,
487                                      BYPASS_UNCACHED, 0);
488                 WREG32_SOC15(GC, 0, mmCP_MES_DC_OP_CNTL, data);
489
490                 /* unhalt MES and activate pipe0 */
491                 data = REG_SET_FIELD(0, CP_MES_CNTL, MES_PIPE0_ACTIVE, 1);
492                 data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE1_ACTIVE,
493                                      adev->enable_mes_kiq ? 1 : 0);
494                 WREG32_SOC15(GC, 0, mmCP_MES_CNTL, data);
495                 udelay(100);
496         } else {
497                 data = RREG32_SOC15(GC, 0, mmCP_MES_CNTL);
498                 data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE0_ACTIVE, 0);
499                 data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE1_ACTIVE, 0);
500                 data = REG_SET_FIELD(data, CP_MES_CNTL,
501                                      MES_INVALIDATE_ICACHE, 1);
502                 data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE0_RESET, 1);
503                 data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE1_RESET,
504                                      adev->enable_mes_kiq ? 1 : 0);
505                 data = REG_SET_FIELD(data, CP_MES_CNTL, MES_HALT, 1);
506                 WREG32_SOC15(GC, 0, mmCP_MES_CNTL, data);
507         }
508 }
509
510 /* This function is for backdoor MES firmware */
511 static int mes_v10_1_load_microcode(struct amdgpu_device *adev,
512                                     enum admgpu_mes_pipe pipe)
513 {
514         int r;
515         uint32_t data;
516
517         mes_v10_1_enable(adev, false);
518
519         if (!adev->mes.fw[pipe])
520                 return -EINVAL;
521
522         r = mes_v10_1_allocate_ucode_buffer(adev, pipe);
523         if (r)
524                 return r;
525
526         r = mes_v10_1_allocate_ucode_data_buffer(adev, pipe);
527         if (r) {
528                 mes_v10_1_free_ucode_buffers(adev, pipe);
529                 return r;
530         }
531
532         WREG32_SOC15(GC, 0, mmCP_MES_IC_BASE_CNTL, 0);
533
534         mutex_lock(&adev->srbm_mutex);
535         /* me=3, pipe=0, queue=0 */
536         nv_grbm_select(adev, 3, pipe, 0, 0);
537
538         /* set ucode start address */
539         WREG32_SOC15(GC, 0, mmCP_MES_PRGRM_CNTR_START,
540                      (uint32_t)(adev->mes.uc_start_addr[pipe]) >> 2);
541
542         /* set ucode fimrware address */
543         WREG32_SOC15(GC, 0, mmCP_MES_IC_BASE_LO,
544                      lower_32_bits(adev->mes.ucode_fw_gpu_addr[pipe]));
545         WREG32_SOC15(GC, 0, mmCP_MES_IC_BASE_HI,
546                      upper_32_bits(adev->mes.ucode_fw_gpu_addr[pipe]));
547
548         /* set ucode instruction cache boundary to 2M-1 */
549         WREG32_SOC15(GC, 0, mmCP_MES_MIBOUND_LO, 0x1FFFFF);
550
551         /* set ucode data firmware address */
552         WREG32_SOC15(GC, 0, mmCP_MES_MDBASE_LO,
553                      lower_32_bits(adev->mes.data_fw_gpu_addr[pipe]));
554         WREG32_SOC15(GC, 0, mmCP_MES_MDBASE_HI,
555                      upper_32_bits(adev->mes.data_fw_gpu_addr[pipe]));
556
557         /* Set 0x3FFFF (256K-1) to CP_MES_MDBOUND_LO */
558         WREG32_SOC15(GC, 0, mmCP_MES_MDBOUND_LO, 0x3FFFF);
559
560         /* invalidate ICACHE */
561         switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
562         case IP_VERSION(10, 3, 0):
563                 data = RREG32_SOC15(GC, 0, mmCP_MES_IC_OP_CNTL_Sienna_Cichlid);
564                 break;
565         default:
566                 data = RREG32_SOC15(GC, 0, mmCP_MES_IC_OP_CNTL);
567                 break;
568         }
569         data = REG_SET_FIELD(data, CP_MES_IC_OP_CNTL, PRIME_ICACHE, 0);
570         data = REG_SET_FIELD(data, CP_MES_IC_OP_CNTL, INVALIDATE_CACHE, 1);
571         switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
572         case IP_VERSION(10, 3, 0):
573                 WREG32_SOC15(GC, 0, mmCP_MES_IC_OP_CNTL_Sienna_Cichlid, data);
574                 break;
575         default:
576                 WREG32_SOC15(GC, 0, mmCP_MES_IC_OP_CNTL, data);
577                 break;
578         }
579
580         /* prime the ICACHE. */
581         switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
582         case IP_VERSION(10, 3, 0):
583                 data = RREG32_SOC15(GC, 0, mmCP_MES_IC_OP_CNTL_Sienna_Cichlid);
584                 break;
585         default:
586                 data = RREG32_SOC15(GC, 0, mmCP_MES_IC_OP_CNTL);
587                 break;
588         }
589         data = REG_SET_FIELD(data, CP_MES_IC_OP_CNTL, PRIME_ICACHE, 1);
590         switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
591         case IP_VERSION(10, 3, 0):
592                 WREG32_SOC15(GC, 0, mmCP_MES_IC_OP_CNTL_Sienna_Cichlid, data);
593                 break;
594         default:
595                 WREG32_SOC15(GC, 0, mmCP_MES_IC_OP_CNTL, data);
596                 break;
597         }
598
599         nv_grbm_select(adev, 0, 0, 0, 0);
600         mutex_unlock(&adev->srbm_mutex);
601
602         return 0;
603 }
604
605 static int mes_v10_1_allocate_eop_buf(struct amdgpu_device *adev,
606                                       enum admgpu_mes_pipe pipe)
607 {
608         int r;
609         u32 *eop;
610
611         r = amdgpu_bo_create_reserved(adev, MES_EOP_SIZE, PAGE_SIZE,
612                               AMDGPU_GEM_DOMAIN_GTT,
613                               &adev->mes.eop_gpu_obj[pipe],
614                               &adev->mes.eop_gpu_addr[pipe],
615                               (void **)&eop);
616         if (r) {
617                 dev_warn(adev->dev, "(%d) create EOP bo failed\n", r);
618                 return r;
619         }
620
621         memset(eop, 0, adev->mes.eop_gpu_obj[pipe]->tbo.base.size);
622
623         amdgpu_bo_kunmap(adev->mes.eop_gpu_obj[pipe]);
624         amdgpu_bo_unreserve(adev->mes.eop_gpu_obj[pipe]);
625
626         return 0;
627 }
628
629 static int mes_v10_1_mqd_init(struct amdgpu_ring *ring)
630 {
631         struct v10_compute_mqd *mqd = ring->mqd_ptr;
632         uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
633         uint32_t tmp;
634
635         memset(mqd, 0, sizeof(*mqd));
636
637         mqd->header = 0xC0310800;
638         mqd->compute_pipelinestat_enable = 0x00000001;
639         mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
640         mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
641         mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
642         mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
643         mqd->compute_misc_reserved = 0x00000003;
644
645         eop_base_addr = ring->eop_gpu_addr >> 8;
646
647         /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
648         tmp = mmCP_HQD_EOP_CONTROL_DEFAULT;
649         tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
650                         (order_base_2(MES_EOP_SIZE / 4) - 1));
651
652         mqd->cp_hqd_eop_base_addr_lo = lower_32_bits(eop_base_addr);
653         mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
654         mqd->cp_hqd_eop_control = tmp;
655
656         /* disable the queue if it's active */
657         ring->wptr = 0;
658         mqd->cp_hqd_pq_rptr = 0;
659         mqd->cp_hqd_pq_wptr_lo = 0;
660         mqd->cp_hqd_pq_wptr_hi = 0;
661
662         /* set the pointer to the MQD */
663         mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
664         mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);
665
666         /* set MQD vmid to 0 */
667         tmp = mmCP_MQD_CONTROL_DEFAULT;
668         tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
669         mqd->cp_mqd_control = tmp;
670
671         /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
672         hqd_gpu_addr = ring->gpu_addr >> 8;
673         mqd->cp_hqd_pq_base_lo = lower_32_bits(hqd_gpu_addr);
674         mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
675
676         /* set the wb address whether it's enabled or not */
677         wb_gpu_addr = ring->rptr_gpu_addr;
678         mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
679         mqd->cp_hqd_pq_rptr_report_addr_hi =
680                 upper_32_bits(wb_gpu_addr) & 0xffff;
681
682         /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
683         wb_gpu_addr = ring->wptr_gpu_addr;
684         mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffff8;
685         mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
686
687         /* set up the HQD, this is similar to CP_RB0_CNTL */
688         tmp = mmCP_HQD_PQ_CONTROL_DEFAULT;
689         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
690                             (order_base_2(ring->ring_size / 4) - 1));
691         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
692                             ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
693 #ifdef __BIG_ENDIAN
694         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
695 #endif
696         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 1);
697         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TUNNEL_DISPATCH, 0);
698         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
699         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
700         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, NO_UPDATE_RPTR, 1);
701         mqd->cp_hqd_pq_control = tmp;
702
703         /* enable doorbell? */
704         tmp = 0;
705         if (ring->use_doorbell) {
706                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
707                                     DOORBELL_OFFSET, ring->doorbell_index);
708                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
709                                     DOORBELL_EN, 1);
710                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
711                                     DOORBELL_SOURCE, 0);
712                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
713                                     DOORBELL_HIT, 0);
714         }
715         else
716                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
717                                     DOORBELL_EN, 0);
718         mqd->cp_hqd_pq_doorbell_control = tmp;
719
720         mqd->cp_hqd_vmid = 0;
721         /* activate the queue */
722         mqd->cp_hqd_active = 1;
723         mqd->cp_hqd_persistent_state = mmCP_HQD_PERSISTENT_STATE_DEFAULT;
724         mqd->cp_hqd_ib_control = mmCP_HQD_IB_CONTROL_DEFAULT;
725         mqd->cp_hqd_iq_timer = mmCP_HQD_IQ_TIMER_DEFAULT;
726         mqd->cp_hqd_quantum = mmCP_HQD_QUANTUM_DEFAULT;
727
728         tmp = mmCP_HQD_GFX_CONTROL_DEFAULT;
729         tmp = REG_SET_FIELD(tmp, CP_HQD_GFX_CONTROL, DB_UPDATED_MSG_EN, 1);
730         /* offset: 184 - this is used for CP_HQD_GFX_CONTROL */
731         mqd->cp_hqd_suspend_cntl_stack_offset = tmp;
732
733         amdgpu_device_flush_hdp(ring->adev, NULL);
734         return 0;
735 }
736
#if 0
/*
 * Program the hardware queue descriptor registers of @ring directly from
 * the values stored in its MQD (currently compiled out).
 *
 * All register accesses happen with srbm_mutex held and with the GRBM
 * selection switched via nv_grbm_select(); the selection is reset to
 * (0, 0, 0, 0) before the mutex is dropped.
 */
static void mes_v10_1_queue_init_register(struct amdgpu_ring *ring)
{
	struct v10_compute_mqd *mqd = ring->mqd_ptr;
	struct amdgpu_device *adev = ring->adev;
	uint32_t data = 0;

	mutex_lock(&adev->srbm_mutex);
	nv_grbm_select(adev, 3, ring->pipe, 0, 0);

	/* set CP_HQD_VMID.VMID = 0. */
	data = RREG32_SOC15(GC, 0, mmCP_HQD_VMID);
	data = REG_SET_FIELD(data, CP_HQD_VMID, VMID, 0);
	WREG32_SOC15(GC, 0, mmCP_HQD_VMID, data);

	/* set CP_HQD_PQ_DOORBELL_CONTROL.DOORBELL_EN=0 */
	data = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
	data = REG_SET_FIELD(data, CP_HQD_PQ_DOORBELL_CONTROL,
			     DOORBELL_EN, 0);
	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, data);

	/* set CP_MQD_BASE_ADDR/HI with the MQD base address */
	WREG32_SOC15(GC, 0, mmCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo);
	WREG32_SOC15(GC, 0, mmCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi);

	/*
	 * set CP_MQD_CONTROL.VMID=0; write back the modified value so the
	 * remaining fields of the register keep what was read from hardware
	 */
	data = RREG32_SOC15(GC, 0, mmCP_MQD_CONTROL);
	data = REG_SET_FIELD(data, CP_MQD_CONTROL, VMID, 0);
	WREG32_SOC15(GC, 0, mmCP_MQD_CONTROL, data);

	/* set CP_HQD_PQ_BASE/HI with the ring buffer base address */
	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_BASE, mqd->cp_hqd_pq_base_lo);
	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_BASE_HI, mqd->cp_hqd_pq_base_hi);

	/* set CP_HQD_PQ_RPTR_REPORT_ADDR/HI */
	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR,
		     mqd->cp_hqd_pq_rptr_report_addr_lo);
	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
		     mqd->cp_hqd_pq_rptr_report_addr_hi);

	/* set CP_HQD_PQ_CONTROL */
	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL, mqd->cp_hqd_pq_control);

	/* set CP_HQD_PQ_WPTR_POLL_ADDR/HI */
	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR,
		     mqd->cp_hqd_pq_wptr_poll_addr_lo);
	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
		     mqd->cp_hqd_pq_wptr_poll_addr_hi);

	/* set CP_HQD_PQ_DOORBELL_CONTROL */
	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
		     mqd->cp_hqd_pq_doorbell_control);

	/* set CP_HQD_PERSISTENT_STATE from the value prepared in the MQD */
	WREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE, mqd->cp_hqd_persistent_state);

	/* set CP_HQD_ACTIVE from the MQD (the MQD init code stores 1 here) */
	WREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE, mqd->cp_hqd_active);

	nv_grbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);
}
#endif
800
801 static int mes_v10_1_kiq_enable_queue(struct amdgpu_device *adev)
802 {
803         struct amdgpu_kiq *kiq = &adev->gfx.kiq[0];
804         struct amdgpu_ring *kiq_ring = &adev->gfx.kiq[0].ring;
805         int r;
806
807         if (!kiq->pmf || !kiq->pmf->kiq_map_queues)
808                 return -EINVAL;
809
810         r = amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size);
811         if (r) {
812                 DRM_ERROR("Failed to lock KIQ (%d).\n", r);
813                 return r;
814         }
815
816         kiq->pmf->kiq_map_queues(kiq_ring, &adev->mes.ring);
817
818         return amdgpu_ring_test_helper(kiq_ring);
819 }
820
821 static int mes_v10_1_queue_init(struct amdgpu_device *adev)
822 {
823         int r;
824
825         r = mes_v10_1_mqd_init(&adev->mes.ring);
826         if (r)
827                 return r;
828
829         r = mes_v10_1_kiq_enable_queue(adev);
830         if (r)
831                 return r;
832
833         return 0;
834 }
835
836 static int mes_v10_1_ring_init(struct amdgpu_device *adev)
837 {
838         struct amdgpu_ring *ring;
839
840         ring = &adev->mes.ring;
841
842         ring->funcs = &mes_v10_1_ring_funcs;
843
844         ring->me = 3;
845         ring->pipe = 0;
846         ring->queue = 0;
847
848         ring->ring_obj = NULL;
849         ring->use_doorbell = true;
850         ring->doorbell_index = adev->doorbell_index.mes_ring0 << 1;
851         ring->eop_gpu_addr = adev->mes.eop_gpu_addr[AMDGPU_MES_SCHED_PIPE];
852         ring->no_scheduler = true;
853         sprintf(ring->name, "mes_%d.%d.%d", ring->me, ring->pipe, ring->queue);
854
855         return amdgpu_ring_init(adev, ring, 1024, NULL, 0,
856                                 AMDGPU_RING_PRIO_DEFAULT, NULL);
857 }
858
859 static int mes_v10_1_kiq_ring_init(struct amdgpu_device *adev)
860 {
861         struct amdgpu_ring *ring;
862
863         spin_lock_init(&adev->gfx.kiq[0].ring_lock);
864
865         ring = &adev->gfx.kiq[0].ring;
866
867         ring->me = 3;
868         ring->pipe = 1;
869         ring->queue = 0;
870
871         ring->adev = NULL;
872         ring->ring_obj = NULL;
873         ring->use_doorbell = true;
874         ring->doorbell_index = adev->doorbell_index.mes_ring1 << 1;
875         ring->eop_gpu_addr = adev->mes.eop_gpu_addr[AMDGPU_MES_KIQ_PIPE];
876         ring->no_scheduler = true;
877         sprintf(ring->name, "mes_kiq_%d.%d.%d",
878                 ring->me, ring->pipe, ring->queue);
879
880         return amdgpu_ring_init(adev, ring, 1024, NULL, 0,
881                                 AMDGPU_RING_PRIO_DEFAULT, NULL);
882 }
883
884 static int mes_v10_1_mqd_sw_init(struct amdgpu_device *adev,
885                                  enum admgpu_mes_pipe pipe)
886 {
887         int r, mqd_size = sizeof(struct v10_compute_mqd);
888         struct amdgpu_ring *ring;
889
890         if (pipe == AMDGPU_MES_KIQ_PIPE)
891                 ring = &adev->gfx.kiq[0].ring;
892         else if (pipe == AMDGPU_MES_SCHED_PIPE)
893                 ring = &adev->mes.ring;
894         else
895                 BUG();
896
897         if (ring->mqd_obj)
898                 return 0;
899
900         r = amdgpu_bo_create_kernel(adev, mqd_size, PAGE_SIZE,
901                                     AMDGPU_GEM_DOMAIN_VRAM |
902                                     AMDGPU_GEM_DOMAIN_GTT, &ring->mqd_obj,
903                                     &ring->mqd_gpu_addr, &ring->mqd_ptr);
904         if (r) {
905                 dev_warn(adev->dev, "failed to create ring mqd bo (%d)", r);
906                 return r;
907         }
908         memset(ring->mqd_ptr, 0, mqd_size);
909
910         /* prepare MQD backup */
911         adev->mes.mqd_backup[pipe] = kmalloc(mqd_size, GFP_KERNEL);
912         if (!adev->mes.mqd_backup[pipe]) {
913                 dev_warn(adev->dev,
914                          "no memory to create MQD backup for ring %s\n",
915                          ring->name);
916                 return -ENOMEM;
917         }
918
919         return 0;
920 }
921
922 static int mes_v10_1_sw_init(void *handle)
923 {
924         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
925         int pipe, r;
926
927         adev->mes.funcs = &mes_v10_1_funcs;
928         adev->mes.kiq_hw_init = &mes_v10_1_kiq_hw_init;
929
930         r = amdgpu_mes_init(adev);
931         if (r)
932                 return r;
933
934         for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) {
935                 if (!adev->enable_mes_kiq && pipe == AMDGPU_MES_KIQ_PIPE)
936                         continue;
937
938                 r = mes_v10_1_allocate_eop_buf(adev, pipe);
939                 if (r)
940                         return r;
941
942                 r = mes_v10_1_mqd_sw_init(adev, pipe);
943                 if (r)
944                         return r;
945         }
946
947         if (adev->enable_mes_kiq) {
948                 r = mes_v10_1_kiq_ring_init(adev);
949                 if (r)
950                         return r;
951         }
952
953         r = mes_v10_1_ring_init(adev);
954         if (r)
955                 return r;
956
957         return 0;
958 }
959
960 static int mes_v10_1_sw_fini(void *handle)
961 {
962         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
963         int pipe;
964
965         amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs);
966         amdgpu_device_wb_free(adev, adev->mes.query_status_fence_offs);
967
968         for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) {
969                 kfree(adev->mes.mqd_backup[pipe]);
970
971                 amdgpu_bo_free_kernel(&adev->mes.eop_gpu_obj[pipe],
972                                       &adev->mes.eop_gpu_addr[pipe],
973                                       NULL);
974                 amdgpu_ucode_release(&adev->mes.fw[pipe]);
975         }
976
977         amdgpu_bo_free_kernel(&adev->gfx.kiq[0].ring.mqd_obj,
978                               &adev->gfx.kiq[0].ring.mqd_gpu_addr,
979                               &adev->gfx.kiq[0].ring.mqd_ptr);
980
981         amdgpu_bo_free_kernel(&adev->mes.ring.mqd_obj,
982                               &adev->mes.ring.mqd_gpu_addr,
983                               &adev->mes.ring.mqd_ptr);
984
985         amdgpu_ring_fini(&adev->gfx.kiq[0].ring);
986         amdgpu_ring_fini(&adev->mes.ring);
987
988         amdgpu_mes_fini(adev);
989         return 0;
990 }
991
992 static void mes_v10_1_kiq_setting(struct amdgpu_ring *ring)
993 {
994         uint32_t tmp;
995         struct amdgpu_device *adev = ring->adev;
996
997         /* tell RLC which is KIQ queue */
998         switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
999         case IP_VERSION(10, 3, 0):
1000         case IP_VERSION(10, 3, 2):
1001         case IP_VERSION(10, 3, 1):
1002         case IP_VERSION(10, 3, 4):
1003                 tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS_Sienna_Cichlid);
1004                 tmp &= 0xffffff00;
1005                 tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
1006                 WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS_Sienna_Cichlid, tmp);
1007                 tmp |= 0x80;
1008                 WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS_Sienna_Cichlid, tmp);
1009                 break;
1010         default:
1011                 tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS);
1012                 tmp &= 0xffffff00;
1013                 tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
1014                 WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
1015                 tmp |= 0x80;
1016                 WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
1017                 break;
1018         }
1019 }
1020
1021 static int mes_v10_1_kiq_hw_init(struct amdgpu_device *adev)
1022 {
1023         int r = 0;
1024
1025         if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
1026                 r = mes_v10_1_load_microcode(adev, AMDGPU_MES_KIQ_PIPE);
1027                 if (r) {
1028                         DRM_ERROR("failed to load MES kiq fw, r=%d\n", r);
1029                         return r;
1030                 }
1031
1032                 r = mes_v10_1_load_microcode(adev, AMDGPU_MES_SCHED_PIPE);
1033                 if (r) {
1034                         DRM_ERROR("failed to load MES fw, r=%d\n", r);
1035                         return r;
1036                 }
1037         }
1038
1039         mes_v10_1_enable(adev, true);
1040
1041         mes_v10_1_kiq_setting(&adev->gfx.kiq[0].ring);
1042
1043         r = mes_v10_1_queue_init(adev);
1044         if (r)
1045                 goto failure;
1046
1047         return r;
1048
1049 failure:
1050         mes_v10_1_hw_fini(adev);
1051         return r;
1052 }
1053
1054 static int mes_v10_1_hw_init(void *handle)
1055 {
1056         int r;
1057         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1058
1059         if (!adev->enable_mes_kiq) {
1060                 if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
1061                         r = mes_v10_1_load_microcode(adev,
1062                                              AMDGPU_MES_SCHED_PIPE);
1063                         if (r) {
1064                                 DRM_ERROR("failed to MES fw, r=%d\n", r);
1065                                 return r;
1066                         }
1067                 }
1068
1069                 mes_v10_1_enable(adev, true);
1070         }
1071
1072         r = mes_v10_1_queue_init(adev);
1073         if (r)
1074                 goto failure;
1075
1076         r = mes_v10_1_set_hw_resources(&adev->mes);
1077         if (r)
1078                 goto failure;
1079
1080         mes_v10_1_init_aggregated_doorbell(&adev->mes);
1081
1082         r = mes_v10_1_query_sched_status(&adev->mes);
1083         if (r) {
1084                 DRM_ERROR("MES is busy\n");
1085                 goto failure;
1086         }
1087
1088         /*
1089          * Disable KIQ ring usage from the driver once MES is enabled.
1090          * MES uses KIQ ring exclusively so driver cannot access KIQ ring
1091          * with MES enabled.
1092          */
1093         adev->gfx.kiq[0].ring.sched.ready = false;
1094         adev->mes.ring.sched.ready = true;
1095
1096         return 0;
1097
1098 failure:
1099         mes_v10_1_hw_fini(adev);
1100         return r;
1101 }
1102
1103 static int mes_v10_1_hw_fini(void *handle)
1104 {
1105         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1106
1107         adev->mes.ring.sched.ready = false;
1108
1109         mes_v10_1_enable(adev, false);
1110
1111         if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
1112                 mes_v10_1_free_ucode_buffers(adev, AMDGPU_MES_KIQ_PIPE);
1113                 mes_v10_1_free_ucode_buffers(adev, AMDGPU_MES_SCHED_PIPE);
1114         }
1115
1116         return 0;
1117 }
1118
/*
 * Suspend callback: run amdgpu_mes_suspend() and, if that succeeds, tear
 * the hardware down via mes_v10_1_hw_fini().
 *
 * Returns the amdgpu_mes_suspend() error, or the hw_fini result.
 */
static int mes_v10_1_suspend(void *handle)
{
	struct amdgpu_device *adev = handle;
	int ret = amdgpu_mes_suspend(adev);

	if (ret)
		return ret;

	return mes_v10_1_hw_fini(adev);
}
1130
/*
 * Resume callback: bring the hardware back up via mes_v10_1_hw_init() and,
 * if that succeeds, run amdgpu_mes_resume().
 *
 * Returns the hw_init error, or the amdgpu_mes_resume() result.
 */
static int mes_v10_1_resume(void *handle)
{
	struct amdgpu_device *adev = handle;
	int ret = mes_v10_1_hw_init(adev);

	if (ret)
		return ret;

	return amdgpu_mes_resume(adev);
}
1142
1143 static int mes_v10_0_early_init(void *handle)
1144 {
1145         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1146         int pipe, r;
1147
1148         for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) {
1149                 if (!adev->enable_mes_kiq && pipe == AMDGPU_MES_KIQ_PIPE)
1150                         continue;
1151                 r = amdgpu_mes_init_microcode(adev, pipe);
1152                 if (r)
1153                         return r;
1154         }
1155
1156         return 0;
1157 }
1158
/*
 * Late-init callback: run the MES self test, except while the device is
 * in reset. Always returns 0; the self-test result is not checked.
 */
static int mes_v10_0_late_init(void *handle)
{
	struct amdgpu_device *adev = handle;

	if (amdgpu_in_reset(adev))
		return 0;

	amdgpu_mes_self_test(adev);

	return 0;
}
1168
1169 static const struct amd_ip_funcs mes_v10_1_ip_funcs = {
1170         .name = "mes_v10_1",
1171         .early_init = mes_v10_0_early_init,
1172         .late_init = mes_v10_0_late_init,
1173         .sw_init = mes_v10_1_sw_init,
1174         .sw_fini = mes_v10_1_sw_fini,
1175         .hw_init = mes_v10_1_hw_init,
1176         .hw_fini = mes_v10_1_hw_fini,
1177         .suspend = mes_v10_1_suspend,
1178         .resume = mes_v10_1_resume,
1179 };
1180
1181 const struct amdgpu_ip_block_version mes_v10_1_ip_block = {
1182         .type = AMD_IP_BLOCK_TYPE_MES,
1183         .major = 10,
1184         .minor = 1,
1185         .rev = 0,
1186         .funcs = &mes_v10_1_ip_funcs,
1187 };