/*
 * Copyright 2016 Advanced Micro Devices, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 */

#include <linux/firmware.h>

#include "amdgpu.h"
#include "amdgpu_vce.h"
#include "soc15.h"
#include "soc15d.h"
#include "soc15_common.h"
#include "mmsch_v1_0.h"

#include "vce/vce_4_0_offset.h"
#include "vce/vce_4_0_default.h"
#include "vce/vce_4_0_sh_mask.h"
#include "mmhub/mmhub_1_0_offset.h"
#include "mmhub/mmhub_1_0_sh_mask.h"

#include "ivsrcid/vce/irqsrcs_vce_4_0.h"

#define VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK   0x02

#define VCE_V4_0_FW_SIZE        (384 * 1024)
#define VCE_V4_0_STACK_SIZE     (64 * 1024)
#define VCE_V4_0_DATA_SIZE      ((16 * 1024 * AMDGPU_MAX_VCE_HANDLES) + (52 * 1024))

static void vce_v4_0_mc_resume(struct amdgpu_device *adev);
static void vce_v4_0_set_ring_funcs(struct amdgpu_device *adev);
static void vce_v4_0_set_irq_funcs(struct amdgpu_device *adev);

/**
 * vce_v4_0_ring_get_rptr - get read pointer
 *
 * @ring: amdgpu_ring pointer
 *
 * Returns the current hardware read pointer
 */
static uint64_t vce_v4_0_ring_get_rptr(struct amdgpu_ring *ring)
{
        struct amdgpu_device *adev = ring->adev;

        if (ring->me == 0)
                return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR));
        else if (ring->me == 1)
                return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR2));
        else
                return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR3));
}

/**
 * vce_v4_0_ring_get_wptr - get write pointer
 *
 * @ring: amdgpu_ring pointer
 *
 * Returns the current hardware write pointer
 */
static uint64_t vce_v4_0_ring_get_wptr(struct amdgpu_ring *ring)
{
        struct amdgpu_device *adev = ring->adev;

        if (ring->use_doorbell)
                return adev->wb.wb[ring->wptr_offs];

        if (ring->me == 0)
                return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR));
        else if (ring->me == 1)
                return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2));
        else
                return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3));
}

/**
 * vce_v4_0_ring_set_wptr - set write pointer
 *
 * @ring: amdgpu_ring pointer
 *
 * Commits the write pointer to the hardware
 */
static void vce_v4_0_ring_set_wptr(struct amdgpu_ring *ring)
{
        struct amdgpu_device *adev = ring->adev;

        if (ring->use_doorbell) {
                /* XXX check if swapping is necessary on BE */
                adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
                WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
                return;
        }

        if (ring->me == 0)
                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR),
                        lower_32_bits(ring->wptr));
        else if (ring->me == 1)
                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2),
                        lower_32_bits(ring->wptr));
        else
                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3),
                        lower_32_bits(ring->wptr));
}

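/**
 * vce_v4_0_firmware_loaded - wait for the VCE firmware to come up
 *
 * @adev: amdgpu_device pointer
 *
 * Poll VCE_STATUS until the VCPU reports the firmware as loaded,
 * resetting the ECPU between retries.
 *
 * Returns 0 on success, -ETIMEDOUT on failure.
 */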
static int vce_v4_0_firmware_loaded(struct amdgpu_device *adev)
{
        int i, j;

        for (i = 0; i < 10; ++i) {
                for (j = 0; j < 100; ++j) {
                        uint32_t status =
                                RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS));

                        if (status & VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK)
                                return 0;
                        mdelay(10);
                }

                DRM_ERROR("VCE not responding, trying to reset the ECPU!!!\n");
                WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET),
                                VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK,
                                ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
                mdelay(10);
                WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET), 0,
                                ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
                mdelay(10);

        }

        return -ETIMEDOUT;
}

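/**
 * vce_v4_0_mmsch_start - kick off the MM scheduler initialization
 *
 * @adev: amdgpu_device pointer
 * @table: init table describing the VCE/UVD setup
 *
 * Hand the init table to the MMSCH and wait for the mailbox to
 * acknowledge that the initialization completed.
 */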
static int vce_v4_0_mmsch_start(struct amdgpu_device *adev,
                                struct amdgpu_mm_table *table)
{
        uint32_t data = 0, loop;
        uint64_t addr = table->gpu_addr;
        struct mmsch_v1_0_init_header *header = (struct mmsch_v1_0_init_header *)table->cpu_addr;
        uint32_t size;

        size = header->header_size + header->vce_table_size + header->uvd_table_size;

        /* 1, write to vce_mmsch_vf_ctx_addr_lo/hi register with GPU mc addr of memory descriptor location */
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_ADDR_LO), lower_32_bits(addr));
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_ADDR_HI), upper_32_bits(addr));

        /* 2, update vmid of descriptor */
        data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_VMID));
        data &= ~VCE_MMSCH_VF_VMID__VF_CTX_VMID_MASK;
        data |= (0 << VCE_MMSCH_VF_VMID__VF_CTX_VMID__SHIFT); /* use domain0 for MM scheduler */
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_VMID), data);

        /* 3, notify mmsch about the size of this descriptor */
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_SIZE), size);

        /* 4, set resp to zero */
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP), 0);

        WDOORBELL32(adev->vce.ring[0].doorbell_index, 0);
        adev->wb.wb[adev->vce.ring[0].wptr_offs] = 0;
        adev->vce.ring[0].wptr = 0;
        adev->vce.ring[0].wptr_old = 0;

        /* 5, kick off the initialization and wait until VCE_MMSCH_VF_MAILBOX_RESP becomes non-zero */
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_HOST), 0x10000001);

        data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP));
        loop = 1000;
        while ((data & 0x10000002) != 0x10000002) {
                udelay(10);
                data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP));
                loop--;
                if (!loop)
                        break;
        }

        if (!loop) {
                dev_err(adev->dev, "failed to init MMSCH, mmVCE_MMSCH_VF_MAILBOX_RESP = %x\n", data);
                return -EBUSY;
        }

        return 0;
}

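/**
 * vce_v4_0_sriov_start - start VCE block under SR-IOV
 *
 * @adev: amdgpu_device pointer
 *
 * Build the MMSCH init table with the ring and memory controller
 * programming and submit it to the MM scheduler instead of writing
 * the registers directly.
 */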
static int vce_v4_0_sriov_start(struct amdgpu_device *adev)
{
        struct amdgpu_ring *ring;
        uint32_t offset, size;
        uint32_t table_size = 0;
        struct mmsch_v1_0_cmd_direct_write direct_wt = { { 0 } };
        struct mmsch_v1_0_cmd_direct_read_modify_write direct_rd_mod_wt = { { 0 } };
        struct mmsch_v1_0_cmd_direct_polling direct_poll = { { 0 } };
        struct mmsch_v1_0_cmd_end end = { { 0 } };
        uint32_t *init_table = adev->virt.mm_table.cpu_addr;
        struct mmsch_v1_0_init_header *header = (struct mmsch_v1_0_init_header *)init_table;

        direct_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_WRITE;
        direct_rd_mod_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_READ_MODIFY_WRITE;
        direct_poll.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_POLLING;
        end.cmd_header.command_type = MMSCH_COMMAND__END;

        if (header->vce_table_offset == 0 && header->vce_table_size == 0) {
                header->version = MMSCH_VERSION;
                header->header_size = sizeof(struct mmsch_v1_0_init_header) >> 2;

                if (header->uvd_table_offset == 0 && header->uvd_table_size == 0)
                        header->vce_table_offset = header->header_size;
                else
                        header->vce_table_offset = header->uvd_table_size + header->uvd_table_offset;

                init_table += header->vce_table_offset;

                ring = &adev->vce.ring[0];
                MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO),
                                            lower_32_bits(ring->gpu_addr));
                MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI),
                                            upper_32_bits(ring->gpu_addr));
                MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE),
                                            ring->ring_size / 4);

                /* BEGIN OF MC_RESUME */
                MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL), 0x398000);
                MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CACHE_CTRL), ~0x1, 0);
                MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL), 0);
                MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL1), 0);
                MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VM_CTRL), 0);

                offset = AMDGPU_VCE_FIRMWARE_OFFSET;
                if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
                        uint32_t low = adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].tmr_mc_addr_lo;
                        uint32_t hi = adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].tmr_mc_addr_hi;
                        uint64_t tmr_mc_addr = (uint64_t)(hi) << 32 | low;

                        MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
                                                mmVCE_LMI_VCPU_CACHE_40BIT_BAR0), tmr_mc_addr >> 8);
                        MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
                                                mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
                                                (tmr_mc_addr >> 40) & 0xff);
                        MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0), 0);
                } else {
                        MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
                                                mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
                                                adev->vce.gpu_addr >> 8);
                        MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
                                                mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
                                                (adev->vce.gpu_addr >> 40) & 0xff);
                        MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0),
                                                offset & ~0x0f000000);

                }
                MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
                                                mmVCE_LMI_VCPU_CACHE_40BIT_BAR1),
                                                adev->vce.gpu_addr >> 8);
                MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
                                                mmVCE_LMI_VCPU_CACHE_64BIT_BAR1),
                                                (adev->vce.gpu_addr >> 40) & 0xff);
                MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
                                                mmVCE_LMI_VCPU_CACHE_40BIT_BAR2),
                                                adev->vce.gpu_addr >> 8);
                MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
                                                mmVCE_LMI_VCPU_CACHE_64BIT_BAR2),
                                                (adev->vce.gpu_addr >> 40) & 0xff);

                size = VCE_V4_0_FW_SIZE;
                MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE0), size);

                offset = (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) ? offset + size : 0;
                size = VCE_V4_0_STACK_SIZE;
                MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET1),
                                        (offset & ~0x0f000000) | (1 << 24));
                MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE1), size);

                offset += size;
                size = VCE_V4_0_DATA_SIZE;
                MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET2),
                                        (offset & ~0x0f000000) | (2 << 24));
                MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE2), size);

                MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL2), ~0x100, 0);
                MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN),
                                                   VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK,
                                                   VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);

                /* end of MC_RESUME */
                MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS),
                                                   VCE_STATUS__JOB_BUSY_MASK, ~VCE_STATUS__JOB_BUSY_MASK);
                MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL),
                                                   ~0x200001, VCE_VCPU_CNTL__CLK_EN_MASK);
                MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET),
                                                   ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK, 0);

                MMSCH_V1_0_INSERT_DIRECT_POLL(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS),
                                              VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK,
                                              VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK);

                /* clear BUSY flag */
                MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS),
                                                   ~VCE_STATUS__JOB_BUSY_MASK, 0);

                /* add end packet */
                memcpy((void *)init_table, &end, sizeof(struct mmsch_v1_0_cmd_end));
                table_size += sizeof(struct mmsch_v1_0_cmd_end) / 4;
                header->vce_table_size = table_size;
        }

        return vce_v4_0_mmsch_start(adev, &adev->virt.mm_table);
}

/**
 * vce_v4_0_start - start VCE block
 *
 * @adev: amdgpu_device pointer
 *
 * Setup and start the VCE block
 */
static int vce_v4_0_start(struct amdgpu_device *adev)
{
        struct amdgpu_ring *ring;
        int r;

        ring = &adev->vce.ring[0];

        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR), lower_32_bits(ring->wptr));
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR), lower_32_bits(ring->wptr));
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO), ring->gpu_addr);
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI), upper_32_bits(ring->gpu_addr));
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE), ring->ring_size / 4);

        ring = &adev->vce.ring[1];

        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR2), lower_32_bits(ring->wptr));
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2), lower_32_bits(ring->wptr));
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO2), ring->gpu_addr);
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI2), upper_32_bits(ring->gpu_addr));
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE2), ring->ring_size / 4);

        ring = &adev->vce.ring[2];

        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR3), lower_32_bits(ring->wptr));
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3), lower_32_bits(ring->wptr));
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO3), ring->gpu_addr);
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI3), upper_32_bits(ring->gpu_addr));
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE3), ring->ring_size / 4);

        vce_v4_0_mc_resume(adev);
        WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), VCE_STATUS__JOB_BUSY_MASK,
                        ~VCE_STATUS__JOB_BUSY_MASK);

        WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL), 1, ~0x200001);

        WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET), 0,
                        ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
        mdelay(100);

        r = vce_v4_0_firmware_loaded(adev);

        /* clear BUSY flag */
        WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), 0, ~VCE_STATUS__JOB_BUSY_MASK);

        if (r) {
                DRM_ERROR("VCE not responding, giving up!!!\n");
                return r;
        }

        return 0;
}

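/**
 * vce_v4_0_stop - stop VCE block
 *
 * @adev: amdgpu_device pointer
 *
 * Disable the VCPU, hold the ECPU in reset and clear VCE_STATUS.
 */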
static int vce_v4_0_stop(struct amdgpu_device *adev)
{

        /* Disable VCPU */
        WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL), 0, ~0x200001);

        /* hold on ECPU */
        WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET),
                        VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK,
                        ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);

        /* clear VCE_STATUS */
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), 0);

        /* Set Clock-Gating off */
        /* if (adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG)
                vce_v4_0_set_vce_sw_clock_gating(adev, false);
        */

        return 0;
}

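/**
 * vce_v4_0_early_init - set up the ring and irq callbacks
 *
 * @handle: amdgpu_device pointer
 *
 * Choose the number of rings and install the ring and irq functions.
 */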
static int vce_v4_0_early_init(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;

        if (amdgpu_sriov_vf(adev)) /* currently only VCE0 supports SRIOV */
                adev->vce.num_rings = 1;
        else
                adev->vce.num_rings = 3;

        vce_v4_0_set_ring_funcs(adev);
        vce_v4_0_set_irq_funcs(adev);

        return 0;
}

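/**
 * vce_v4_0_sw_init - sw init for VCE block
 *
 * @handle: amdgpu_device pointer
 *
 * Register the irq source, allocate the VCE buffers and initialize
 * the rings and the SR-IOV MM table.
 */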
static int vce_v4_0_sw_init(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
        struct amdgpu_ring *ring;

        unsigned size;
        int r, i;

        r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCE0, 167, &adev->vce.irq);
        if (r)
                return r;

        size  = VCE_V4_0_STACK_SIZE + VCE_V4_0_DATA_SIZE;
        if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
                size += VCE_V4_0_FW_SIZE;

        r = amdgpu_vce_sw_init(adev, size);
        if (r)
                return r;

        if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
                const struct common_firmware_header *hdr;
                unsigned size = amdgpu_bo_size(adev->vce.vcpu_bo);

                adev->vce.saved_bo = kvmalloc(size, GFP_KERNEL);
                if (!adev->vce.saved_bo)
                        return -ENOMEM;

                hdr = (const struct common_firmware_header *)adev->vce.fw->data;
                adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].ucode_id = AMDGPU_UCODE_ID_VCE;
                adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].fw = adev->vce.fw;
                adev->firmware.fw_size +=
                        ALIGN(le32_to_cpu(hdr->ucode_size_bytes), PAGE_SIZE);
                DRM_INFO("PSP loading VCE firmware\n");
        } else {
                r = amdgpu_vce_resume(adev);
                if (r)
                        return r;
        }

        for (i = 0; i < adev->vce.num_rings; i++) {
                ring = &adev->vce.ring[i];
                sprintf(ring->name, "vce%d", i);
                if (amdgpu_sriov_vf(adev)) {
                        /* DOORBELL only works under SRIOV */
                        ring->use_doorbell = true;

                        /* currently only the first encoding ring is used under SRIOV,
                         * so point the unused rings at an unused doorbell location.
                         */
                        if (i == 0)
                                ring->doorbell_index = adev->doorbell_index.uvd_vce.vce_ring0_1 * 2;
                        else
                                ring->doorbell_index = adev->doorbell_index.uvd_vce.vce_ring2_3 * 2 + 1;
                }
                r = amdgpu_ring_init(adev, ring, 512, &adev->vce.irq, 0,
                                     AMDGPU_RING_PRIO_DEFAULT, NULL);
                if (r)
                        return r;
        }

        r = amdgpu_vce_entity_init(adev);
        if (r)
                return r;

        r = amdgpu_virt_alloc_mm_table(adev);
        if (r)
                return r;

        return r;
}

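/**
 * vce_v4_0_sw_fini - sw fini for VCE block
 *
 * @handle: amdgpu_device pointer
 *
 * Free the MM table, the saved firmware buffer and the VCE resources.
 */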
static int vce_v4_0_sw_fini(void *handle)
{
        int r;
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;

        /* free MM table */
        amdgpu_virt_free_mm_table(adev);

        if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
                kvfree(adev->vce.saved_bo);
                adev->vce.saved_bo = NULL;
        }

        r = amdgpu_vce_suspend(adev);
        if (r)
                return r;

        return amdgpu_vce_sw_fini(adev);
}

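/**
 * vce_v4_0_hw_init - start and test the VCE block
 *
 * @handle: amdgpu_device pointer
 *
 * Start the hardware (through the MMSCH under SR-IOV) and run a ring
 * test on each enabled ring.
 */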
static int vce_v4_0_hw_init(void *handle)
{
        int r, i;
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;

        if (amdgpu_sriov_vf(adev))
                r = vce_v4_0_sriov_start(adev);
        else
                r = vce_v4_0_start(adev);
        if (r)
                return r;

        for (i = 0; i < adev->vce.num_rings; i++) {
                r = amdgpu_ring_test_helper(&adev->vce.ring[i]);
                if (r)
                        return r;
        }

        DRM_INFO("VCE initialized successfully.\n");

        return 0;
}

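/**
 * vce_v4_0_hw_fini - stop the VCE block
 *
 * @handle: amdgpu_device pointer
 *
 * Stop the hardware on bare metal; under SR-IOV the registers are
 * left untouched.
 */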
static int vce_v4_0_hw_fini(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;

        if (!amdgpu_sriov_vf(adev)) {
                /* vce_v4_0_wait_for_idle(handle); */
                vce_v4_0_stop(adev);
        } else {
                /* full access mode, so don't touch any VCE register */
                DRM_DEBUG("For SRIOV client, shouldn't do anything.\n");
        }

        return 0;
}

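/**
 * vce_v4_0_suspend - suspend the VCE block
 *
 * @handle: amdgpu_device pointer
 *
 * Save the VCPU buffer object when the firmware was loaded by the PSP,
 * then stop the hardware.
 */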
static int vce_v4_0_suspend(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
        int r;

        if (adev->vce.vcpu_bo == NULL)
                return 0;

        if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
                unsigned size = amdgpu_bo_size(adev->vce.vcpu_bo);
                void *ptr = adev->vce.cpu_addr;

                memcpy_fromio(adev->vce.saved_bo, ptr, size);
        }

        r = vce_v4_0_hw_fini(adev);
        if (r)
                return r;

        return amdgpu_vce_suspend(adev);
}

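/**
 * vce_v4_0_resume - resume the VCE block
 *
 * @handle: amdgpu_device pointer
 *
 * Restore the VCPU buffer object and restart the hardware.
 */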
static int vce_v4_0_resume(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
        int r;

        if (adev->vce.vcpu_bo == NULL)
                return -EINVAL;

        if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
                unsigned size = amdgpu_bo_size(adev->vce.vcpu_bo);
                void *ptr = adev->vce.cpu_addr;

                memcpy_toio(ptr, adev->vce.saved_bo, size);
        } else {
                r = amdgpu_vce_resume(adev);
                if (r)
                        return r;
        }

        return vce_v4_0_hw_init(adev);
}

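/**
 * vce_v4_0_mc_resume - program the memory controller interface
 *
 * @adev: amdgpu_device pointer
 *
 * Point the VCPU caches at the firmware, stack and data regions and
 * enable the system interrupt.
 */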
static void vce_v4_0_mc_resume(struct amdgpu_device *adev)
{
        uint32_t offset, size;
        uint64_t tmr_mc_addr;

        WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A), 0, ~(1 << 16));
        WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), 0x1FF000, ~0xFF9FF000);
        WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), 0x3F, ~0x3F);
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), 0x1FF);

        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL), 0x00398000);
        WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CACHE_CTRL), 0x0, ~0x1);
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL), 0);
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL1), 0);
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VM_CTRL), 0);

        offset = AMDGPU_VCE_FIRMWARE_OFFSET;

        if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
                tmr_mc_addr = (uint64_t)(adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].tmr_mc_addr_hi) << 32 |
                                                                                adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].tmr_mc_addr_lo;
                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
                        (tmr_mc_addr >> 8));
                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
                        (tmr_mc_addr >> 40) & 0xff);
                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0), 0);
        } else {
                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
                        (adev->vce.gpu_addr >> 8));
                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
                        (adev->vce.gpu_addr >> 40) & 0xff);
                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0), offset & ~0x0f000000);
        }

        size = VCE_V4_0_FW_SIZE;
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE0), size);

        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR1), (adev->vce.gpu_addr >> 8));
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR1), (adev->vce.gpu_addr >> 40) & 0xff);
        offset = (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) ? offset + size : 0;
        size = VCE_V4_0_STACK_SIZE;
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET1), (offset & ~0x0f000000) | (1 << 24));
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE1), size);

        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR2), (adev->vce.gpu_addr >> 8));
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR2), (adev->vce.gpu_addr >> 40) & 0xff);
        offset += size;
        size = VCE_V4_0_DATA_SIZE;
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET2), (offset & ~0x0f000000) | (2 << 24));
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE2), size);

        WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL2), 0x0, ~0x100);
        WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN),
                        VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK,
                        ~VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);
}

static int vce_v4_0_set_clockgating_state(void *handle,
                                          enum amd_clockgating_state state)
{
        /* needed for driver unload */
        return 0;
}

#if 0
static bool vce_v4_0_is_idle(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
        u32 mask = 0;

        mask |= (adev->vce.harvest_config & AMDGPU_VCE_HARVEST_VCE0) ? 0 : SRBM_STATUS2__VCE0_BUSY_MASK;
        mask |= (adev->vce.harvest_config & AMDGPU_VCE_HARVEST_VCE1) ? 0 : SRBM_STATUS2__VCE1_BUSY_MASK;

        return !(RREG32(mmSRBM_STATUS2) & mask);
}

static int vce_v4_0_wait_for_idle(void *handle)
{
        unsigned i;
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;

        for (i = 0; i < adev->usec_timeout; i++)
                if (vce_v4_0_is_idle(handle))
                        return 0;

        return -ETIMEDOUT;
}

#define  VCE_STATUS_VCPU_REPORT_AUTO_BUSY_MASK  0x00000008L   /* AUTO_BUSY */
#define  VCE_STATUS_VCPU_REPORT_RB0_BUSY_MASK   0x00000010L   /* RB0_BUSY */
#define  VCE_STATUS_VCPU_REPORT_RB1_BUSY_MASK   0x00000020L   /* RB1_BUSY */
#define  AMDGPU_VCE_STATUS_BUSY_MASK (VCE_STATUS_VCPU_REPORT_AUTO_BUSY_MASK | \
                                      VCE_STATUS_VCPU_REPORT_RB0_BUSY_MASK)

static bool vce_v4_0_check_soft_reset(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
        u32 srbm_soft_reset = 0;

        /* According to the VCE team, we should use VCE_STATUS instead of the
         * SRBM_STATUS.VCE_BUSY bit for busy status checking.
         * GRBM_GFX_INDEX.INSTANCE_INDEX is used to specify which VCE
         * instance's registers are accessed
         * (0 for 1st instance, 10 for 2nd instance).
         *
         * VCE_STATUS
         * |UENC|ACPI|AUTO ACTIVE|RB1 |RB0 |RB2 |          |FW_LOADED|JOB |
         * |----+----+-----------+----+----+----+----------+---------+----|
         * |bit8|bit7|    bit6   |bit5|bit4|bit3|   bit2   |  bit1   |bit0|
         *
         * The VCE team suggests using bit 3 to bit 6 for the busy status check.
         */
        mutex_lock(&adev->grbm_idx_mutex);
        WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0);
        if (RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS)) & AMDGPU_VCE_STATUS_BUSY_MASK) {
                srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE0, 1);
                srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE1, 1);
        }
        WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0x10);
        if (RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS)) & AMDGPU_VCE_STATUS_BUSY_MASK) {
                srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE0, 1);
                srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE1, 1);
        }
        WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0);
        mutex_unlock(&adev->grbm_idx_mutex);

        if (srbm_soft_reset) {
                adev->vce.srbm_soft_reset = srbm_soft_reset;
                return true;
        } else {
                adev->vce.srbm_soft_reset = 0;
                return false;
        }
}

static int vce_v4_0_soft_reset(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
        u32 srbm_soft_reset;

        if (!adev->vce.srbm_soft_reset)
                return 0;
        srbm_soft_reset = adev->vce.srbm_soft_reset;

        if (srbm_soft_reset) {
                u32 tmp;

                tmp = RREG32(mmSRBM_SOFT_RESET);
                tmp |= srbm_soft_reset;
                dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
                WREG32(mmSRBM_SOFT_RESET, tmp);
                tmp = RREG32(mmSRBM_SOFT_RESET);

                udelay(50);

                tmp &= ~srbm_soft_reset;
                WREG32(mmSRBM_SOFT_RESET, tmp);
                tmp = RREG32(mmSRBM_SOFT_RESET);

                /* Wait a little for things to settle down */
                udelay(50);
        }

        return 0;
}

static int vce_v4_0_pre_soft_reset(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;

        if (!adev->vce.srbm_soft_reset)
                return 0;

        mdelay(5);

        return vce_v4_0_suspend(adev);
}


static int vce_v4_0_post_soft_reset(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;

        if (!adev->vce.srbm_soft_reset)
                return 0;

        mdelay(5);

        return vce_v4_0_resume(adev);
}

static void vce_v4_0_override_vce_clock_gating(struct amdgpu_device *adev, bool override)
{
        u32 tmp, data;

        tmp = data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_ARB_CTRL));
        if (override)
                data |= VCE_RB_ARB_CTRL__VCE_CGTT_OVERRIDE_MASK;
        else
                data &= ~VCE_RB_ARB_CTRL__VCE_CGTT_OVERRIDE_MASK;

        if (tmp != data)
                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_ARB_CTRL), data);
}

static void vce_v4_0_set_vce_sw_clock_gating(struct amdgpu_device *adev,
                                             bool gated)
{
        u32 data;

        /* Set Override to disable Clock Gating */
        vce_v4_0_override_vce_clock_gating(adev, true);

        /* This function enables MGCG which is controlled by firmware.
           With the clocks in the gated state the core is still
           accessible but the firmware will throttle the clocks on the
           fly as necessary.
        */
        if (gated) {
                data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B));
                data |= 0x1ff;
                data &= ~0xef0000;
                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), data);

                data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING));
                data |= 0x3ff000;
                data &= ~0xffc00000;
                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), data);

                data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2));
                data |= 0x2;
                data &= ~0x00010000;
                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2), data);

                data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING));
                data |= 0x37f;
                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), data);

                data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL));
                data |= VCE_UENC_DMA_DCLK_CTRL__WRDMCLK_FORCEON_MASK |
                        VCE_UENC_DMA_DCLK_CTRL__RDDMCLK_FORCEON_MASK |
                        VCE_UENC_DMA_DCLK_CTRL__REGCLK_FORCEON_MASK  |
                        0x8;
                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL), data);
        } else {
                data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B));
                data &= ~0x80010;
                data |= 0xe70008;
                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), data);

                data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING));
                data |= 0xffc00000;
                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), data);

                data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2));
                data |= 0x10000;
                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2), data);

                data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING));
                data &= ~0xffc00000;
                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), data);

                data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL));
                data &= ~(VCE_UENC_DMA_DCLK_CTRL__WRDMCLK_FORCEON_MASK |
                          VCE_UENC_DMA_DCLK_CTRL__RDDMCLK_FORCEON_MASK |
                          VCE_UENC_DMA_DCLK_CTRL__REGCLK_FORCEON_MASK  |
                          0x8);
                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL), data);
        }
        vce_v4_0_override_vce_clock_gating(adev, false);
}

static void vce_v4_0_set_bypass_mode(struct amdgpu_device *adev, bool enable)
{
        u32 tmp = RREG32_SMC(ixGCK_DFS_BYPASS_CNTL);

        if (enable)
                tmp |= GCK_DFS_BYPASS_CNTL__BYPASSECLK_MASK;
        else
                tmp &= ~GCK_DFS_BYPASS_CNTL__BYPASSECLK_MASK;

        WREG32_SMC(ixGCK_DFS_BYPASS_CNTL, tmp);
}

static int vce_v4_0_set_clockgating_state(void *handle,
                                          enum amd_clockgating_state state)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
        bool enable = (state == AMD_CG_STATE_GATE);
        int i;

        if ((adev->asic_type == CHIP_POLARIS10) ||
                (adev->asic_type == CHIP_TONGA) ||
                (adev->asic_type == CHIP_FIJI))
                vce_v4_0_set_bypass_mode(adev, enable);

        if (!(adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG))
                return 0;

        mutex_lock(&adev->grbm_idx_mutex);
        for (i = 0; i < 2; i++) {
                /* Program VCE Instance 0 or 1 if not harvested */
                if (adev->vce.harvest_config & (1 << i))
                        continue;

                WREG32_FIELD(GRBM_GFX_INDEX, VCE_INSTANCE, i);

                if (enable) {
                        /* initialize VCE_CLOCK_GATING_A: Clock ON/OFF delay */
                        uint32_t data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A));
                        data &= ~(0xf | 0xff0);
                        data |= ((0x0 << 0) | (0x04 << 4));
                        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A), data);

                        /* initialize VCE_UENC_CLOCK_GATING: Clock ON/OFF delay */
                        data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING));
                        data &= ~(0xf | 0xff0);
                        data |= ((0x0 << 0) | (0x04 << 4));
                        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), data);
                }

                vce_v4_0_set_vce_sw_clock_gating(adev, enable);
        }

        WREG32_FIELD(GRBM_GFX_INDEX, VCE_INSTANCE, 0);
        mutex_unlock(&adev->grbm_idx_mutex);

        return 0;
}
#endif

static int vce_v4_0_set_powergating_state(void *handle,
                                          enum amd_powergating_state state)
{
        /* This doesn't actually powergate the VCE block.
         * That's done in the dpm code via the SMC.  This
         * just re-inits the block as necessary.  The actual
         * gating still happens in the dpm code.  We should
         * revisit this when there is a cleaner line between
         * the smc and the hw blocks
         */
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;

        if (state == AMD_PG_STATE_GATE)
                return vce_v4_0_stop(adev);
        else
                return vce_v4_0_start(adev);
}

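/**
 * vce_v4_0_ring_emit_ib - execute indirect buffer
 *
 * @ring: amdgpu_ring pointer
 * @job: job to retrieve vmid from
 * @ib: indirect buffer to execute
 * @flags: unused
 *
 * Write ring commands to execute the indirect buffer
 */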
static void vce_v4_0_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_job *job,
                                        struct amdgpu_ib *ib, uint32_t flags)
{
        unsigned vmid = AMDGPU_JOB_GET_VMID(job);

        amdgpu_ring_write(ring, VCE_CMD_IB_VM);
        amdgpu_ring_write(ring, vmid);
        amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));
        amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
        amdgpu_ring_write(ring, ib->length_dw);
}

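/**
 * vce_v4_0_ring_emit_fence - emit a fence command to the ring
 *
 * @ring: amdgpu_ring pointer
 * @addr: address the fence value is written to
 * @seq: sequence number to write
 * @flags: fence related flags
 *
 * Write a fence followed by a trap command to the ring.
 */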
static void vce_v4_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
                        u64 seq, unsigned flags)
{
        WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT);

        amdgpu_ring_write(ring, VCE_CMD_FENCE);
        amdgpu_ring_write(ring, addr);
        amdgpu_ring_write(ring, upper_32_bits(addr));
        amdgpu_ring_write(ring, seq);
        amdgpu_ring_write(ring, VCE_CMD_TRAP);
}

static void vce_v4_0_ring_insert_end(struct amdgpu_ring *ring)
{
        amdgpu_ring_write(ring, VCE_CMD_END);
}

static void vce_v4_0_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
                                   uint32_t val, uint32_t mask)
{
        amdgpu_ring_write(ring, VCE_CMD_REG_WAIT);
        amdgpu_ring_write(ring, reg << 2);
        amdgpu_ring_write(ring, mask);
        amdgpu_ring_write(ring, val);
}

static void vce_v4_0_emit_vm_flush(struct amdgpu_ring *ring,
                                   unsigned int vmid, uint64_t pd_addr)
{
        struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub];

        pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);

        /* wait for reg writes */
        vce_v4_0_emit_reg_wait(ring, hub->ctx0_ptb_addr_lo32 +
                               vmid * hub->ctx_addr_distance,
                               lower_32_bits(pd_addr), 0xffffffff);
}

static void vce_v4_0_emit_wreg(struct amdgpu_ring *ring,
                               uint32_t reg, uint32_t val)
{
        amdgpu_ring_write(ring, VCE_CMD_REG_WRITE);
        amdgpu_ring_write(ring, reg << 2);
        amdgpu_ring_write(ring, val);
}

static int vce_v4_0_set_interrupt_state(struct amdgpu_device *adev,
                                        struct amdgpu_irq_src *source,
                                        unsigned type,
                                        enum amdgpu_interrupt_state state)
{
        uint32_t val = 0;

        if (!amdgpu_sriov_vf(adev)) {
                if (state == AMDGPU_IRQ_STATE_ENABLE)
                        val |= VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK;

                WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN), val,
                                ~VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);
        }
        return 0;
}

static int vce_v4_0_process_interrupt(struct amdgpu_device *adev,
                                      struct amdgpu_irq_src *source,
                                      struct amdgpu_iv_entry *entry)
{
        DRM_DEBUG("IH: VCE\n");

        switch (entry->src_data[0]) {
        case 0:
        case 1:
        case 2:
                amdgpu_fence_process(&adev->vce.ring[entry->src_data[0]]);
                break;
        default:
                DRM_ERROR("Unhandled interrupt: %d %d\n",
                          entry->src_id, entry->src_data[0]);
                break;
        }

        return 0;
}

const struct amd_ip_funcs vce_v4_0_ip_funcs = {
        .name = "vce_v4_0",
        .early_init = vce_v4_0_early_init,
        .late_init = NULL,
        .sw_init = vce_v4_0_sw_init,
        .sw_fini = vce_v4_0_sw_fini,
        .hw_init = vce_v4_0_hw_init,
        .hw_fini = vce_v4_0_hw_fini,
        .suspend = vce_v4_0_suspend,
        .resume = vce_v4_0_resume,
        .is_idle = NULL /* vce_v4_0_is_idle */,
        .wait_for_idle = NULL /* vce_v4_0_wait_for_idle */,
        .check_soft_reset = NULL /* vce_v4_0_check_soft_reset */,
        .pre_soft_reset = NULL /* vce_v4_0_pre_soft_reset */,
        .soft_reset = NULL /* vce_v4_0_soft_reset */,
        .post_soft_reset = NULL /* vce_v4_0_post_soft_reset */,
        .set_clockgating_state = vce_v4_0_set_clockgating_state,
        .set_powergating_state = vce_v4_0_set_powergating_state,
};

static const struct amdgpu_ring_funcs vce_v4_0_ring_vm_funcs = {
        .type = AMDGPU_RING_TYPE_VCE,
        .align_mask = 0x3f,
        .nop = VCE_CMD_NO_OP,
        .support_64bit_ptrs = false,
        .no_user_fence = true,
        .vmhub = AMDGPU_MMHUB_0,
        .get_rptr = vce_v4_0_ring_get_rptr,
        .get_wptr = vce_v4_0_ring_get_wptr,
        .set_wptr = vce_v4_0_ring_set_wptr,
        .parse_cs = amdgpu_vce_ring_parse_cs_vm,
        .emit_frame_size =
                SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
                SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 4 +
                4 + /* vce_v4_0_emit_vm_flush */
                5 + 5 + /* amdgpu_vce_ring_emit_fence x2 vm fence */
                1, /* vce_v4_0_ring_insert_end */
        .emit_ib_size = 5, /* vce_v4_0_ring_emit_ib */
        .emit_ib = vce_v4_0_ring_emit_ib,
        .emit_vm_flush = vce_v4_0_emit_vm_flush,
        .emit_fence = vce_v4_0_ring_emit_fence,
        .test_ring = amdgpu_vce_ring_test_ring,
        .test_ib = amdgpu_vce_ring_test_ib,
        .insert_nop = amdgpu_ring_insert_nop,
        .insert_end = vce_v4_0_ring_insert_end,
        .pad_ib = amdgpu_ring_generic_pad_ib,
        .begin_use = amdgpu_vce_ring_begin_use,
        .end_use = amdgpu_vce_ring_end_use,
        .emit_wreg = vce_v4_0_emit_wreg,
        .emit_reg_wait = vce_v4_0_emit_reg_wait,
        .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
};

static void vce_v4_0_set_ring_funcs(struct amdgpu_device *adev)
{
        int i;

        for (i = 0; i < adev->vce.num_rings; i++) {
                adev->vce.ring[i].funcs = &vce_v4_0_ring_vm_funcs;
                adev->vce.ring[i].me = i;
        }
        DRM_INFO("VCE enabled in VM mode\n");
}

static const struct amdgpu_irq_src_funcs vce_v4_0_irq_funcs = {
        .set = vce_v4_0_set_interrupt_state,
        .process = vce_v4_0_process_interrupt,
};

static void vce_v4_0_set_irq_funcs(struct amdgpu_device *adev)
{
        adev->vce.irq.num_types = 1;
        adev->vce.irq.funcs = &vce_v4_0_irq_funcs;
};

const struct amdgpu_ip_block_version vce_v4_0_ip_block =
{
        .type = AMD_IP_BLOCK_TYPE_VCE,
        .major = 4,
        .minor = 0,
        .rev = 0,
        .funcs = &vce_v4_0_ip_funcs,
};