// SPDX-License-Identifier: GPL-2.0

/*
 * Copyright 2016-2020 HabanaLabs, Ltd.
 * All Rights Reserved.
 */

#include <linux/slab.h>

#include "../habanalabs.h"
bool hl_is_dram_va(struct hl_device *hdev, u64 virt_addr)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;

	return hl_mem_area_inside_range(virt_addr, prop->dmmu.page_size,
					prop->dmmu.start_addr,
					prop->dmmu.end_addr);
}

/**
 * hl_mmu_init() - initialize the MMU module.
 * @hdev: habanalabs device structure.
 *
 * Return: 0 for success, non-zero for failure.
 */
int hl_mmu_init(struct hl_device *hdev)
{
	int rc = 0;

	if (!hdev->mmu_enable)
		return 0;

	if (hdev->mmu_func[MMU_DR_PGT].init != NULL) {
		rc = hdev->mmu_func[MMU_DR_PGT].init(hdev);
		if (rc)
			return rc;
	}

	if (hdev->mmu_func[MMU_HR_PGT].init != NULL)
		rc = hdev->mmu_func[MMU_HR_PGT].init(hdev);

	return rc;
}

/**
 * hl_mmu_fini() - release the MMU module.
 * @hdev: habanalabs device structure.
 *
 * This function does the following:
 * - Disable MMU in H/W.
 * - Free the pgt_infos pool.
 *
 * All contexts should be freed before calling this function.
 */
void hl_mmu_fini(struct hl_device *hdev)
{
	if (!hdev->mmu_enable)
		return;

	if (hdev->mmu_func[MMU_DR_PGT].fini != NULL)
		hdev->mmu_func[MMU_DR_PGT].fini(hdev);

	if (hdev->mmu_func[MMU_HR_PGT].fini != NULL)
		hdev->mmu_func[MMU_HR_PGT].fini(hdev);
}

/**
 * hl_mmu_ctx_init() - initialize a context for using the MMU module.
 * @ctx: pointer to the context structure to initialize.
 *
 * Initialize a mutex to protect the concurrent mapping flow and a hash to hold
 * all the page-table hops related to this context.
 * Return: 0 on success, non-zero otherwise.
 */
int hl_mmu_ctx_init(struct hl_ctx *ctx)
{
	struct hl_device *hdev = ctx->hdev;
	int rc = 0;

	if (!hdev->mmu_enable)
		return 0;

	mutex_init(&ctx->mmu_lock);

	if (hdev->mmu_func[MMU_DR_PGT].ctx_init != NULL) {
		rc = hdev->mmu_func[MMU_DR_PGT].ctx_init(ctx);
		if (rc)
			return rc;
	}

	if (hdev->mmu_func[MMU_HR_PGT].ctx_init != NULL)
		rc = hdev->mmu_func[MMU_HR_PGT].ctx_init(ctx);

	return rc;
}

/*
 * hl_mmu_ctx_fini - disable a ctx from using the mmu module
 *
 * @ctx: pointer to the context structure
 *
 * This function does the following:
 * - Free any pgts which were not freed yet
 * - Free the mutex
 * - Free DRAM default page mapping hops
 */
void hl_mmu_ctx_fini(struct hl_ctx *ctx)
{
	struct hl_device *hdev = ctx->hdev;

	if (!hdev->mmu_enable)
		return;

	if (hdev->mmu_func[MMU_DR_PGT].ctx_fini != NULL)
		hdev->mmu_func[MMU_DR_PGT].ctx_fini(ctx);

	if (hdev->mmu_func[MMU_HR_PGT].ctx_fini != NULL)
		hdev->mmu_func[MMU_HR_PGT].ctx_fini(ctx);

	mutex_destroy(&ctx->mmu_lock);
}

/*
 * hl_mmu_unmap_page - unmaps a virtual addr
 *
 * @ctx: pointer to the context structure
 * @virt_addr: virt addr to unmap
 * @page_size: size of the page to unmap
 * @flush_pte: whether to do a PCI flush
 *
 * This function does the following:
 * - Check that the virt addr is mapped
 * - Unmap the virt addr and free pgts if possible
 * - Return 0 on success, -EINVAL if the given addr is not mapped
 *
 * Because this function changes the page tables in the device and because it
 * changes the MMU hash, it must be protected by a lock.
 * However, because it unmaps only a single page, the lock should be
 * implemented at a higher level in order to protect the entire unmapping of
 * the memory area.
 *
 * For optimization reasons a PCI flush may be requested once after unmapping
 * of a large area.
 */
int hl_mmu_unmap_page(struct hl_ctx *ctx, u64 virt_addr, u32 page_size,
		bool flush_pte)
{
	struct hl_device *hdev = ctx->hdev;
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct hl_mmu_properties *mmu_prop;
	u64 real_virt_addr;
	u32 real_page_size, npages;
	int i, rc = 0, pgt_residency;
	bool is_dram_addr;

	if (!hdev->mmu_enable)
		return 0;

	is_dram_addr = hl_is_dram_va(hdev, virt_addr);

	if (is_dram_addr)
		mmu_prop = &prop->dmmu;
	else if ((page_size % prop->pmmu_huge.page_size) == 0)
		mmu_prop = &prop->pmmu_huge;
	else
		mmu_prop = &prop->pmmu;

	pgt_residency = mmu_prop->host_resident ? MMU_HR_PGT : MMU_DR_PGT;

	/*
	 * The H/W handles mapping of specific page sizes. Hence if the page
	 * size is bigger, we break it to sub-pages and unmap them separately.
	 */
	if ((page_size % mmu_prop->page_size) == 0) {
		real_page_size = mmu_prop->page_size;
	} else {
		/*
		 * MMU page size may differ from DRAM page size.
		 * In such case work with the DRAM page size and let the MMU
		 * scrambling routine handle this mismatch when calculating
		 * the address to remove from the MMU page table.
		 */
		if (is_dram_addr && ((page_size % prop->dram_page_size) == 0)) {
			real_page_size = prop->dram_page_size;
		} else {
			dev_err(hdev->dev,
				"page size of %u is not %uKB aligned, can't unmap\n",
				page_size, mmu_prop->page_size >> 10);

			return -EFAULT;
		}
	}

	npages = page_size / real_page_size;
	real_virt_addr = virt_addr;

	for (i = 0 ; i < npages ; i++) {
		rc = hdev->mmu_func[pgt_residency].unmap(ctx,
						real_virt_addr, is_dram_addr);
		if (rc)
			break;

		real_virt_addr += real_page_size;
	}

	if (flush_pte)
		hdev->mmu_func[pgt_residency].flush(ctx);

	return rc;
}

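/*
 * Illustrative sketch, not part of the driver: a worked example of the
 * sub-page split that hl_mmu_unmap_page() performs above. The 16KB and 4KB
 * sizes are hypothetical values chosen only for the example.
 */
static u32 __maybe_unused example_sub_page_count(void)
{
	u32 page_size = 0x4000;		/* caller's allocation page: 16KB */
	u32 real_page_size = 0x1000;	/* hypothetical mmu_prop->page_size: 4KB */

	/* hl_mmu_unmap_page() issues one .unmap() call per sub-page */
	return page_size / real_page_size;	/* npages == 4 */
}
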
/*
 * hl_mmu_map_page - maps a virtual addr to physical addr
 *
 * @ctx: pointer to the context structure
 * @virt_addr: virt addr to map from
 * @phys_addr: phys addr to map to
 * @page_size: physical page size
 * @flush_pte: whether to do a PCI flush
 *
 * This function does the following:
 * - Check that the virt addr is not mapped
 * - Allocate pgts as necessary in order to map the virt addr to the phys
 * - Return 0 on success, -EINVAL if addr is already mapped, or -ENOMEM
 *
 * Because this function changes the page tables in the device and because it
 * changes the MMU hash, it must be protected by a lock.
 * However, because it maps only a single page, the lock should be implemented
 * at a higher level in order to protect the entire mapping of the memory area.
 *
 * For optimization reasons a PCI flush may be requested once after mapping of
 * a large area.
 */
int hl_mmu_map_page(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr,
		u32 page_size, bool flush_pte)
{
	struct hl_device *hdev = ctx->hdev;
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct hl_mmu_properties *mmu_prop;
	u64 real_virt_addr, real_phys_addr;
	u32 real_page_size, npages;
	int i, rc, pgt_residency, mapped_cnt = 0;
	bool is_dram_addr;

	if (!hdev->mmu_enable)
		return 0;

	is_dram_addr = hl_is_dram_va(hdev, virt_addr);

	if (is_dram_addr)
		mmu_prop = &prop->dmmu;
	else if ((page_size % prop->pmmu_huge.page_size) == 0)
		mmu_prop = &prop->pmmu_huge;
	else
		mmu_prop = &prop->pmmu;

	pgt_residency = mmu_prop->host_resident ? MMU_HR_PGT : MMU_DR_PGT;

	/*
	 * The H/W handles mapping of specific page sizes. Hence if the page
	 * size is bigger, we break it to sub-pages and map them separately.
	 */
	if ((page_size % mmu_prop->page_size) == 0) {
		real_page_size = mmu_prop->page_size;
	} else if (is_dram_addr && ((page_size % prop->dram_page_size) == 0) &&
			(prop->dram_page_size < mmu_prop->page_size)) {
		/*
		 * MMU page size may differ from DRAM page size.
		 * In such case work with the DRAM page size and let the MMU
		 * scrambling routine handle this mismatch when calculating
		 * the address to place in the MMU page table (in that case
		 * also make sure that the dram_page_size is smaller than the
		 * mmu page size).
		 */
		real_page_size = prop->dram_page_size;
	} else {
		dev_err(hdev->dev,
			"page size of %u is not %uKB aligned, can't map\n",
			page_size, mmu_prop->page_size >> 10);

		return -EFAULT;
	}

	/*
	 * Verify that the phys and virt addresses are aligned with the
	 * MMU page size (in dram this means checking the address and MMU
	 * page size after scrambling)
	 */
	if ((is_dram_addr &&
			((hdev->asic_funcs->scramble_addr(hdev, phys_addr) &
				(mmu_prop->page_size - 1)) ||
			(hdev->asic_funcs->scramble_addr(hdev, virt_addr) &
				(mmu_prop->page_size - 1)))) ||
		(!is_dram_addr && ((phys_addr & (real_page_size - 1)) ||
				(virt_addr & (real_page_size - 1)))))
		dev_crit(hdev->dev,
			"Mapping address 0x%llx with virtual address 0x%llx and page size of 0x%x is erroneous! Addresses must be divisible by page size",
			phys_addr, virt_addr, real_page_size);

	npages = page_size / real_page_size;
	real_virt_addr = virt_addr;
	real_phys_addr = phys_addr;

	for (i = 0 ; i < npages ; i++) {
		rc = hdev->mmu_func[pgt_residency].map(ctx,
						real_virt_addr, real_phys_addr,
						real_page_size, is_dram_addr);
		if (rc)
			goto err;

		real_virt_addr += real_page_size;
		real_phys_addr += real_page_size;
		mapped_cnt++;
	}

	if (flush_pte)
		hdev->mmu_func[pgt_residency].flush(ctx);

	return 0;

err:
	/* roll back any pages that were already mapped */
	real_virt_addr = virt_addr;
	for (i = 0 ; i < mapped_cnt ; i++) {
		if (hdev->mmu_func[pgt_residency].unmap(ctx,
						real_virt_addr, is_dram_addr))
			dev_warn_ratelimited(hdev->dev,
				"failed to unmap va: 0x%llx\n", real_virt_addr);

		real_virt_addr += real_page_size;
	}

	hdev->mmu_func[pgt_residency].flush(ctx);

	return rc;
}

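/*
 * Illustrative sketch, not part of the driver: the caller-side pattern the
 * comments above describe - hold ctx->mmu_lock around the whole region and
 * request the PCI flush only on the last page. The helper name is
 * hypothetical, error rollback is omitted (see hl_mmu_map_contiguous() below
 * for the full version) and va/pa/size are assumed to be page aligned.
 */
static int __maybe_unused example_map_region(struct hl_ctx *ctx, u64 va, u64 pa,
					u32 size, u32 page_size)
{
	u32 off;
	int rc = 0;

	mutex_lock(&ctx->mmu_lock);

	for (off = 0 ; off < size ; off += page_size) {
		/* flush PTEs to the device only on the last iteration */
		rc = hl_mmu_map_page(ctx, va + off, pa + off, page_size,
					(off + page_size) >= size);
		if (rc)
			break;
	}

	mutex_unlock(&ctx->mmu_lock);

	return rc;
}
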
/*
 * hl_mmu_map_contiguous - implements a wrapper for hl_mmu_map_page
 *                         for mapping contiguous physical memory
 *
 * @ctx: pointer to the context structure
 * @virt_addr: virt addr to map from
 * @phys_addr: phys addr to map to
 * @size: size to map
 *
 */
int hl_mmu_map_contiguous(struct hl_ctx *ctx, u64 virt_addr,
					u64 phys_addr, u32 size)
{
	struct hl_device *hdev = ctx->hdev;
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	u64 curr_va, curr_pa;
	u32 page_size;
	bool flush_pte;
	int rc = 0, off;

	if (hl_mem_area_inside_range(virt_addr, size,
			prop->dmmu.start_addr, prop->dmmu.end_addr))
		page_size = prop->dmmu.page_size;
	else if (hl_mem_area_inside_range(virt_addr, size,
			prop->pmmu.start_addr, prop->pmmu.end_addr))
		page_size = prop->pmmu.page_size;
	else if (hl_mem_area_inside_range(virt_addr, size,
			prop->pmmu_huge.start_addr, prop->pmmu_huge.end_addr))
		page_size = prop->pmmu_huge.page_size;
	else
		return -EINVAL;

	for (off = 0 ; off < size ; off += page_size) {
		curr_va = virt_addr + off;
		curr_pa = phys_addr + off;
		/* flush the page tables to the device only on the last page */
		flush_pte = (off + page_size) >= size;
		rc = hl_mmu_map_page(ctx, curr_va, curr_pa, page_size,
								flush_pte);
		if (rc) {
			dev_err(hdev->dev,
				"Map failed for va 0x%llx to pa 0x%llx\n",
				curr_va, curr_pa);
			goto unmap;
		}
	}

	return rc;

unmap:
	for (; off >= 0 ; off -= page_size) {
		curr_va = virt_addr + off;
		/* flush only after the last unmap of the rollback */
		flush_pte = (off - (s32) page_size) < 0;
		if (hl_mmu_unmap_page(ctx, curr_va, page_size, flush_pte))
			dev_warn_ratelimited(hdev->dev,
				"failed to unmap va 0x%llx\n", curr_va);
	}

	return rc;
}

/*
 * hl_mmu_unmap_contiguous - implements a wrapper for hl_mmu_unmap_page
 *                           for unmapping contiguous physical memory
 *
 * @ctx: pointer to the context structure
 * @virt_addr: virt addr to unmap
 * @size: size to unmap
 *
 */
int hl_mmu_unmap_contiguous(struct hl_ctx *ctx, u64 virt_addr, u32 size)
{
	struct hl_device *hdev = ctx->hdev;
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	u64 curr_va;
	u32 page_size;
	bool flush_pte;
	int rc = 0, off;

	if (hl_mem_area_inside_range(virt_addr, size,
			prop->dmmu.start_addr, prop->dmmu.end_addr))
		page_size = prop->dmmu.page_size;
	else if (hl_mem_area_inside_range(virt_addr, size,
			prop->pmmu.start_addr, prop->pmmu.end_addr))
		page_size = prop->pmmu.page_size;
	else if (hl_mem_area_inside_range(virt_addr, size,
			prop->pmmu_huge.start_addr, prop->pmmu_huge.end_addr))
		page_size = prop->pmmu_huge.page_size;
	else
		return -EINVAL;

	for (off = 0 ; off < size ; off += page_size) {
		curr_va = virt_addr + off;
		flush_pte = (off + page_size) >= size;
		rc = hl_mmu_unmap_page(ctx, curr_va, page_size, flush_pte);
		if (rc)
			dev_warn_ratelimited(hdev->dev,
				"Unmap failed for va 0x%llx\n", curr_va);
	}

	return rc;
}

/*
 * hl_mmu_swap_out - marks all mappings of the given ctx as swapped out
 *
 * @ctx: pointer to the context structure
 *
 */
void hl_mmu_swap_out(struct hl_ctx *ctx)
{
	struct hl_device *hdev = ctx->hdev;

	if (!hdev->mmu_enable)
		return;

	if (hdev->mmu_func[MMU_DR_PGT].swap_out != NULL)
		hdev->mmu_func[MMU_DR_PGT].swap_out(ctx);

	if (hdev->mmu_func[MMU_HR_PGT].swap_out != NULL)
		hdev->mmu_func[MMU_HR_PGT].swap_out(ctx);
}

/*
 * hl_mmu_swap_in - marks all mappings of the given ctx as swapped in
 *
 * @ctx: pointer to the context structure
 *
 */
void hl_mmu_swap_in(struct hl_ctx *ctx)
{
	struct hl_device *hdev = ctx->hdev;

	if (!hdev->mmu_enable)
		return;

	if (hdev->mmu_func[MMU_DR_PGT].swap_in != NULL)
		hdev->mmu_func[MMU_DR_PGT].swap_in(ctx);

	if (hdev->mmu_func[MMU_HR_PGT].swap_in != NULL)
		hdev->mmu_func[MMU_HR_PGT].swap_in(ctx);
}

static void hl_mmu_pa_page_with_offset(struct hl_ctx *ctx, u64 virt_addr,
					struct hl_mmu_hop_info *hops,
					u64 *phys_addr)
{
	struct hl_device *hdev = ctx->hdev;
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	u64 offset_mask, addr_mask, hop_shift, tmp_phys_addr;
	u32 hop0_shift_off;
	void *p;

	/* last hop holds the phys address and flags */
	if (hops->unscrambled_paddr)
		tmp_phys_addr = hops->unscrambled_paddr;
	else
		tmp_phys_addr = hops->hop_info[hops->used_hops - 1].hop_pte_val;

	if (hops->range_type == HL_VA_RANGE_TYPE_HOST_HUGE)
		p = &prop->pmmu_huge;
	else if (hops->range_type == HL_VA_RANGE_TYPE_HOST)
		p = &prop->pmmu;
	else /* HL_VA_RANGE_TYPE_DRAM */
		p = &prop->dmmu;

	if ((hops->range_type == HL_VA_RANGE_TYPE_DRAM) &&
			!is_power_of_2(prop->dram_page_size)) {
		u64 page_offset_mask;
		u64 phys_addr_mask;
		u32 bit;

		/*
		 * The DRAM page size is not a power of 2, so it cannot be used
		 * as a mask directly. Bits below the lowest set bit of the
		 * DRAM page size are treated as the page offset.
		 */
		bit = __ffs64((u64)prop->dram_page_size);
		page_offset_mask = ((1ull << bit) - 1);
		phys_addr_mask = ~page_offset_mask;
		*phys_addr = (tmp_phys_addr & phys_addr_mask) |
				(virt_addr & page_offset_mask);
	} else {
		/*
		 * find the correct hop shift field in hl_mmu_properties
		 * structure in order to determine the right masks
		 * for the page offset.
		 */
		hop0_shift_off = offsetof(struct hl_mmu_properties, hop0_shift);
		p = (char *)p + hop0_shift_off;
		p = (char *)p + ((hops->used_hops - 1) * sizeof(u64));
		hop_shift = *(u64 *)p;
		offset_mask = (1ull << hop_shift) - 1;
		addr_mask = ~(offset_mask);
		*phys_addr = (tmp_phys_addr & addr_mask) |
				(virt_addr & offset_mask);
	}
}

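/*
 * Illustrative sketch, not part of the driver: the pointer walk in
 * hl_mmu_pa_page_with_offset() is equivalent to indexing consecutive u64
 * shift fields, assuming hop0_shift..hopN_shift are laid out back to back in
 * struct hl_mmu_properties. For used_hops == 5 this reads hop4_shift.
 */
static u64 __maybe_unused example_last_hop_shift(struct hl_mmu_properties *mmu_prop,
						u32 used_hops)
{
	u64 *shifts = &mmu_prop->hop0_shift;

	return shifts[used_hops - 1];
}
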
int hl_mmu_va_to_pa(struct hl_ctx *ctx, u64 virt_addr, u64 *phys_addr)
{
	struct hl_mmu_hop_info hops;
	int rc;

	rc = hl_mmu_get_tlb_info(ctx, virt_addr, &hops);
	if (rc)
		return rc;

	hl_mmu_pa_page_with_offset(ctx, virt_addr, &hops, phys_addr);

	return 0;
}

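/*
 * Illustrative sketch, not part of the driver: a typical debug-style use of
 * hl_mmu_va_to_pa(). The helper name is hypothetical.
 */
static int __maybe_unused example_translate_va(struct hl_ctx *ctx, u64 va)
{
	u64 pa;
	int rc;

	rc = hl_mmu_va_to_pa(ctx, va, &pa);
	if (rc)
		return rc;

	dev_dbg(ctx->hdev->dev, "va 0x%llx translates to pa 0x%llx\n", va, pa);

	return 0;
}
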
int hl_mmu_get_tlb_info(struct hl_ctx *ctx, u64 virt_addr,
			struct hl_mmu_hop_info *hops)
{
	struct hl_device *hdev = ctx->hdev;
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct hl_mmu_properties *mmu_prop;
	bool is_dram_addr;
	int rc;

	if (!hdev->mmu_enable)
		return -EOPNOTSUPP;

	hops->scrambled_vaddr = virt_addr;	/* assume no scrambling */

	is_dram_addr = hl_mem_area_inside_range(virt_addr, prop->dmmu.page_size,
						prop->dmmu.start_addr,
						prop->dmmu.end_addr);

	/* host-residency is the same in PMMU and HPMMU, use one of them */
	mmu_prop = is_dram_addr ? &prop->dmmu : &prop->pmmu;

	mutex_lock(&ctx->mmu_lock);

	if (mmu_prop->host_resident)
		rc = hdev->mmu_func[MMU_HR_PGT].get_tlb_info(ctx,
							virt_addr, hops);
	else
		rc = hdev->mmu_func[MMU_DR_PGT].get_tlb_info(ctx,
							virt_addr, hops);

	mutex_unlock(&ctx->mmu_lock);

	/* add page offset to physical address */
	if (hops->unscrambled_paddr)
		hl_mmu_pa_page_with_offset(ctx, virt_addr, hops,
						&hops->unscrambled_paddr);

	return rc;
}

int hl_mmu_if_set_funcs(struct hl_device *hdev)
{
	if (!hdev->mmu_enable)
		return 0;

	switch (hdev->asic_type) {
	case ASIC_GOYA:
	case ASIC_GAUDI:
		hl_mmu_v1_set_funcs(hdev, &hdev->mmu_func[MMU_DR_PGT]);
		break;
	default:
		dev_err(hdev->dev, "Unrecognized ASIC type %d\n",
			hdev->asic_type);
		return -EOPNOTSUPP;
	}

	return 0;
}

/**
 * hl_mmu_scramble_addr() - The generic mmu address scrambling routine.
 * @hdev: pointer to device data.
 * @addr: The address to scramble.
 *
 * Return: The scrambled address.
 */
u64 hl_mmu_scramble_addr(struct hl_device *hdev, u64 addr)
{
	return addr;
}

/**
 * hl_mmu_descramble_addr() - The generic mmu address descrambling routine.
 * @hdev: pointer to device data.
 * @addr: The address to descramble.
 *
 * Return: The un-scrambled address.
 */
u64 hl_mmu_descramble_addr(struct hl_device *hdev, u64 addr)
{
	return addr;
}
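
/*
 * Illustrative sketch, not part of the driver: an ASIC that scrambles DRAM
 * addresses would override the generic routines above through its asic_funcs
 * (see the scramble_addr calls in hl_mmu_map_page()). The bit manipulation
 * below is purely hypothetical and only shows the expected shape of such an
 * override.
 */
static u64 __maybe_unused example_asic_scramble_addr(struct hl_device *hdev,
						u64 addr)
{
	/* hypothetical: keep only the low 50 address bits */
	return addr & GENMASK_ULL(49, 0);
}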