2 * Re-map IO memory to kernel address space so that we can access it.
3 * This is needed for high PCI addresses that aren't mapped in the
4 * 640k-1MB IO memory area on PCs.
6 * (C) Copyright 1995 1996 Linus Torvalds
9 #include <linux/bootmem.h>
10 #include <linux/init.h>
12 #include <linux/ioport.h>
13 #include <linux/slab.h>
14 #include <linux/vmalloc.h>
15 #include <linux/mmiotrace.h>
16 #include <linux/mem_encrypt.h>
17 #include <linux/efi.h>
19 #include <asm/set_memory.h>
20 #include <asm/e820/api.h>
21 #include <asm/fixmap.h>
22 #include <asm/pgtable.h>
23 #include <asm/tlbflush.h>
24 #include <asm/pgalloc.h>
26 #include <asm/setup.h>
31 * Fix up the linear direct mapping of the kernel to avoid cache attribute
/*
 * ioremap_change_attr() - apply a page cache mode to a virtual range
 * through the _set_memory_*() helpers.
 * @vaddr: start address handed to the selected helper
 * @size:  length in bytes, converted to a page count below
 * @pcm:   page cache mode; each visible case label is paired with the
 *         helper of the same suffix (uc, wc, wt, wb)
 *
 * The helper result is stored in err.
 * NOTE(review): the statement returning err is not visible in this
 * listing - confirm.
 */
34 int ioremap_change_attr(unsigned long vaddr, unsigned long size,
35 enum page_cache_mode pcm)
/* Shift drops any sub-page remainder of size. */
37 unsigned long nrpages = size >> PAGE_SHIFT;
41 case _PAGE_CACHE_MODE_UC:
43 err = _set_memory_uc(vaddr, nrpages);
45 case _PAGE_CACHE_MODE_WC:
46 err = _set_memory_wc(vaddr, nrpages);
48 case _PAGE_CACHE_MODE_WT:
49 err = _set_memory_wt(vaddr, nrpages);
51 case _PAGE_CACHE_MODE_WB:
52 err = _set_memory_wb(vaddr, nrpages);
/*
 * Callback handed to walk_system_ram_range() by __ioremap_caller().
 * Scans nr_pages page frames starting at start_pfn and tests each one
 * for being a valid pfn whose struct page is not PageReserved.
 * NOTE(review): the return statements are not visible in this listing;
 * the caller compares the walk result against 1.
 */
59 static int __ioremap_check_ram(unsigned long start_pfn, unsigned long nr_pages,
64 for (i = 0; i < nr_pages; ++i)
65 if (pfn_valid(start_pfn + i) &&
66 !PageReserved(pfn_to_page(start_pfn + i)))
73 * Remap an arbitrary physical address space into the kernel virtual
74 * address space. It transparently creates kernel huge I/O mapping when
75 * the physical address is aligned by a huge page size (1GB or 2MB) and
76 * the requested size is at least the huge page size.
78 * NOTE: MTRRs can override PAT memory types with a 4KB granularity.
79 * Therefore, the mapping code falls back to use a smaller page toward 4KB
80 * when a mapping range is covered by non-WB type of MTRRs.
82 * NOTE! We need to allow non-page-aligned mappings too: we will obviously
83 * have to convert them into an offset in a page-aligned mapping, but the
84 * caller shouldn't need to know that small detail.
/*
 * __ioremap_caller() - common worker behind the ioremap_*() entry points
 * in this file.
 * @phys_addr: physical start address; does not have to be page aligned
 * @size:      length in bytes of the region to map
 * @pcm:       requested page cache mode
 * @caller:    passed to get_vm_area_caller() and printed with %pS in the
 *             multiple-BAR warning
 *
 * Visible steps, in order: check for zero size or address wraparound,
 * check phys_addr_valid(), WARN_ONCE when walk_system_ram_range() reports
 * 1 for the pfn range, page-align the range, reserve_memtype(), compare
 * pcm against new_pcm, build prot starting from PAGE_KERNEL_IO, take a
 * VM_IOREMAP area, kernel_map_sync_memtype(), ioremap_page_range(), then
 * report the mapping to mmiotrace and run iomem_map_sanity_check().
 *
 * ret_addr is vaddr + offset, so the sub-page offset of the original
 * phys_addr is preserved in the resulting pointer.
 * NOTE(review): the return statements and error labels are not visible
 * in this listing.
 */
86 static void __iomem *__ioremap_caller(resource_size_t phys_addr,
87 unsigned long size, enum page_cache_mode pcm, void *caller)
89 unsigned long offset, vaddr;
90 resource_size_t pfn, last_pfn, last_addr;
/*
 * phys_addr and size are rewritten by the page alignment further down;
 * mmiotrace and the BAR sanity check are given these untouched copies.
 */
91 const resource_size_t unaligned_phys_addr = phys_addr;
92 const unsigned long unaligned_size = size;
93 struct vm_struct *area;
94 enum page_cache_mode new_pcm;
97 void __iomem *ret_addr;
99 /* Don't allow wraparound or zero size */
100 last_addr = phys_addr + size - 1;
101 if (!size || last_addr < phys_addr)
104 if (!phys_addr_valid(phys_addr)) {
105 printk(KERN_WARNING "ioremap: invalid physical address %llx\n",
106 (unsigned long long)phys_addr);
112 * Don't allow anybody to remap normal RAM that we're using..
114 pfn = phys_addr >> PAGE_SHIFT;
115 last_pfn = last_addr >> PAGE_SHIFT;
/* The page count is inclusive of both the first and the last pfn. */
116 if (walk_system_ram_range(pfn, last_pfn - pfn + 1, NULL,
117 __ioremap_check_ram) == 1) {
118 WARN_ONCE(1, "ioremap on RAM at %pa - %pa\n",
119 &phys_addr, &last_addr);
124 * Mappings have to be page-aligned
/* offset is the sub-page part; it is added back onto vaddr at the end. */
126 offset = phys_addr & ~PAGE_MASK;
127 phys_addr &= PHYSICAL_PAGE_MASK;
/* size now runs from the aligned start to the page-aligned end. */
128 size = PAGE_ALIGN(last_addr+1) - phys_addr;
130 retval = reserve_memtype(phys_addr, (u64)phys_addr + size,
133 printk(KERN_ERR "ioremap reserve_memtype failed %d\n", retval);
/* new_pcm differing from pcm is only accepted if the check below passes. */
137 if (pcm != new_pcm) {
138 if (!is_new_memtype_allowed(phys_addr, size, pcm, new_pcm)) {
140 "ioremap error for 0x%llx-0x%llx, requested 0x%x, got 0x%x\n",
141 (unsigned long long)phys_addr,
142 (unsigned long long)(phys_addr + size),
144 goto err_free_memtype;
/*
 * prot starts as PAGE_KERNEL_IO and the protection bits of the selected
 * cache mode are ORed in; no OR is visible under the WB case.
 */
149 prot = PAGE_KERNEL_IO;
151 case _PAGE_CACHE_MODE_UC:
153 prot = __pgprot(pgprot_val(prot) |
154 cachemode2protval(_PAGE_CACHE_MODE_UC));
156 case _PAGE_CACHE_MODE_UC_MINUS:
157 prot = __pgprot(pgprot_val(prot) |
158 cachemode2protval(_PAGE_CACHE_MODE_UC_MINUS));
160 case _PAGE_CACHE_MODE_WC:
161 prot = __pgprot(pgprot_val(prot) |
162 cachemode2protval(_PAGE_CACHE_MODE_WC));
164 case _PAGE_CACHE_MODE_WT:
165 prot = __pgprot(pgprot_val(prot) |
166 cachemode2protval(_PAGE_CACHE_MODE_WT));
168 case _PAGE_CACHE_MODE_WB:
175 area = get_vm_area_caller(size, VM_IOREMAP, caller);
177 goto err_free_memtype;
/* Recorded so that iounmap() can release the memtype for this range. */
178 area->phys_addr = phys_addr;
179 vaddr = (unsigned long) area->addr;
181 if (kernel_map_sync_memtype(phys_addr, size, pcm))
184 if (ioremap_page_range(vaddr, vaddr + size, phys_addr, prot))
187 ret_addr = (void __iomem *) (vaddr + offset);
188 mmiotrace_ioremap(unaligned_phys_addr, unaligned_size, ret_addr);
191 * Check if the request spans more than any BAR in the iomem resource
194 if (iomem_map_sanity_check(unaligned_phys_addr, unaligned_size))
195 pr_warn("caller %pS mapping multiple BARs\n", caller);
/*
 * Releases the range reserved with reserve_memtype() above.
 * NOTE(review): presumably the err_free_memtype target; the label line
 * is not visible in this listing.
 */
201 free_memtype(phys_addr, phys_addr + size);
206 * ioremap_nocache - map bus memory into CPU space
207 * @phys_addr: bus address of the memory
208 * @size: size of the resource to map
210 * ioremap_nocache performs a platform specific sequence of operations to
211 * make bus memory CPU accessible via the readb/readw/readl/writeb/
212 * writew/writel functions and the other mmio helpers. The returned
213 * address is not guaranteed to be usable directly as a virtual
216 * This version of ioremap ensures that the memory is marked uncachable
217 * on the CPU as well as honouring existing caching rules from things like
218 * the PCI bus. Note that there are other caches and buffers on many
219 * busses. In particular driver authors should read up on PCI writes
221 * It's useful if some control registers are in such an area and
222 * write combining or read caching is not desirable.
224 * Must be freed with iounmap.
/*
 * Requests _PAGE_CACHE_MODE_UC_MINUS from __ioremap_caller() and tags
 * the mapping with the return address of this call.
 */
226 void __iomem *ioremap_nocache(resource_size_t phys_addr, unsigned long size)
229 * Ideally, this should be:
230 * pat_enabled() ? _PAGE_CACHE_MODE_UC : _PAGE_CACHE_MODE_UC_MINUS;
232 * Till we fix all X drivers to use ioremap_wc(), we will use
233 * UC MINUS. Drivers that are certain they need or can already
234 * be converted over to strong UC can use ioremap_uc().
/* UC_MINUS, not strong UC: see the rationale just above. */
236 enum page_cache_mode pcm = _PAGE_CACHE_MODE_UC_MINUS;
238 return __ioremap_caller(phys_addr, size, pcm,
239 __builtin_return_address(0));
241 EXPORT_SYMBOL(ioremap_nocache);
244 * ioremap_uc - map bus memory into CPU space as strongly uncachable
245 * @phys_addr: bus address of the memory
246 * @size: size of the resource to map
248 * ioremap_uc performs a platform specific sequence of operations to
249 * make bus memory CPU accessible via the readb/readw/readl/writeb/
250 * writew/writel functions and the other mmio helpers. The returned
251 * address is not guaranteed to be usable directly as a virtual
254 * This version of ioremap ensures that the memory is marked with a strong
255 * preference as completely uncachable on the CPU when possible. For non-PAT
256 * systems this ends up setting page-attribute flags PCD=1, PWT=1. For PAT
257 * systems this will set the PAT entry for the pages as strong UC. This call
258 * will honor existing caching rules from things like the PCI bus. Note that
259 * there are other caches and buffers on many busses. In particular driver
260 * authors should read up on PCI writes.
262 * It's useful if some control registers are in such an area and
263 * write combining or read caching is not desirable.
265 * Must be freed with iounmap.
/*
 * Requests _PAGE_CACHE_MODE_UC from __ioremap_caller() and tags the
 * mapping with the return address of this call.
 * Exported with EXPORT_SYMBOL_GPL.
 */
267 void __iomem *ioremap_uc(resource_size_t phys_addr, unsigned long size)
269 enum page_cache_mode pcm = _PAGE_CACHE_MODE_UC;
271 return __ioremap_caller(phys_addr, size, pcm,
272 __builtin_return_address(0));
274 EXPORT_SYMBOL_GPL(ioremap_uc);
277 * ioremap_wc - map memory into CPU space write combined
278 * @phys_addr: bus address of the memory
279 * @size: size of the resource to map
281 * This version of ioremap ensures that the memory is marked write combining.
282 * Write combining allows faster writes to some hardware devices.
284 * Must be freed with iounmap.
/*
 * Thin wrapper: forwards to __ioremap_caller() with _PAGE_CACHE_MODE_WC
 * and the return address of this call as the caller tag.
 */
286 void __iomem *ioremap_wc(resource_size_t phys_addr, unsigned long size)
288 return __ioremap_caller(phys_addr, size, _PAGE_CACHE_MODE_WC,
289 __builtin_return_address(0));
291 EXPORT_SYMBOL(ioremap_wc);
294 * ioremap_wt - map memory into CPU space write through
295 * @phys_addr: bus address of the memory
296 * @size: size of the resource to map
298 * This version of ioremap ensures that the memory is marked write through.
299 * Write through stores data into memory while keeping the cache up-to-date.
301 * Must be freed with iounmap.
/*
 * Thin wrapper: forwards to __ioremap_caller() with _PAGE_CACHE_MODE_WT
 * and the return address of this call as the caller tag.
 */
303 void __iomem *ioremap_wt(resource_size_t phys_addr, unsigned long size)
305 return __ioremap_caller(phys_addr, size, _PAGE_CACHE_MODE_WT,
306 __builtin_return_address(0));
308 EXPORT_SYMBOL(ioremap_wt);
/*
 * ioremap_cache - map memory into CPU space using _PAGE_CACHE_MODE_WB
 * @phys_addr: physical address of the memory
 * @size: size of the resource to map
 *
 * Forwards to __ioremap_caller() with the return address of this call
 * as the caller tag.
 */
310 void __iomem *ioremap_cache(resource_size_t phys_addr, unsigned long size)
312 return __ioremap_caller(phys_addr, size, _PAGE_CACHE_MODE_WB,
313 __builtin_return_address(0));
315 EXPORT_SYMBOL(ioremap_cache);
/*
 * ioremap_prot - map memory with a cache mode taken from protection bits
 * @phys_addr: physical address of the memory
 * @size: size of the resource to map
 * @prot_val: raw protection value; converted with pgprot2cachemode()
 *
 * NOTE(review): only the cache mode derived from prot_val is forwarded
 * by this call; other bits of prot_val do not reach __ioremap_caller()
 * from here.
 */
317 void __iomem *ioremap_prot(resource_size_t phys_addr, unsigned long size,
318 unsigned long prot_val)
320 return __ioremap_caller(phys_addr, size,
321 pgprot2cachemode(__pgprot(prot_val)),
322 __builtin_return_address(0));
324 EXPORT_SYMBOL(ioremap_prot);
327 * iounmap - Free an IO remapping
328 * @addr: virtual address from ioremap_*
330 * Caller must ensure there is only one unmapping for the same pointer.
332 void iounmap(volatile void __iomem *addr)
334 struct vm_struct *p, *o;
/*
 * Pointers at or below high_memory are tested first.
 * NOTE(review): the statement taken on this branch is not visible in
 * this listing.
 */
336 if ((void __force *)addr <= high_memory)
340 * The PCI/ISA range special-casing was removed from __ioremap()
341 * so this check, in theory, can be removed. However, there are
342 * cases where iounmap() is called for addresses not obtained via
343 * ioremap() (vga16fb for example). Add a warning so that these
344 * cases can be caught and fixed.
346 if ((void __force *)addr >= phys_to_virt(ISA_START_ADDRESS) &&
347 (void __force *)addr < phys_to_virt(ISA_END_ADDRESS)) {
348 WARN(1, "iounmap() called for ISA range not obtained using ioremap()\n");
/*
 * Round addr down to its page start: __ioremap_caller() hands back the
 * area base plus a sub-page offset.
 */
352 addr = (volatile void __iomem *)
353 (PAGE_MASK & (unsigned long __force)addr);
355 mmiotrace_iounmap(addr);
357 /* Use the vm area unlocked, assuming the caller
358 ensures there isn't another iounmap for the same address
359 in parallel. Reuse of the virtual address is prevented by
360 leaving it in the global lists until we're done with it.
361 cpa takes care of the direct mappings. */
362 p = find_vm_area((void __force *)addr);
365 printk(KERN_ERR "iounmap: bad address %p\n", addr);
/*
 * Release the memtype for the physical range recorded in the vm area:
 * p->phys_addr up to p->phys_addr plus get_vm_area_size(p).
 */
370 free_memtype(p->phys_addr, p->phys_addr + get_vm_area_size(p));
372 /* Finally remove it */
373 o = remove_vm_area((void __force *)addr);
/* The area removed must be the one found by find_vm_area() above. */
374 BUG_ON(p != o || o == NULL);
377 EXPORT_SYMBOL(iounmap);
/*
 * Reports whether the boot CPU has X86_FEATURE_GBPAGES.
 * NOTE(review): any preprocessor conditional around the return is not
 * visible in this listing.
 */
379 int __init arch_ioremap_pud_supported(void)
382 return boot_cpu_has(X86_FEATURE_GBPAGES);
/* Reports whether the boot CPU has X86_FEATURE_PSE. */
388 int __init arch_ioremap_pmd_supported(void)
390 return boot_cpu_has(X86_FEATURE_PSE);
394 * Convert a physical pointer to a virtual kernel pointer for /dev/mem
/*
 * Splits phys into its page base (start) and in-page offset, then maps
 * exactly one page at start with memremap() and MEMREMAP_WB.
 * NOTE(review): start is unsigned long while phys is phys_addr_t;
 * confirm nothing is truncated where phys_addr_t is the wider type.
 */
397 void *xlate_dev_mem_ptr(phys_addr_t phys)
399 unsigned long start = phys & PAGE_MASK;
400 unsigned long offset = phys & ~PAGE_MASK;
403 /* memremap() maps if RAM, otherwise falls back to ioremap() */
404 vaddr = memremap(start, PAGE_SIZE, MEMREMAP_WB);
406 /* Only add the offset on success and return NULL if memremap() failed */
/*
 * Masks addr back down to its page start before memunmap(), undoing the
 * in-page offset described in xlate_dev_mem_ptr().
 * phys is not used by the visible statement.
 */
413 void unxlate_dev_mem_ptr(phys_addr_t phys, void *addr)
415 memunmap((void *)((unsigned long)addr & PAGE_MASK));
419 * Examine the physical address to determine if it is an area of memory
420 * that should be mapped decrypted. If the memory is not part of the
421 * kernel usable area it was accessed and created decrypted, so these
422 * areas should be mapped decrypted. And since the encryption key can
423 * change across reboots, persistent memory should also be mapped
426 * If SEV is active, that implies that BIOS/UEFI also ran encrypted so
427 * only persistent memory should be mapped decrypted.
/*
 * Visible checks, in order:
 *  1. region_intersects() against IORES_DESC_PERSISTENT_MEMORY; any
 *     result other than REGION_DISJOINT takes the first branch.
 *  2. With EFI_BOOT enabled, the EFI memory type of phys_addr, and for
 *     EFI_RESERVED_TYPE the EFI_MEMORY_NV attribute.
 *  3. The e820 entry type covering phys_addr .. phys_addr + size - 1.
 * NOTE(review): the return statements of each branch are not visible in
 * this listing.
 */
429 static bool memremap_should_map_decrypted(resource_size_t phys_addr,
435 * Check if the address is part of a persistent memory region.
436 * This check covers areas added by E820, EFI and ACPI.
438 is_pmem = region_intersects(phys_addr, size, IORESOURCE_MEM,
439 IORES_DESC_PERSISTENT_MEMORY);
440 if (is_pmem != REGION_DISJOINT)
444 * Check if the non-volatile attribute is set for an EFI
447 if (efi_enabled(EFI_BOOT)) {
448 switch (efi_mem_type(phys_addr)) {
449 case EFI_RESERVED_TYPE:
450 if (efi_mem_attributes(phys_addr) & EFI_MEMORY_NV)
458 /* Check if the address is outside kernel usable area */
/* The end address passed to the e820 lookup is inclusive. */
459 switch (e820__get_entry_type(phys_addr, phys_addr + size - 1)) {
460 case E820_TYPE_RESERVED:
463 case E820_TYPE_UNUSABLE:
464 /* For SEV, these areas are encrypted */
479 * Examine the physical address to determine if it is EFI data. Check
480 * it against the boot params structure and EFI tables and memory types.
/*
 * Compares phys_addr against, in order: the EFI memmap address and the
 * EFI system table address from boot_params.efi_info (each assembled
 * from a _hi field ORed with the low field), efi_is_table_address(),
 * and finally the EFI memory type of phys_addr.
 * NOTE(review): the shift applied to the _hi fields and the return
 * statements are not visible in this listing.
 */
482 static bool memremap_is_efi_data(resource_size_t phys_addr,
487 /* Check if the address is part of EFI boot/runtime data */
488 if (!efi_enabled(EFI_BOOT))
491 paddr = boot_params.efi_info.efi_memmap_hi;
493 paddr |= boot_params.efi_info.efi_memmap;
494 if (phys_addr == paddr)
497 paddr = boot_params.efi_info.efi_systab_hi;
499 paddr |= boot_params.efi_info.efi_systab;
500 if (phys_addr == paddr)
503 if (efi_is_table_address(phys_addr))
506 switch (efi_mem_type(phys_addr)) {
507 case EFI_BOOT_SERVICES_DATA:
508 case EFI_RUNTIME_SERVICES_DATA:
518 * Examine the physical address to determine if it is boot data by checking
519 * it against the boot params setup_data chain.
/*
 * Walks the setup_data chain starting at boot_params.hdr.setup_data.
 * Each node header is mapped with memremap() using
 * MEMREMAP_WB | MEMREMAP_DEC so that data->next can be read.
 * phys_addr is compared for equality with the node address and for
 * lying strictly between paddr and paddr + len.
 * NOTE(review): the loop construct, the computation of len, the unmap
 * call and the return statements are not visible in this listing.
 */
521 static bool memremap_is_setup_data(resource_size_t phys_addr,
524 struct setup_data *data;
525 u64 paddr, paddr_next;
527 paddr = boot_params.hdr.setup_data;
531 if (phys_addr == paddr)
534 data = memremap(paddr, sizeof(*data),
535 MEMREMAP_WB | MEMREMAP_DEC);
537 paddr_next = data->next;
/* Both bounds are exclusive; equality with paddr is tested earlier. */
542 if ((phys_addr > paddr) && (phys_addr < (paddr + len)))
552 * Examine the physical address to determine if it is boot data by checking
553 * it against the boot params setup_data chain (early boot version).
/*
 * Early-boot walk of the setup_data chain starting at
 * boot_params.hdr.setup_data. Each node header is mapped with
 * early_memremap_decrypted(), data->next is read, and the mapping is
 * dropped again with early_memunmap() before the range comparison.
 * NOTE(review): the loop construct, the computation of len and the
 * return statements are not visible in this listing.
 */
555 static bool __init early_memremap_is_setup_data(resource_size_t phys_addr,
558 struct setup_data *data;
559 u64 paddr, paddr_next;
561 paddr = boot_params.hdr.setup_data;
565 if (phys_addr == paddr)
568 data = early_memremap_decrypted(paddr, sizeof(*data));
570 paddr_next = data->next;
573 early_memunmap(data, sizeof(*data));
/* Both bounds are exclusive; equality with paddr is tested earlier. */
575 if ((phys_addr > paddr) && (phys_addr < (paddr + len)))
585 * Architecture function to determine if RAM remap is allowed. By default, a
586 * RAM remap will map the data as encrypted. Determine if a RAM remap should
587 * not be done so that the data will be mapped decrypted.
/*
 * Visible decision order: mem_encrypt_active(), then the MEMREMAP_ENC
 * flag, then the MEMREMAP_DEC flag, then the setup_data / EFI data
 * checks, and finally the negation of memremap_should_map_decrypted().
 * NOTE(review): the values returned by the earlier branches, and any
 * condition wrapped around the setup_data / EFI check, are not visible
 * in this listing.
 */
589 bool arch_memremap_can_ram_remap(resource_size_t phys_addr, unsigned long size,
592 if (!mem_encrypt_active())
595 if (flags & MEMREMAP_ENC)
598 if (flags & MEMREMAP_DEC)
602 if (memremap_is_setup_data(phys_addr, size) ||
603 memremap_is_efi_data(phys_addr, size))
607 return !memremap_should_map_decrypted(phys_addr, size);
611 * Architecture override of __weak function to adjust the protection attributes
612 * used when remapping memory. By default, early_memremap() will map the data
613 * as encrypted. Determine if an encrypted mapping should not be done and set
614 * the appropriate protection attributes.
/*
 * encrypted_prot starts out true and is cleared when the range is
 * setup_data or EFI data, or when memremap_should_map_decrypted() says
 * so. The result is prot passed through pgprot_encrypted() or
 * pgprot_decrypted() accordingly.
 * NOTE(review): the branch taken when memory encryption is inactive,
 * and any condition around the setup_data / EFI check, are not visible
 * in this listing.
 */
616 pgprot_t __init early_memremap_pgprot_adjust(resource_size_t phys_addr,
622 if (!mem_encrypt_active())
625 encrypted_prot = true;
628 if (early_memremap_is_setup_data(phys_addr, size) ||
629 memremap_is_efi_data(phys_addr, size))
630 encrypted_prot = false;
/* Only consulted while the mapping is still marked encrypted. */
633 if (encrypted_prot && memremap_should_map_decrypted(phys_addr, size))
634 encrypted_prot = false;
636 return encrypted_prot ? pgprot_encrypted(prot)
637 : pgprot_decrypted(prot);
/*
 * Answers with arch_memremap_can_ram_remap() called with flags 0, so
 * neither the MEMREMAP_ENC nor the MEMREMAP_DEC shortcut applies.
 * NOTE(review): phys_addr is unsigned long here but resource_size_t in
 * the callee; confirm the types agree on all configurations.
 */
640 bool phys_mem_access_encrypted(unsigned long phys_addr, unsigned long size)
642 return arch_memremap_can_ram_remap(phys_addr, size, 0);
645 #ifdef CONFIG_ARCH_USE_MEMREMAP_PROT
646 /* Remap memory with encryption */
/* Forwards to early_memremap_prot() with __PAGE_KERNEL_ENC. */
647 void __init *early_memremap_encrypted(resource_size_t phys_addr,
650 return early_memremap_prot(phys_addr, size, __PAGE_KERNEL_ENC);
654 * Remap memory with encryption and write-protected - cannot be called
655 * before pat_init() is called
/*
 * Forwards to early_memremap_prot() with __PAGE_KERNEL_ENC_WP after
 * checking the _PAGE_CACHE_MODE_WP slot of __pte2cachemode_tbl.
 * NOTE(review): the statement taken when that check fails is not
 * visible in this listing.
 */
657 void __init *early_memremap_encrypted_wp(resource_size_t phys_addr,
660 /* Be sure the write-protect PAT entry is set for write-protect */
661 if (__pte2cachemode_tbl[_PAGE_CACHE_MODE_WP] != _PAGE_CACHE_MODE_WP)
664 return early_memremap_prot(phys_addr, size, __PAGE_KERNEL_ENC_WP);
667 /* Remap memory without encryption */
/* Forwards to early_memremap_prot() with __PAGE_KERNEL_NOENC. */
668 void __init *early_memremap_decrypted(resource_size_t phys_addr,
671 return early_memremap_prot(phys_addr, size, __PAGE_KERNEL_NOENC);
675 * Remap memory without encryption and write-protected - cannot be called
676 * before pat_init() is called
/*
 * Forwards to early_memremap_prot() with __PAGE_KERNEL_NOENC_WP after
 * checking the _PAGE_CACHE_MODE_WP slot of __pte2cachemode_tbl.
 * NOTE(review): the statement taken when that check fails is not
 * visible in this listing.
 */
678 void __init *early_memremap_decrypted_wp(resource_size_t phys_addr,
681 /* Be sure the write-protect PAT entry is set for write-protect */
682 if (__pte2cachemode_tbl[_PAGE_CACHE_MODE_WP] != _PAGE_CACHE_MODE_WP)
685 return early_memremap_prot(phys_addr, size, __PAGE_KERNEL_NOENC_WP);
687 #endif /* CONFIG_ARCH_USE_MEMREMAP_PROT */
/*
 * One page worth of pte_t entries, page aligned in bss. Installed under
 * the boot-time fixmap pmd by early_ioremap_init() and indexed by
 * early_ioremap_pte().
 */
689 static pte_t bm_pte[PAGE_SIZE/sizeof(pte_t)] __page_aligned_bss;
/*
 * Walks the page tables for addr down to the pmd level: pgd, p4d, pud,
 * pmd. The walk starts from the table whose physical address is read
 * from CR3, converted with __va().
 * NOTE(review): the return statement is not visible in this listing;
 * presumably pmd is returned.
 */
691 static inline pmd_t * __init early_ioremap_pmd(unsigned long addr)
693 /* Don't assume we're using swapper_pg_dir at this point */
694 pgd_t *base = __va(read_cr3_pa());
695 pgd_t *pgd = &base[pgd_index(addr)];
696 p4d_t *p4d = p4d_offset(pgd, addr);
697 pud_t *pud = pud_offset(p4d, addr);
698 pmd_t *pmd = pmd_offset(pud, addr);
/* Returns the bm_pte slot selected by pte_index(addr). */
703 static inline pte_t * __init early_ioremap_pte(unsigned long addr)
705 return &bm_pte[pte_index(addr)];
/*
 * True when ptep points into the bm_pte array: at or after the first
 * entry and before one past the last entry.
 */
708 bool __init is_early_ioremap_ptep(pte_t *ptep)
710 return ptep >= &bm_pte[0] && ptep < &bm_pte[PAGE_SIZE/sizeof(pte_t)];
/*
 * Sets up the boot-time ioremap page table: calls early_ioremap_setup(),
 * zeroes bm_pte and installs it under the pmd that covers
 * FIX_BTMAP_BEGIN, then checks that FIX_BTMAP_END is covered by the
 * same pmd and prints diagnostics if it is not.
 */
713 void __init early_ioremap_init(void)
/*
 * The same alignment condition is checked at build time and at run time.
 * NOTE(review): presumably selected by a preprocessor conditional that
 * is not visible in this listing.
 */
718 BUILD_BUG_ON((fix_to_virt(0) + PAGE_SIZE) & ((1 << PMD_SHIFT) - 1));
720 WARN_ON((fix_to_virt(0) + PAGE_SIZE) & ((1 << PMD_SHIFT) - 1));
723 early_ioremap_setup();
725 pmd = early_ioremap_pmd(fix_to_virt(FIX_BTMAP_BEGIN));
/* Clear the table before it is made visible through the pmd. */
726 memset(bm_pte, 0, sizeof(bm_pte));
727 pmd_populate_kernel(&init_mm, pmd, bm_pte);
730 * The boot-ioremap range spans multiple pmds, for which
731 * we are not prepared:
733 #define __FIXADDR_TOP (-PAGE_SIZE)
734 BUILD_BUG_ON((__fix_to_virt(FIX_BTMAP_BEGIN) >> PMD_SHIFT)
735 != (__fix_to_virt(FIX_BTMAP_END) >> PMD_SHIFT));
/* Run-time version of the check: both ends must share one pmd. */
737 if (pmd != early_ioremap_pmd(fix_to_virt(FIX_BTMAP_END))) {
739 printk(KERN_WARNING "pmd %p != %p\n",
740 pmd, early_ioremap_pmd(fix_to_virt(FIX_BTMAP_END)));
741 printk(KERN_WARNING "fix_to_virt(FIX_BTMAP_BEGIN): %08lx\n",
742 fix_to_virt(FIX_BTMAP_BEGIN));
743 printk(KERN_WARNING "fix_to_virt(FIX_BTMAP_END): %08lx\n",
744 fix_to_virt(FIX_BTMAP_END));
746 printk(KERN_WARNING "FIX_BTMAP_END: %d\n", FIX_BTMAP_END);
747 printk(KERN_WARNING "FIX_BTMAP_BEGIN: %d\n",
/*
 * __early_set_fixmap() - set or clear one boot-time fixmap entry
 * @idx:   fixmap index, converted to a virtual address by __fix_to_virt()
 * @phys:  physical address whose page frame is written into the PTE
 * @flags: page protection; its pgprot_val() is tested before set_pte()
 *
 * The PTE is looked up in bm_pte through early_ioremap_pte() and the TLB
 * entry for the address is flushed at the end.
 * NOTE(review): the handling of an out-of-range idx and the branch
 * keyword in front of pte_clear() are not visible in this listing.
 */
752 void __init __early_set_fixmap(enum fixed_addresses idx,
753 phys_addr_t phys, pgprot_t flags)
755 unsigned long addr = __fix_to_virt(idx);
758 if (idx >= __end_of_fixed_addresses) {
762 pte = early_ioremap_pte(addr);
764 if (pgprot_val(flags))
765 set_pte(pte, pfn_pte(phys >> PAGE_SHIFT, flags));
767 pte_clear(&init_mm, addr, pte);
768 __flush_tlb_one(addr);