x86: prepare for pageattr.c unification
arch/x86/mm/pageattr_64.c
/*
 * Copyright 2002 Andi Kleen, SuSE Labs.
 * Thanks to Ben LaHaise for precious feedback.
 */

#include <linux/highmem.h>
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/mm.h>

#include <asm/processor.h>
#include <asm/tlbflush.h>
#include <asm/sections.h>
#include <asm/uaccess.h>
#include <asm/pgalloc.h>

pte_t *lookup_address(unsigned long address, int *level)
{
	pgd_t *pgd = pgd_offset_k(address);
	pud_t *pud;
	pmd_t *pmd;

	if (pgd_none(*pgd))
		return NULL;
	pud = pud_offset(pgd, address);
	if (pud_none(*pud))
		return NULL;
	pmd = pmd_offset(pud, address);
	if (pmd_none(*pmd))
		return NULL;
	*level = 3;
	if (pmd_large(*pmd))
		return (pte_t *)pmd;
	*level = 4;

	return pte_offset_kernel(pmd, address);
}
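
/*
 * Usage sketch only -- this helper is not part of the file and its name
 * is invented for illustration. It shows how a caller inspects *level
 * to tell whether the entry returned by lookup_address() is a
 * large-page PMD (level 3) or a regular 4K PTE (level 4):
 */
static int __maybe_unused example_is_large_mapping(unsigned long address)
{
	int level;
	pte_t *pte = lookup_address(address, &level);

	if (!pte)
		return -EINVAL;	/* no mapping at all */
	return level == 3;	/* level 3: PMD-sized large page */
}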

static void __set_pmd_pte(pte_t *kpte, unsigned long address, pte_t pte)
{
	/* change init_mm */
	set_pte_atomic(kpte, pte);
#ifdef CONFIG_X86_32
	if (SHARED_KERNEL_PMD)
		return;
	/*
	 * The kernel pmds are not shared between the pgds here, so the
	 * change has to be propagated to every pgd on the list:
	 */
	{
		struct page *page;

		for (page = pgd_list; page; page = (struct page *)page->index) {
			pgd_t *pgd;
			pud_t *pud;
			pmd_t *pmd;

			pgd = (pgd_t *)page_address(page) + pgd_index(address);
			pud = pud_offset(pgd, address);
			pmd = pmd_offset(pud, address);
			set_pte_atomic((pte_t *)pmd, pte);
		}
	}
#endif
}

static int split_large_page(pte_t *kpte, unsigned long address)
{
	pgprot_t ref_prot = pte_pgprot(pte_clrhuge(*kpte));
	gfp_t gfp_flags = GFP_KERNEL;
	unsigned long flags;
	unsigned long addr;
	pte_t *pbase, *tmp;
	struct page *base;
	int i, level;

#ifdef CONFIG_DEBUG_PAGEALLOC
	gfp_flags = GFP_ATOMIC;
#endif
	base = alloc_pages(gfp_flags, 0);
	if (!base)
		return -ENOMEM;

	spin_lock_irqsave(&pgd_lock, flags);
	/*
	 * Check for races, another CPU might have split this page
	 * up for us already:
	 */
	tmp = lookup_address(address, &level);
	if (tmp != kpte) {
		WARN_ON_ONCE(1);
		goto out_unlock;
	}

	addr = __pa(address) & LARGE_PAGE_MASK;
	pbase = (pte_t *)page_address(base);
#ifdef CONFIG_X86_32
	paravirt_alloc_pt(&init_mm, page_to_pfn(base));
#endif

	for (i = 0; i < PTRS_PER_PTE; i++, addr += PAGE_SIZE)
		set_pte(&pbase[i], pfn_pte(addr >> PAGE_SHIFT, ref_prot));

	/*
	 * Install the new, split up pagetable. Pass the virtual
	 * address (not the physical one), so that the
	 * !SHARED_KERNEL_PMD fixup in __set_pmd_pte() walks the
	 * correct pgd slot:
	 */
	__set_pmd_pte(kpte, address, mk_pte(base, ref_prot));
	base = NULL;

out_unlock:
	spin_unlock_irqrestore(&pgd_lock, flags);

	if (base)
		__free_pages(base, 0);

	return 0;
}
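
/*
 * Illustration only -- this helper is invented here and not used by the
 * file. After a split, PTE i of the new page table maps the frame
 * (__pa(address) & LARGE_PAGE_MASK) >> PAGE_SHIFT, plus i: the large
 * frame is covered 1:1 by PTRS_PER_PTE small pages, all carrying the
 * large page's original protections.
 */
static unsigned long __maybe_unused example_split_pfn(unsigned long address,
						      int i)
{
	unsigned long base_pfn = (__pa(address) & LARGE_PAGE_MASK) >> PAGE_SHIFT;

	return base_pfn + i;
}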

static int
__change_page_attr(unsigned long address, struct page *page, pgprot_t prot)
{
	struct page *kpte_page;
	int level, err = 0;
	pte_t *kpte;

	BUG_ON(PageHighMem(page));

repeat:
	kpte = lookup_address(address, &level);
	if (!kpte)
		return -EINVAL;

	kpte_page = virt_to_page(kpte);
	BUG_ON(PageLRU(kpte_page));
	BUG_ON(PageCompound(kpte_page));

	/*
	 * Better fail early if someone sets the kernel text to NX.
	 * Does not cover __inittext:
	 */
	BUG_ON(address >= (unsigned long)&_text &&
	       address < (unsigned long)&_etext &&
	       (pgprot_val(prot) & _PAGE_NX));

	if (level == 4) {
		set_pte_atomic(kpte, mk_pte(page, canon_pgprot(prot)));
	} else {
		err = split_large_page(kpte, address);
		if (!err)
			goto repeat;
	}
	return err;
}

/**
 * change_page_attr_addr - Change page table attributes in linear mapping
 * @address: Virtual address in linear mapping.
 * @numpages: Number of pages to change
 * @prot: New page table attribute (PAGE_*)
 *
 * Change page attributes of a page in the direct mapping. This is a variant
 * of change_page_attr() that also works on memory holes that do not have
 * a mem_map entry (pfn_valid() is false).
 *
 * See change_page_attr() documentation for more details.
 */
int change_page_attr_addr(unsigned long address, int numpages, pgprot_t prot)
{
	int err = 0, kernel_map = 0, i;

#ifdef CONFIG_X86_64
	if (address >= __START_KERNEL_map &&
			address < __START_KERNEL_map + KERNEL_TEXT_SIZE) {
		address = (unsigned long)__va(__pa(address));
		kernel_map = 1;
	}
#endif

	for (i = 0; i < numpages; i++, address += PAGE_SIZE) {
		unsigned long pfn = __pa(address) >> PAGE_SHIFT;

		if (!kernel_map || pte_present(pfn_pte(0, prot))) {
			err = __change_page_attr(address, pfn_to_page(pfn), prot);
			if (err)
				break;
		}
#ifdef CONFIG_X86_64
		/*
		 * Handle the kernel mapping too, which aliases part of
		 * lowmem:
		 */
		if (__pa(address) < KERNEL_TEXT_SIZE) {
			unsigned long addr2;
			pgprot_t prot2;

			addr2 = __START_KERNEL_map + __pa(address);
			/* Make sure the kernel mappings stay executable */
			prot2 = pte_pgprot(pte_mkexec(pfn_pte(0, prot)));
			err = __change_page_attr(addr2, pfn_to_page(pfn), prot2);
		}
#endif
	}

	return err;
}
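
/*
 * Hypothetical usage sketch -- this helper is not part of the file and
 * its name is invented. It marks a range of the direct mapping uncached
 * by virtual address, which also works for holes without a mem_map
 * entry. The caller must still call global_flush_tlb() afterwards, see
 * change_page_attr() below:
 */
static int __maybe_unused example_set_addr_uncached(unsigned long addr,
						    int npages)
{
	/* Attribute change only; the TLB flush remains the caller's job */
	return change_page_attr_addr(addr, npages, PAGE_KERNEL_NOCACHE);
}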

/**
 * change_page_attr - Change page table attributes in the linear mapping.
 * @page: First page to change
 * @numpages: Number of pages to change
 * @prot: New protection/caching type (PAGE_*)
 *
 * Returns 0 on success, otherwise a negative errno.
 *
 * This should be used when a page is mapped with a different caching policy
 * than write-back somewhere - some CPUs do not like it when mappings with
 * different caching policies exist. This changes the page attributes of the
 * in-kernel linear mapping too.
 *
 * The caller must call global_flush_tlb() later to make the changes active.
 *
 * The caller needs to ensure that there are no conflicting mappings elsewhere
 * (e.g. in user space). This function only deals with the kernel linear map.
 *
 * For MMIO areas without a mem_map entry use change_page_attr_addr() instead.
 */
int change_page_attr(struct page *page, int numpages, pgprot_t prot)
{
	unsigned long addr = (unsigned long)page_address(page);

	return change_page_attr_addr(addr, numpages, prot);
}
EXPORT_SYMBOL(change_page_attr);

static void flush_kernel_map(void *arg)
{
	/*
	 * Flush everything to work around an erratum in early Athlons
	 * regarding large-page flushing:
	 */
	__flush_tlb_all();

	if (boot_cpu_data.x86_model >= 4)
		wbinvd();
}

void global_flush_tlb(void)
{
	BUG_ON(irqs_disabled());

	on_each_cpu(flush_kernel_map, NULL, 1, 1);
}
EXPORT_SYMBOL(global_flush_tlb);
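
/*
 * Putting it together -- a hypothetical caller, not part of this file
 * (the name is invented for illustration): temporarily make one kernel
 * page uncached, then restore write-back. Each attribute change only
 * becomes active after the global TLB flush:
 */
static int __maybe_unused example_toggle_caching(struct page *page)
{
	int err;

	err = change_page_attr(page, 1, PAGE_KERNEL_NOCACHE);
	if (err)
		return err;
	global_flush_tlb();

	/* ... use the uncached mapping here ... */

	err = change_page_attr(page, 1, PAGE_KERNEL);
	if (!err)
		global_flush_tlb();
	return err;
}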

#ifdef CONFIG_DEBUG_PAGEALLOC
void kernel_map_pages(struct page *page, int numpages, int enable)
{
	if (PageHighMem(page))
		return;
	if (!enable) {
		debug_check_no_locks_freed(page_address(page),
					   numpages * PAGE_SIZE);
	}

	/*
	 * If the page allocator is not up yet then do not call c_p_a():
	 */
	if (!debug_pagealloc_enabled)
		return;

	/*
	 * The return value is ignored - the calls cannot fail,
	 * large pages are disabled at boot time:
	 */
	change_page_attr(page, numpages, enable ? PAGE_KERNEL : __pgprot(0));

	/*
	 * We should perform an IPI and flush all TLBs,
	 * but that can deadlock -> flush only the current CPU:
	 */
	__flush_tlb_all();
}
#endif