x86: prepare for pageattr.c unification
[sfrench/cifs-2.6.git] / arch / x86 / mm / pageattr.c
1 /*
2  * Copyright 2002 Andi Kleen, SuSE Labs.
3  * Thanks to Ben LaHaise for precious feedback.
4  */
5
6 #include <linux/highmem.h>
7 #include <linux/module.h>
8 #include <linux/sched.h>
9 #include <linux/slab.h>
10 #include <linux/mm.h>
11
12 #include <asm/processor.h>
13 #include <asm/tlbflush.h>
14 #include <asm/sections.h>
15 #include <asm/uaccess.h>
16 #include <asm/pgalloc.h>
17
18 pte_t *lookup_address(unsigned long address, int *level)
19 {
20         pgd_t *pgd = pgd_offset_k(address);
21         pud_t *pud;
22         pmd_t *pmd;
23
24         if (pgd_none(*pgd))
25                 return NULL;
26         pud = pud_offset(pgd, address);
27         if (pud_none(*pud))
28                 return NULL;
29         pmd = pmd_offset(pud, address);
30         if (pmd_none(*pmd))
31                 return NULL;
32         *level = 3;
33         if (pmd_large(*pmd))
34                 return (pte_t *)pmd;
35         *level = 4;
36
37         return pte_offset_kernel(pmd, address);
38 }
39
40 static void __set_pmd_pte(pte_t *kpte, unsigned long address, pte_t pte)
41 {
42         /* change init_mm */
43         set_pte_atomic(kpte, pte);
44 #ifdef CONFIG_X86_32
45         if (SHARED_KERNEL_PMD)
46                 return;
47         {
48                 struct page *page;
49
50                 for (page = pgd_list; page; page = (struct page *)page->index) {
51                         pgd_t *pgd;
52                         pud_t *pud;
53                         pmd_t *pmd;
54
55                         pgd = (pgd_t *)page_address(page) + pgd_index(address);
56                         pud = pud_offset(pgd, address);
57                         pmd = pmd_offset(pud, address);
58                         set_pte_atomic((pte_t *)pmd, pte);
59                 }
60         }
61 #endif
62 }
63
64 static int split_large_page(pte_t *kpte, unsigned long address)
65 {
66         pgprot_t ref_prot = pte_pgprot(pte_clrhuge(*kpte));
67         gfp_t gfp_flags = GFP_KERNEL;
68         unsigned long flags;
69         unsigned long addr;
70         pte_t *pbase, *tmp;
71         struct page *base;
72         int i, level;
73
74 #ifdef CONFIG_DEBUG_PAGEALLOC
75         gfp_flags = GFP_ATOMIC;
76 #endif
77         base = alloc_pages(gfp_flags, 0);
78         if (!base)
79                 return -ENOMEM;
80
81         spin_lock_irqsave(&pgd_lock, flags);
82         /*
83          * Check for races, another CPU might have split this page
84          * up for us already:
85          */
86         tmp = lookup_address(address, &level);
87         if (tmp != kpte) {
88                 WARN_ON_ONCE(1);
89                 goto out_unlock;
90         }
91
92         address = __pa(address);
93         addr = address & LARGE_PAGE_MASK;
94         pbase = (pte_t *)page_address(base);
95 #ifdef CONFIG_X86_32
96         paravirt_alloc_pt(&init_mm, page_to_pfn(base));
97 #endif
98
99         for (i = 0; i < PTRS_PER_PTE; i++, addr += PAGE_SIZE)
100                 set_pte(&pbase[i], pfn_pte(addr >> PAGE_SHIFT, ref_prot));
101
102         /*
103          * Install the new, split up pagetable:
104          */
105         __set_pmd_pte(kpte, address, mk_pte(base, ref_prot));
106         base = NULL;
107
108 out_unlock:
109         spin_unlock_irqrestore(&pgd_lock, flags);
110
111         if (base)
112                 __free_pages(base, 0);
113
114         return 0;
115 }
116
117 static int
118 __change_page_attr(unsigned long address, struct page *page, pgprot_t prot)
119 {
120         struct page *kpte_page;
121         int level, err = 0;
122         pte_t *kpte;
123
124         BUG_ON(PageHighMem(page));
125
126 repeat:
127         kpte = lookup_address(address, &level);
128         if (!kpte)
129                 return -EINVAL;
130
131         kpte_page = virt_to_page(kpte);
132         BUG_ON(PageLRU(kpte_page));
133         BUG_ON(PageCompound(kpte_page));
134
135         /*
136          * Better fail early if someone sets the kernel text to NX.
137          * Does not cover __inittext
138          */
139         BUG_ON(address >= (unsigned long)&_text &&
140                 address < (unsigned long)&_etext &&
141                (pgprot_val(prot) & _PAGE_NX));
142
143         if (level == 4) {
144                 set_pte_atomic(kpte, mk_pte(page, canon_pgprot(prot)));
145         } else {
146                 err = split_large_page(kpte, address);
147                 if (!err)
148                         goto repeat;
149         }
150         return err;
151 }
152
153 /**
154  * change_page_attr_addr - Change page table attributes in linear mapping
155  * @address: Virtual address in linear mapping.
156  * @numpages: Number of pages to change
157  * @prot:    New page table attribute (PAGE_*)
158  *
159  * Change page attributes of a page in the direct mapping. This is a variant
160  * of change_page_attr() that also works on memory holes that do not have
161  * mem_map entry (pfn_valid() is false).
162  *
163  * See change_page_attr() documentation for more details.
164  */
165
166 int change_page_attr_addr(unsigned long address, int numpages, pgprot_t prot)
167 {
168         int err = 0, kernel_map = 0, i;
169
170 #ifdef CONFIG_X86_64
171         if (address >= __START_KERNEL_map &&
172                         address < __START_KERNEL_map + KERNEL_TEXT_SIZE) {
173
174                 address = (unsigned long)__va(__pa(address));
175                 kernel_map = 1;
176         }
177 #endif
178
179         for (i = 0; i < numpages; i++, address += PAGE_SIZE) {
180                 unsigned long pfn = __pa(address) >> PAGE_SHIFT;
181
182                 if (!kernel_map || pte_present(pfn_pte(0, prot))) {
183                         err = __change_page_attr(address, pfn_to_page(pfn), prot);
184                         if (err)
185                                 break;
186                 }
187 #ifdef CONFIG_X86_64
188                 /*
189                  * Handle kernel mapping too which aliases part of
190                  * lowmem:
191                  */
192                 if (__pa(address) < KERNEL_TEXT_SIZE) {
193                         unsigned long addr2;
194                         pgprot_t prot2;
195
196                         addr2 = __START_KERNEL_map + __pa(address);
197                         /* Make sure the kernel mappings stay executable */
198                         prot2 = pte_pgprot(pte_mkexec(pfn_pte(0, prot)));
199                         err = __change_page_attr(addr2, pfn_to_page(pfn), prot2);
200                 }
201 #endif
202         }
203
204         return err;
205 }
206
207 /**
208  * change_page_attr - Change page table attributes in the linear mapping.
209  * @page: First page to change
210  * @numpages: Number of pages to change
211  * @prot: New protection/caching type (PAGE_*)
212  *
213  * Returns 0 on success, otherwise a negated errno.
214  *
215  * This should be used when a page is mapped with a different caching policy
216  * than write-back somewhere - some CPUs do not like it when mappings with
217  * different caching policies exist. This changes the page attributes of the
218  * in kernel linear mapping too.
219  *
220  * Caller must call global_flush_tlb() later to make the changes active.
221  *
222  * The caller needs to ensure that there are no conflicting mappings elsewhere
223  * (e.g. in user space) * This function only deals with the kernel linear map.
224  *
225  * For MMIO areas without mem_map use change_page_attr_addr() instead.
226  */
227 int change_page_attr(struct page *page, int numpages, pgprot_t prot)
228 {
229         unsigned long addr = (unsigned long)page_address(page);
230
231         return change_page_attr_addr(addr, numpages, prot);
232 }
233 EXPORT_SYMBOL(change_page_attr);
234
235 static void flush_kernel_map(void *arg)
236 {
237         /*
238          * Flush all to work around Errata in early athlons regarding
239          * large page flushing.
240          */
241         __flush_tlb_all();
242
243         if (boot_cpu_data.x86_model >= 4)
244                 wbinvd();
245 }
246
247 void global_flush_tlb(void)
248 {
249         BUG_ON(irqs_disabled());
250
251         on_each_cpu(flush_kernel_map, NULL, 1, 1);
252 }
253 EXPORT_SYMBOL(global_flush_tlb);
254
#ifdef CONFIG_DEBUG_PAGEALLOC
/*
 * Map (@enable != 0) or unmap (@enable == 0) @numpages pages starting at
 * @page in the kernel linear mapping.  Highmem pages are ignored.
 */
void kernel_map_pages(struct page *page, int numpages, int enable)
{
	pgprot_t new_prot;

	if (PageHighMem(page))
		return;

	/* Before unmapping, check that no held lock lives in the range. */
	if (!enable)
		debug_check_no_locks_freed(page_address(page),
					   numpages * PAGE_SIZE);

	/*
	 * If page allocator is not up yet then do not call c_p_a():
	 */
	if (!debug_pagealloc_enabled)
		return;

	/* An empty protection value unmaps the pages. */
	if (enable)
		new_prot = PAGE_KERNEL;
	else
		new_prot = __pgprot(0);

	/*
	 * the return value is ignored - the calls cannot fail,
	 * large pages are disabled at boot time.
	 */
	change_page_attr(page, numpages, new_prot);

	/*
	 * we should perform an IPI and flush all tlbs,
	 * but that can deadlock->flush only current cpu.
	 */
	__flush_tlb_all();
}
#endif