/*
 * Source: commit a270f9ccebfb004bd98238df6405943b10865ed7
 * [sfrench/cifs-2.6.git] arch/x86/mm/pageattr.c
 */
1 /*
2  * Copyright 2002 Andi Kleen, SuSE Labs.
3  * Thanks to Ben LaHaise for precious feedback.
4  */
5 #include <linux/highmem.h>
6 #include <linux/module.h>
7 #include <linux/sched.h>
8 #include <linux/slab.h>
9 #include <linux/mm.h>
10
11 void clflush_cache_range(void *addr, int size)
12 {
13         int i;
14
15         for (i = 0; i < size; i += boot_cpu_data.x86_clflush_size)
16                 clflush(addr+i);
17 }
18
19 #include <asm/processor.h>
20 #include <asm/tlbflush.h>
21 #include <asm/sections.h>
22 #include <asm/uaccess.h>
23 #include <asm/pgalloc.h>
24
/*
 * Walk the kernel page tables for @address and return a pointer to its
 * pte, or NULL if any intermediate table entry is not present.
 *
 * On success *level reports where the mapping ends:
 *   3 -- @address is covered by a large (PMD-level) page; the returned
 *        pointer is actually the pmd_t cast to pte_t *.
 *   4 -- a regular 4k pte; the returned pointer is the real pte.
 */
pte_t *lookup_address(unsigned long address, int *level)
{
	pgd_t *pgd = pgd_offset_k(address);
	pud_t *pud;
	pmd_t *pmd;

	if (pgd_none(*pgd))
		return NULL;
	pud = pud_offset(pgd, address);
	if (pud_none(*pud))
		return NULL;
	pmd = pmd_offset(pud, address);
	if (pmd_none(*pmd))
		return NULL;
	/* Large pmd: the mapping stops here, there is no pte level. */
	*level = 3;
	if (pmd_large(*pmd))
		return (pte_t *)pmd;
	*level = 4;

	return pte_offset_kernel(pmd, address);
}
46
/*
 * Install @pte at @kpte (a pmd-level slot for large pages) in init_mm,
 * and on 32-bit kernels without a shared kernel PMD replicate the update
 * into every pgd on pgd_list so all address spaces see the new mapping.
 */
static void __set_pmd_pte(pte_t *kpte, unsigned long address, pte_t pte)
{
	/* change init_mm */
	set_pte_atomic(kpte, pte);
#ifdef CONFIG_X86_32
	if (!SHARED_KERNEL_PMD) {
		struct page *page;

		/*
		 * pgd_list is chained through page->index here; walk every
		 * pgd and patch the pmd slot covering @address.
		 */
		for (page = pgd_list; page; page = (struct page *)page->index) {
			pgd_t *pgd;
			pud_t *pud;
			pmd_t *pmd;

			pgd = (pgd_t *)page_address(page) + pgd_index(address);
			pud = pud_offset(pgd, address);
			pmd = pmd_offset(pud, address);
			set_pte_atomic((pte_t *)pmd, pte);
		}
	}
#endif
}
68
69 static int split_large_page(pte_t *kpte, unsigned long address)
70 {
71         pgprot_t ref_prot = pte_pgprot(pte_clrhuge(*kpte));
72         gfp_t gfp_flags = GFP_KERNEL;
73         unsigned long flags;
74         unsigned long addr;
75         pte_t *pbase, *tmp;
76         struct page *base;
77         int i, level;
78
79 #ifdef CONFIG_DEBUG_PAGEALLOC
80         gfp_flags = GFP_ATOMIC;
81 #endif
82         base = alloc_pages(gfp_flags, 0);
83         if (!base)
84                 return -ENOMEM;
85
86         spin_lock_irqsave(&pgd_lock, flags);
87         /*
88          * Check for races, another CPU might have split this page
89          * up for us already:
90          */
91         tmp = lookup_address(address, &level);
92         if (tmp != kpte) {
93                 WARN_ON_ONCE(1);
94                 goto out_unlock;
95         }
96
97         address = __pa(address);
98         addr = address & LARGE_PAGE_MASK;
99         pbase = (pte_t *)page_address(base);
100 #ifdef CONFIG_X86_32
101         paravirt_alloc_pt(&init_mm, page_to_pfn(base));
102 #endif
103
104         for (i = 0; i < PTRS_PER_PTE; i++, addr += PAGE_SIZE)
105                 set_pte(&pbase[i], pfn_pte(addr >> PAGE_SHIFT, ref_prot));
106
107         /*
108          * Install the new, split up pagetable:
109          */
110         __set_pmd_pte(kpte, address, mk_pte(base, ref_prot));
111         base = NULL;
112
113 out_unlock:
114         spin_unlock_irqrestore(&pgd_lock, flags);
115
116         if (base)
117                 __free_pages(base, 0);
118
119         return 0;
120 }
121
/*
 * Change the protections of the kernel mapping of @address to @prot,
 * pointing the pte at @page.  If the address is currently covered by a
 * large page, split it first and retry.
 *
 * Returns 0 on success, -EINVAL if @address has no kernel mapping,
 * or the error from split_large_page().
 */
static int
__change_page_attr(unsigned long address, struct page *page, pgprot_t prot)
{
	struct page *kpte_page;
	int level, err = 0;
	pte_t *kpte;

	BUG_ON(PageHighMem(page));

repeat:
	kpte = lookup_address(address, &level);
	if (!kpte)
		return -EINVAL;

	/* Sanity: the page table page itself must be a plain kernel page. */
	kpte_page = virt_to_page(kpte);
	BUG_ON(PageLRU(kpte_page));
	BUG_ON(PageCompound(kpte_page));

	/*
	 * Better fail early if someone sets the kernel text to NX.
	 * Does not cover __inittext
	 */
	BUG_ON(address >= (unsigned long)&_text &&
		address < (unsigned long)&_etext &&
	       (pgprot_val(prot) & _PAGE_NX));

	/*
	 * Level 4 means a real 4k pte: update it in place.  Otherwise the
	 * address sits in a large page -- split it and retry the lookup.
	 */
	if (level == 4) {
		set_pte_atomic(kpte, mk_pte(page, canon_pgprot(prot)));
	} else {
		err = split_large_page(kpte, address);
		if (!err)
			goto repeat;
	}
	return err;
}
157
158 /**
159  * change_page_attr_addr - Change page table attributes in linear mapping
160  * @address: Virtual address in linear mapping.
161  * @numpages: Number of pages to change
162  * @prot:    New page table attribute (PAGE_*)
163  *
164  * Change page attributes of a page in the direct mapping. This is a variant
165  * of change_page_attr() that also works on memory holes that do not have
166  * mem_map entry (pfn_valid() is false).
167  *
168  * See change_page_attr() documentation for more details.
169  */
170
int change_page_attr_addr(unsigned long address, int numpages, pgprot_t prot)
{
	int err = 0, kernel_map = 0, i;

#ifdef CONFIG_X86_64
	/*
	 * Addresses in the high kernel text mapping alias lowmem; rewrite
	 * them to the direct-mapping alias and remember that we did, so the
	 * direct mapping is only touched for present protections below.
	 */
	if (address >= __START_KERNEL_map &&
			address < __START_KERNEL_map + KERNEL_TEXT_SIZE) {

		address = (unsigned long)__va(__pa(address));
		kernel_map = 1;
	}
#endif

	for (i = 0; i < numpages; i++, address += PAGE_SIZE) {
		unsigned long pfn = __pa(address) >> PAGE_SHIFT;

		/*
		 * For kernel-text aliases, skip updating the direct mapping
		 * when the new protection is not-present (don't unmap lowmem
		 * just because the text alias was changed).
		 */
		if (!kernel_map || pte_present(pfn_pte(0, prot))) {
			err = __change_page_attr(address, pfn_to_page(pfn), prot);
			if (err)
				break;
		}
#ifdef CONFIG_X86_64
		/*
		 * Handle kernel mapping too which aliases part of
		 * lowmem:
		 */
		if (__pa(address) < KERNEL_TEXT_SIZE) {
			unsigned long addr2;
			pgprot_t prot2;

			addr2 = __START_KERNEL_map + __pa(address);
			/* Make sure the kernel mappings stay executable */
			prot2 = pte_pgprot(pte_mkexec(pfn_pte(0, prot)));
			err = __change_page_attr(addr2, pfn_to_page(pfn), prot2);
		}
#endif
	}

	return err;
}
211
212 /**
213  * change_page_attr - Change page table attributes in the linear mapping.
214  * @page: First page to change
215  * @numpages: Number of pages to change
216  * @prot: New protection/caching type (PAGE_*)
217  *
218  * Returns 0 on success, otherwise a negated errno.
219  *
220  * This should be used when a page is mapped with a different caching policy
221  * than write-back somewhere - some CPUs do not like it when mappings with
222  * different caching policies exist. This changes the page attributes of the
223  * in kernel linear mapping too.
224  *
225  * Caller must call global_flush_tlb() later to make the changes active.
226  *
227  * The caller needs to ensure that there are no conflicting mappings elsewhere
228  * (e.g. in user space) * This function only deals with the kernel linear map.
229  *
230  * For MMIO areas without mem_map use change_page_attr_addr() instead.
231  */
232 int change_page_attr(struct page *page, int numpages, pgprot_t prot)
233 {
234         unsigned long addr = (unsigned long)page_address(page);
235
236         return change_page_attr_addr(addr, numpages, prot);
237 }
238 EXPORT_SYMBOL(change_page_attr);
239
/*
 * Per-CPU flush helper run via on_each_cpu() from global_flush_tlb().
 * @arg is unused.
 */
static void flush_kernel_map(void *arg)
{
	/*
	 * Flush all to work around Errata in early athlons regarding
	 * large page flushing.
	 */
	__flush_tlb_all();

	/*
	 * Also write back and invalidate the caches.
	 * NOTE(review): the x86_model >= 4 gate presumably excludes very
	 * old CPUs where wbinvd is unsafe/unneeded -- confirm the intent.
	 */
	if (boot_cpu_data.x86_model >= 4)
		wbinvd();
}
251
/*
 * Make earlier change_page_attr*() updates visible by running
 * flush_kernel_map() (TLB flush + conditional wbinvd) on every CPU.
 * Must be called with interrupts enabled -- on_each_cpu() sends IPIs
 * and waits for completion.
 */
void global_flush_tlb(void)
{
	BUG_ON(irqs_disabled());

	on_each_cpu(flush_kernel_map, NULL, 1, 1);
}
EXPORT_SYMBOL(global_flush_tlb);
259
260 #ifdef CONFIG_DEBUG_PAGEALLOC
/*
 * CONFIG_DEBUG_PAGEALLOC hook: map (@enable != 0) or unmap (@enable == 0,
 * via __pgprot(0)) the kernel linear mapping of @numpages pages starting
 * at @page, so use-after-free of page allocator memory faults immediately.
 */
void kernel_map_pages(struct page *page, int numpages, int enable)
{
	/* Highmem pages have no permanent kernel mapping to change. */
	if (PageHighMem(page))
		return;
	if (!enable) {
		debug_check_no_locks_freed(page_address(page),
					   numpages * PAGE_SIZE);
	}

	/*
	 * If page allocator is not up yet then do not call c_p_a():
	 */
	if (!debug_pagealloc_enabled)
		return;

	/*
	 * The return value is ignored - the calls cannot fail,
	 * large pages are disabled at boot time:
	 */
	change_page_attr(page, numpages, enable ? PAGE_KERNEL : __pgprot(0));

	/*
	 * We should perform an IPI and flush all tlbs,
	 * but that can deadlock->flush only current cpu:
	 */
	__flush_tlb_all();
}
288 #endif