Merge tag 's390-6.6-2' of git://git.kernel.org/pub/scm/linux/kernel/git/s390/linux
[sfrench/cifs-2.6.git] / arch / s390 / mm / pageattr.c
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Copyright IBM Corp. 2011
4  * Author(s): Jan Glauber <jang@linux.vnet.ibm.com>
5  */
6 #include <linux/hugetlb.h>
7 #include <linux/proc_fs.h>
8 #include <linux/vmalloc.h>
9 #include <linux/mm.h>
10 #include <asm/cacheflush.h>
11 #include <asm/facility.h>
12 #include <asm/pgalloc.h>
13 #include <asm/kfence.h>
14 #include <asm/page.h>
15 #include <asm/set_memory.h>
16
17 static inline unsigned long sske_frame(unsigned long addr, unsigned char skey)
18 {
19         asm volatile(".insn rrf,0xb22b0000,%[skey],%[addr],1,0"
20                      : [addr] "+a" (addr) : [skey] "d" (skey));
21         return addr;
22 }
23
24 void __storage_key_init_range(unsigned long start, unsigned long end)
25 {
26         unsigned long boundary, size;
27
28         while (start < end) {
29                 if (MACHINE_HAS_EDAT1) {
30                         /* set storage keys for a 1MB frame */
31                         size = 1UL << 20;
32                         boundary = (start + size) & ~(size - 1);
33                         if (boundary <= end) {
34                                 do {
35                                         start = sske_frame(start, PAGE_DEFAULT_KEY);
36                                 } while (start < boundary);
37                                 continue;
38                         }
39                 }
40                 page_set_storage_key(start, PAGE_DEFAULT_KEY, 1);
41                 start += PAGE_SIZE;
42         }
43 }
44
#ifdef CONFIG_PROC_FS
/* Per mapping size counters, indexed by PG_DIRECT_MAP_*. */
atomic_long_t __bootdata_preserved(direct_pages_count[PG_DIRECT_MAP_MAX]);

/*
 * Print the three direct mapping counters to @m in kB.
 *
 * The shifts turn an entry count into kB: 4K entries are 4 kB each (<< 2),
 * 1M entries 1024 kB (<< 10) and 2G entries 2097152 kB (<< 21).
 */
void arch_report_meminfo(struct seq_file *m)
{
	unsigned long kb;

	kb = atomic_long_read(&direct_pages_count[PG_DIRECT_MAP_4K]) << 2;
	seq_printf(m, "DirectMap4k:    %8lu kB\n", kb);

	kb = atomic_long_read(&direct_pages_count[PG_DIRECT_MAP_1M]) << 10;
	seq_printf(m, "DirectMap1M:    %8lu kB\n", kb);

	kb = atomic_long_read(&direct_pages_count[PG_DIRECT_MAP_2G]) << 21;
	seq_printf(m, "DirectMap2G:    %8lu kB\n", kb);
}
#endif /* CONFIG_PROC_FS */
58
59 static void pgt_set(unsigned long *old, unsigned long new, unsigned long addr,
60                     unsigned long dtt)
61 {
62         unsigned long *table, mask;
63
64         mask = 0;
65         if (MACHINE_HAS_EDAT2) {
66                 switch (dtt) {
67                 case CRDTE_DTT_REGION3:
68                         mask = ~(PTRS_PER_PUD * sizeof(pud_t) - 1);
69                         break;
70                 case CRDTE_DTT_SEGMENT:
71                         mask = ~(PTRS_PER_PMD * sizeof(pmd_t) - 1);
72                         break;
73                 case CRDTE_DTT_PAGE:
74                         mask = ~(PTRS_PER_PTE * sizeof(pte_t) - 1);
75                         break;
76                 }
77                 table = (unsigned long *)((unsigned long)old & mask);
78                 crdte(*old, new, table, dtt, addr, S390_lowcore.kernel_asce);
79         } else if (MACHINE_HAS_IDTE) {
80                 cspg(old, *old, new);
81         } else {
82                 csp((unsigned int *)old + 1, *old, new);
83         }
84 }
85
86 static int walk_pte_level(pmd_t *pmdp, unsigned long addr, unsigned long end,
87                           unsigned long flags)
88 {
89         pte_t *ptep, new;
90
91         if (flags == SET_MEMORY_4K)
92                 return 0;
93         ptep = pte_offset_kernel(pmdp, addr);
94         do {
95                 new = *ptep;
96                 if (pte_none(new))
97                         return -EINVAL;
98                 if (flags & SET_MEMORY_RO)
99                         new = pte_wrprotect(new);
100                 else if (flags & SET_MEMORY_RW)
101                         new = pte_mkwrite_novma(pte_mkdirty(new));
102                 if (flags & SET_MEMORY_NX)
103                         new = set_pte_bit(new, __pgprot(_PAGE_NOEXEC));
104                 else if (flags & SET_MEMORY_X)
105                         new = clear_pte_bit(new, __pgprot(_PAGE_NOEXEC));
106                 if (flags & SET_MEMORY_INV) {
107                         new = set_pte_bit(new, __pgprot(_PAGE_INVALID));
108                 } else if (flags & SET_MEMORY_DEF) {
109                         new = __pte(pte_val(new) & PAGE_MASK);
110                         new = set_pte_bit(new, PAGE_KERNEL);
111                         if (!MACHINE_HAS_NX)
112                                 new = clear_pte_bit(new, __pgprot(_PAGE_NOEXEC));
113                 }
114                 pgt_set((unsigned long *)ptep, pte_val(new), addr, CRDTE_DTT_PAGE);
115                 ptep++;
116                 addr += PAGE_SIZE;
117                 cond_resched();
118         } while (addr < end);
119         return 0;
120 }
121
122 static int split_pmd_page(pmd_t *pmdp, unsigned long addr)
123 {
124         unsigned long pte_addr, prot;
125         pte_t *pt_dir, *ptep;
126         pmd_t new;
127         int i, ro, nx;
128
129         pt_dir = vmem_pte_alloc();
130         if (!pt_dir)
131                 return -ENOMEM;
132         pte_addr = pmd_pfn(*pmdp) << PAGE_SHIFT;
133         ro = !!(pmd_val(*pmdp) & _SEGMENT_ENTRY_PROTECT);
134         nx = !!(pmd_val(*pmdp) & _SEGMENT_ENTRY_NOEXEC);
135         prot = pgprot_val(ro ? PAGE_KERNEL_RO : PAGE_KERNEL);
136         if (!nx)
137                 prot &= ~_PAGE_NOEXEC;
138         ptep = pt_dir;
139         for (i = 0; i < PTRS_PER_PTE; i++) {
140                 set_pte(ptep, __pte(pte_addr | prot));
141                 pte_addr += PAGE_SIZE;
142                 ptep++;
143         }
144         new = __pmd(__pa(pt_dir) | _SEGMENT_ENTRY);
145         pgt_set((unsigned long *)pmdp, pmd_val(new), addr, CRDTE_DTT_SEGMENT);
146         update_page_count(PG_DIRECT_MAP_4K, PTRS_PER_PTE);
147         update_page_count(PG_DIRECT_MAP_1M, -1);
148         return 0;
149 }
150
151 static void modify_pmd_page(pmd_t *pmdp, unsigned long addr,
152                             unsigned long flags)
153 {
154         pmd_t new = *pmdp;
155
156         if (flags & SET_MEMORY_RO)
157                 new = pmd_wrprotect(new);
158         else if (flags & SET_MEMORY_RW)
159                 new = pmd_mkwrite_novma(pmd_mkdirty(new));
160         if (flags & SET_MEMORY_NX)
161                 new = set_pmd_bit(new, __pgprot(_SEGMENT_ENTRY_NOEXEC));
162         else if (flags & SET_MEMORY_X)
163                 new = clear_pmd_bit(new, __pgprot(_SEGMENT_ENTRY_NOEXEC));
164         if (flags & SET_MEMORY_INV) {
165                 new = set_pmd_bit(new, __pgprot(_SEGMENT_ENTRY_INVALID));
166         } else if (flags & SET_MEMORY_DEF) {
167                 new = __pmd(pmd_val(new) & PMD_MASK);
168                 new = set_pmd_bit(new, SEGMENT_KERNEL);
169                 if (!MACHINE_HAS_NX)
170                         new = clear_pmd_bit(new, __pgprot(_SEGMENT_ENTRY_NOEXEC));
171         }
172         pgt_set((unsigned long *)pmdp, pmd_val(new), addr, CRDTE_DTT_SEGMENT);
173 }
174
175 static int walk_pmd_level(pud_t *pudp, unsigned long addr, unsigned long end,
176                           unsigned long flags)
177 {
178         unsigned long next;
179         int need_split;
180         pmd_t *pmdp;
181         int rc = 0;
182
183         pmdp = pmd_offset(pudp, addr);
184         do {
185                 if (pmd_none(*pmdp))
186                         return -EINVAL;
187                 next = pmd_addr_end(addr, end);
188                 if (pmd_large(*pmdp)) {
189                         need_split  = !!(flags & SET_MEMORY_4K);
190                         need_split |= !!(addr & ~PMD_MASK);
191                         need_split |= !!(addr + PMD_SIZE > next);
192                         if (need_split) {
193                                 rc = split_pmd_page(pmdp, addr);
194                                 if (rc)
195                                         return rc;
196                                 continue;
197                         }
198                         modify_pmd_page(pmdp, addr, flags);
199                 } else {
200                         rc = walk_pte_level(pmdp, addr, next, flags);
201                         if (rc)
202                                 return rc;
203                 }
204                 pmdp++;
205                 addr = next;
206                 cond_resched();
207         } while (addr < end);
208         return rc;
209 }
210
211 static int split_pud_page(pud_t *pudp, unsigned long addr)
212 {
213         unsigned long pmd_addr, prot;
214         pmd_t *pm_dir, *pmdp;
215         pud_t new;
216         int i, ro, nx;
217
218         pm_dir = vmem_crst_alloc(_SEGMENT_ENTRY_EMPTY);
219         if (!pm_dir)
220                 return -ENOMEM;
221         pmd_addr = pud_pfn(*pudp) << PAGE_SHIFT;
222         ro = !!(pud_val(*pudp) & _REGION_ENTRY_PROTECT);
223         nx = !!(pud_val(*pudp) & _REGION_ENTRY_NOEXEC);
224         prot = pgprot_val(ro ? SEGMENT_KERNEL_RO : SEGMENT_KERNEL);
225         if (!nx)
226                 prot &= ~_SEGMENT_ENTRY_NOEXEC;
227         pmdp = pm_dir;
228         for (i = 0; i < PTRS_PER_PMD; i++) {
229                 set_pmd(pmdp, __pmd(pmd_addr | prot));
230                 pmd_addr += PMD_SIZE;
231                 pmdp++;
232         }
233         new = __pud(__pa(pm_dir) | _REGION3_ENTRY);
234         pgt_set((unsigned long *)pudp, pud_val(new), addr, CRDTE_DTT_REGION3);
235         update_page_count(PG_DIRECT_MAP_1M, PTRS_PER_PMD);
236         update_page_count(PG_DIRECT_MAP_2G, -1);
237         return 0;
238 }
239
240 static void modify_pud_page(pud_t *pudp, unsigned long addr,
241                             unsigned long flags)
242 {
243         pud_t new = *pudp;
244
245         if (flags & SET_MEMORY_RO)
246                 new = pud_wrprotect(new);
247         else if (flags & SET_MEMORY_RW)
248                 new = pud_mkwrite(pud_mkdirty(new));
249         if (flags & SET_MEMORY_NX)
250                 new = set_pud_bit(new, __pgprot(_REGION_ENTRY_NOEXEC));
251         else if (flags & SET_MEMORY_X)
252                 new = clear_pud_bit(new, __pgprot(_REGION_ENTRY_NOEXEC));
253         if (flags & SET_MEMORY_INV) {
254                 new = set_pud_bit(new, __pgprot(_REGION_ENTRY_INVALID));
255         } else if (flags & SET_MEMORY_DEF) {
256                 new = __pud(pud_val(new) & PUD_MASK);
257                 new = set_pud_bit(new, REGION3_KERNEL);
258                 if (!MACHINE_HAS_NX)
259                         new = clear_pud_bit(new, __pgprot(_REGION_ENTRY_NOEXEC));
260         }
261         pgt_set((unsigned long *)pudp, pud_val(new), addr, CRDTE_DTT_REGION3);
262 }
263
264 static int walk_pud_level(p4d_t *p4d, unsigned long addr, unsigned long end,
265                           unsigned long flags)
266 {
267         unsigned long next;
268         int need_split;
269         pud_t *pudp;
270         int rc = 0;
271
272         pudp = pud_offset(p4d, addr);
273         do {
274                 if (pud_none(*pudp))
275                         return -EINVAL;
276                 next = pud_addr_end(addr, end);
277                 if (pud_large(*pudp)) {
278                         need_split  = !!(flags & SET_MEMORY_4K);
279                         need_split |= !!(addr & ~PUD_MASK);
280                         need_split |= !!(addr + PUD_SIZE > next);
281                         if (need_split) {
282                                 rc = split_pud_page(pudp, addr);
283                                 if (rc)
284                                         break;
285                                 continue;
286                         }
287                         modify_pud_page(pudp, addr, flags);
288                 } else {
289                         rc = walk_pmd_level(pudp, addr, next, flags);
290                 }
291                 pudp++;
292                 addr = next;
293                 cond_resched();
294         } while (addr < end && !rc);
295         return rc;
296 }
297
298 static int walk_p4d_level(pgd_t *pgd, unsigned long addr, unsigned long end,
299                           unsigned long flags)
300 {
301         unsigned long next;
302         p4d_t *p4dp;
303         int rc = 0;
304
305         p4dp = p4d_offset(pgd, addr);
306         do {
307                 if (p4d_none(*p4dp))
308                         return -EINVAL;
309                 next = p4d_addr_end(addr, end);
310                 rc = walk_pud_level(p4dp, addr, next, flags);
311                 p4dp++;
312                 addr = next;
313                 cond_resched();
314         } while (addr < end && !rc);
315         return rc;
316 }
317
318 DEFINE_MUTEX(cpa_mutex);
319
320 static int change_page_attr(unsigned long addr, unsigned long end,
321                             unsigned long flags)
322 {
323         unsigned long next;
324         int rc = -EINVAL;
325         pgd_t *pgdp;
326
327         pgdp = pgd_offset_k(addr);
328         do {
329                 if (pgd_none(*pgdp))
330                         break;
331                 next = pgd_addr_end(addr, end);
332                 rc = walk_p4d_level(pgdp, addr, next, flags);
333                 if (rc)
334                         break;
335                 cond_resched();
336         } while (pgdp++, addr = next, addr < end && !rc);
337         return rc;
338 }
339
340 static int change_page_attr_alias(unsigned long addr, unsigned long end,
341                                   unsigned long flags)
342 {
343         unsigned long alias, offset, va_start, va_end;
344         struct vm_struct *area;
345         int rc = 0;
346
347         /*
348          * Changes to read-only permissions on kernel VA mappings are also
349          * applied to the kernel direct mapping. Execute permissions are
350          * intentionally not transferred to keep all allocated pages within
351          * the direct mapping non-executable.
352          */
353         flags &= SET_MEMORY_RO | SET_MEMORY_RW;
354         if (!flags)
355                 return 0;
356         area = NULL;
357         while (addr < end) {
358                 if (!area)
359                         area = find_vm_area((void *)addr);
360                 if (!area || !(area->flags & VM_ALLOC))
361                         return 0;
362                 va_start = (unsigned long)area->addr;
363                 va_end = va_start + area->nr_pages * PAGE_SIZE;
364                 offset = (addr - va_start) >> PAGE_SHIFT;
365                 alias = (unsigned long)page_address(area->pages[offset]);
366                 rc = change_page_attr(alias, alias + PAGE_SIZE, flags);
367                 if (rc)
368                         break;
369                 addr += PAGE_SIZE;
370                 if (addr >= va_end)
371                         area = NULL;
372         }
373         return rc;
374 }
375
376 int __set_memory(unsigned long addr, unsigned long numpages, unsigned long flags)
377 {
378         unsigned long end;
379         int rc;
380
381         if (!MACHINE_HAS_NX)
382                 flags &= ~(SET_MEMORY_NX | SET_MEMORY_X);
383         if (!flags)
384                 return 0;
385         if (!numpages)
386                 return 0;
387         addr &= PAGE_MASK;
388         end = addr + numpages * PAGE_SIZE;
389         mutex_lock(&cpa_mutex);
390         rc = change_page_attr(addr, end, flags);
391         if (rc)
392                 goto out;
393         rc = change_page_attr_alias(addr, end, flags);
394 out:
395         mutex_unlock(&cpa_mutex);
396         return rc;
397 }
398
399 int set_direct_map_invalid_noflush(struct page *page)
400 {
401         return __set_memory((unsigned long)page_to_virt(page), 1, SET_MEMORY_INV);
402 }
403
404 int set_direct_map_default_noflush(struct page *page)
405 {
406         return __set_memory((unsigned long)page_to_virt(page), 1, SET_MEMORY_DEF);
407 }
408
409 #if defined(CONFIG_DEBUG_PAGEALLOC) || defined(CONFIG_KFENCE)
410
411 static void ipte_range(pte_t *pte, unsigned long address, int nr)
412 {
413         int i;
414
415         if (test_facility(13)) {
416                 __ptep_ipte_range(address, nr - 1, pte, IPTE_GLOBAL);
417                 return;
418         }
419         for (i = 0; i < nr; i++) {
420                 __ptep_ipte(address, pte, 0, 0, IPTE_GLOBAL);
421                 address += PAGE_SIZE;
422                 pte++;
423         }
424 }
425
426 void __kernel_map_pages(struct page *page, int numpages, int enable)
427 {
428         unsigned long address;
429         pte_t *ptep, pte;
430         int nr, i, j;
431
432         for (i = 0; i < numpages;) {
433                 address = (unsigned long)page_to_virt(page + i);
434                 ptep = virt_to_kpte(address);
435                 nr = (unsigned long)ptep >> ilog2(sizeof(long));
436                 nr = PTRS_PER_PTE - (nr & (PTRS_PER_PTE - 1));
437                 nr = min(numpages - i, nr);
438                 if (enable) {
439                         for (j = 0; j < nr; j++) {
440                                 pte = clear_pte_bit(*ptep, __pgprot(_PAGE_INVALID));
441                                 set_pte(ptep, pte);
442                                 address += PAGE_SIZE;
443                                 ptep++;
444                         }
445                 } else {
446                         ipte_range(ptep, address, nr);
447                 }
448                 i += nr;
449         }
450 }
451
#endif /* CONFIG_DEBUG_PAGEALLOC || CONFIG_KFENCE */