// SPDX-License-Identifier: GPL-2.0
/*
 *  S390 version
 *    Copyright IBM Corp. 1999, 2012
 *    Author(s): Hartmut Penner (hp@de.ibm.com),
 *               Martin Schwidefsky (schwidefsky@de.ibm.com)
 *
 *  Derived from "arch/i386/kernel/setup.c"
 *    Copyright (C) 1995, Linus Torvalds
 */

/*
 * This file handles the architecture-dependent parts of initialization
 */

#define KMSG_COMPONENT "setup"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt

#include <linux/errno.h>
#include <linux/export.h>
#include <linux/sched.h>
#include <linux/sched/task.h>
#include <linux/cpu.h>
#include <linux/kernel.h>
#include <linux/memblock.h>
#include <linux/mm.h>
#include <linux/stddef.h>
#include <linux/unistd.h>
#include <linux/ptrace.h>
#include <linux/random.h>
#include <linux/user.h>
#include <linux/tty.h>
#include <linux/ioport.h>
#include <linux/delay.h>
#include <linux/init.h>
#include <linux/initrd.h>
#include <linux/root_dev.h>
#include <linux/console.h>
#include <linux/kernel_stat.h>
#include <linux/dma-map-ops.h>
#include <linux/device.h>
#include <linux/notifier.h>
#include <linux/pfn.h>
#include <linux/ctype.h>
#include <linux/reboot.h>
#include <linux/topology.h>
#include <linux/kexec.h>
#include <linux/crash_dump.h>
#include <linux/memory.h>
#include <linux/compat.h>
#include <linux/start_kernel.h>
#include <linux/hugetlb.h>
#include <linux/kmemleak.h>

#include <asm/archrandom.h>
#include <asm/boot_data.h>
#include <asm/ipl.h>
#include <asm/facility.h>
#include <asm/smp.h>
#include <asm/mmu_context.h>
#include <asm/cpcmd.h>
#include <asm/abs_lowcore.h>
#include <asm/nmi.h>
#include <asm/irq.h>
#include <asm/page.h>
#include <asm/ptrace.h>
#include <asm/sections.h>
#include <asm/ebcdic.h>
#include <asm/diag.h>
#include <asm/os_info.h>
#include <asm/sclp.h>
#include <asm/stacktrace.h>
#include <asm/sysinfo.h>
#include <asm/numa.h>
#include <asm/alternative.h>
#include <asm/nospec-branch.h>
#include <asm/physmem_info.h>
#include <asm/maccess.h>
#include <asm/uv.h>
#include <asm/asm-offsets.h>
#include "entry.h"

/*
 * Machine setup.
 */
unsigned int console_mode = 0;
EXPORT_SYMBOL(console_mode);

unsigned int console_devno = -1;
EXPORT_SYMBOL(console_devno);

unsigned int console_irq = -1;
EXPORT_SYMBOL(console_irq);

/*
 * Some code and data needs to stay below 2 GB, even when the kernel is
 * relocated above 2 GB, because it has to use 31-bit addresses.
 * Such code and data is part of the .amode31 section.
 */
unsigned long __amode31_ref __samode31 = (unsigned long)&_samode31;
unsigned long __amode31_ref __eamode31 = (unsigned long)&_eamode31;
unsigned long __amode31_ref __stext_amode31 = (unsigned long)&_stext_amode31;
unsigned long __amode31_ref __etext_amode31 = (unsigned long)&_etext_amode31;
struct exception_table_entry __amode31_ref *__start_amode31_ex_table = _start_amode31_ex_table;
struct exception_table_entry __amode31_ref *__stop_amode31_ex_table = _stop_amode31_ex_table;

/*
 * Control registers CR2, CR5 and CR15 are initialized with addresses
 * of tables that must be placed below 2 GB, which is handled by the
 * AMODE31 sections.
 * Because the AMODE31 sections are relocated below 2 GB at startup,
 * the content of control registers CR2, CR5 and CR15 must be updated
 * with the new addresses after the relocation. The control registers
 * are first initialized in head64.S and updated again after the AMODE31
 * relocation. The relevant AMODE31 tables are therefore accessed
 * indirectly via pointers placed in the .amode31.refs linker section.
 * Those pointers are updated automatically during the AMODE31 relocation
 * and always contain a valid address within the AMODE31 sections.
 */
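
/*
 * The tables below follow this pattern: __ctl_duct_amode31 is placed in
 * the AMODE31 data section via __amode31_data, while the __ctl_duct
 * pointer used by setup_cr() carries __amode31_ref and is fixed up by
 * relocate_amode31_section().
 */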

static __amode31_data u32 __ctl_duct_amode31[16] __aligned(64);

static __amode31_data u64 __ctl_aste_amode31[8] __aligned(64) = {
        [1] = 0xffffffffffffffff
};

static __amode31_data u32 __ctl_duald_amode31[32] __aligned(128) = {
        0x80000000, 0, 0, 0,
        0x80000000, 0, 0, 0,
        0x80000000, 0, 0, 0,
        0x80000000, 0, 0, 0,
        0x80000000, 0, 0, 0,
        0x80000000, 0, 0, 0,
        0x80000000, 0, 0, 0,
        0x80000000, 0, 0, 0
};

static __amode31_data u32 __ctl_linkage_stack_amode31[8] __aligned(64) = {
        0, 0, 0x89000000, 0,
        0, 0, 0x8a000000, 0
};

static u64 __amode31_ref *__ctl_aste = __ctl_aste_amode31;
static u32 __amode31_ref *__ctl_duald = __ctl_duald_amode31;
static u32 __amode31_ref *__ctl_linkage_stack = __ctl_linkage_stack_amode31;
static u32 __amode31_ref *__ctl_duct = __ctl_duct_amode31;

int __bootdata(noexec_disabled);
unsigned long __bootdata(ident_map_size);
struct physmem_info __bootdata(physmem_info);

unsigned long __bootdata_preserved(__kaslr_offset);
int __bootdata_preserved(__kaslr_enabled);
unsigned int __bootdata_preserved(zlib_dfltcc_support);
EXPORT_SYMBOL(zlib_dfltcc_support);
u64 __bootdata_preserved(stfle_fac_list[16]);
EXPORT_SYMBOL(stfle_fac_list);
u64 __bootdata_preserved(alt_stfle_fac_list[16]);
struct oldmem_data __bootdata_preserved(oldmem_data);

unsigned long VMALLOC_START;
EXPORT_SYMBOL(VMALLOC_START);

unsigned long VMALLOC_END;
EXPORT_SYMBOL(VMALLOC_END);

struct page *vmemmap;
EXPORT_SYMBOL(vmemmap);
unsigned long vmemmap_size;

unsigned long MODULES_VADDR;
unsigned long MODULES_END;

/* An array with a pointer to the lowcore of every CPU. */
struct lowcore *lowcore_ptr[NR_CPUS];
EXPORT_SYMBOL(lowcore_ptr);

DEFINE_STATIC_KEY_FALSE(cpu_has_bear);

/*
 * The Write Back bit position in the physaddr is given by the SLPC PCI.
 * Leaving the mask zero always uses write through, which is safe.
 */
unsigned long mio_wb_bit_mask __ro_after_init;

/*
 * This is set up by the setup routine at boot time.
 * On S390 we need to find out what has to be set up,
 * using address 0x10400 ...
 */

#include <asm/setup.h>

/*
 * condev= and conmode= setup parameters.
 */

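/*
 * For example, booting with "condev=0x0009" (an illustrative device
 * number) selects device 0x0009 as the console device, while
 * "conmode=sclp" forces the SCLP line-mode console.
 */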
static int __init condev_setup(char *str)
{
        int vdev;

        vdev = simple_strtoul(str, &str, 0);
        if (vdev >= 0 && vdev < 65536) {
                console_devno = vdev;
                console_irq = -1;
        }
        return 1;
}

__setup("condev=", condev_setup);

static void __init set_preferred_console(void)
{
        if (CONSOLE_IS_3215 || CONSOLE_IS_SCLP)
                add_preferred_console("ttyS", 0, NULL);
        else if (CONSOLE_IS_3270)
                add_preferred_console("tty3270", 0, NULL);
        else if (CONSOLE_IS_VT220)
                add_preferred_console("ttysclp", 0, NULL);
        else if (CONSOLE_IS_HVC)
                add_preferred_console("hvc", 0, NULL);
}

static int __init conmode_setup(char *str)
{
#if defined(CONFIG_SCLP_CONSOLE) || defined(CONFIG_SCLP_VT220_CONSOLE)
        if (!strcmp(str, "hwc") || !strcmp(str, "sclp"))
                SET_CONSOLE_SCLP;
#endif
#if defined(CONFIG_TN3215_CONSOLE)
        if (!strcmp(str, "3215"))
                SET_CONSOLE_3215;
#endif
#if defined(CONFIG_TN3270_CONSOLE)
        if (!strcmp(str, "3270"))
                SET_CONSOLE_3270;
#endif
        set_preferred_console();
        return 1;
}

__setup("conmode=", conmode_setup);

static void __init conmode_default(void)
{
        char query_buffer[1024];
        char *ptr;

        if (MACHINE_IS_VM) {
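                /*
                 * Parse the output of the CP commands QUERY CONSOLE and
                 * QUERY TERM for the console device number, its
                 * subchannel and the configured console mode.
                 */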
                cpcmd("QUERY CONSOLE", query_buffer, 1024, NULL);
                console_devno = simple_strtoul(query_buffer + 5, NULL, 16);
                ptr = strstr(query_buffer, "SUBCHANNEL =");
                console_irq = simple_strtoul(ptr + 13, NULL, 16);
                cpcmd("QUERY TERM", query_buffer, 1024, NULL);
                ptr = strstr(query_buffer, "CONMODE");
                /*
                 * Set the conmode to 3215 so that the device recognition
                 * will set the cu_type of the console to 3215. If the
                 * conmode is 3270 and we don't set it back then both
                 * 3215 and the 3270 driver will try to access the console
                 * device (3215 as console and 3270 as normal tty).
                 */
                cpcmd("TERM CONMODE 3215", NULL, 0, NULL);
                if (ptr == NULL) {
#if defined(CONFIG_SCLP_CONSOLE) || defined(CONFIG_SCLP_VT220_CONSOLE)
                        SET_CONSOLE_SCLP;
#endif
                        return;
                }
                if (str_has_prefix(ptr + 8, "3270")) {
#if defined(CONFIG_TN3270_CONSOLE)
                        SET_CONSOLE_3270;
#elif defined(CONFIG_TN3215_CONSOLE)
                        SET_CONSOLE_3215;
#elif defined(CONFIG_SCLP_CONSOLE) || defined(CONFIG_SCLP_VT220_CONSOLE)
                        SET_CONSOLE_SCLP;
#endif
                } else if (str_has_prefix(ptr + 8, "3215")) {
#if defined(CONFIG_TN3215_CONSOLE)
                        SET_CONSOLE_3215;
#elif defined(CONFIG_TN3270_CONSOLE)
                        SET_CONSOLE_3270;
#elif defined(CONFIG_SCLP_CONSOLE) || defined(CONFIG_SCLP_VT220_CONSOLE)
                        SET_CONSOLE_SCLP;
#endif
                }
        } else if (MACHINE_IS_KVM) {
                if (sclp.has_vt220 && IS_ENABLED(CONFIG_SCLP_VT220_CONSOLE))
                        SET_CONSOLE_VT220;
                else if (sclp.has_linemode && IS_ENABLED(CONFIG_SCLP_CONSOLE))
                        SET_CONSOLE_SCLP;
                else
                        SET_CONSOLE_HVC;
        } else {
#if defined(CONFIG_SCLP_CONSOLE) || defined(CONFIG_SCLP_VT220_CONSOLE)
                SET_CONSOLE_SCLP;
#endif
        }
}

#ifdef CONFIG_CRASH_DUMP
static void __init setup_zfcpdump(void)
{
        if (!is_ipl_type_dump())
                return;
        if (oldmem_data.start)
                return;
        strcat(boot_command_line, " cio_ignore=all,!ipldev,!condev");
        console_loglevel = 2;
}
#else
static inline void setup_zfcpdump(void) {}
#endif /* CONFIG_CRASH_DUMP */

/*
 * Reboot, halt and power_off stubs. They just call _machine_restart,
 * _machine_halt or _machine_power_off.
 */

void machine_restart(char *command)
{
        if ((!in_interrupt() && !in_atomic()) || oops_in_progress)
                /*
                 * Only unblank the console if we are called in enabled
                 * context or a bust_spinlocks cleared the way for us.
                 */
                console_unblank();
        _machine_restart(command);
}

void machine_halt(void)
{
        if (!in_interrupt() || oops_in_progress)
                /*
                 * Only unblank the console if we are called in enabled
                 * context or a bust_spinlocks cleared the way for us.
                 */
                console_unblank();
        _machine_halt();
}

void machine_power_off(void)
{
        if (!in_interrupt() || oops_in_progress)
                /*
                 * Only unblank the console if we are called in enabled
                 * context or a bust_spinlocks cleared the way for us.
                 */
                console_unblank();
        _machine_power_off();
}

/*
 * Dummy power off function.
 */
void (*pm_power_off)(void) = machine_power_off;
EXPORT_SYMBOL_GPL(pm_power_off);

void *restart_stack;

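/*
 * Allocate a kernel stack. With CONFIG_VMAP_STACK the stack is allocated
 * from the vmalloc area, otherwise from directly mapped, physically
 * contiguous pages.
 */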
unsigned long stack_alloc(void)
{
#ifdef CONFIG_VMAP_STACK
        void *ret;

        ret = __vmalloc_node(THREAD_SIZE, THREAD_SIZE, THREADINFO_GFP,
                             NUMA_NO_NODE, __builtin_return_address(0));
        kmemleak_not_leak(ret);
        return (unsigned long)ret;
#else
        return __get_free_pages(GFP_KERNEL, THREAD_SIZE_ORDER);
#endif
}

void stack_free(unsigned long stack)
{
#ifdef CONFIG_VMAP_STACK
        vfree((void *) stack);
#else
        free_pages(stack, THREAD_SIZE_ORDER);
#endif
}

void __init __noreturn arch_call_rest_init(void)
{
        smp_reinit_ipl_cpu();
        rest_init();
}

static unsigned long __init stack_alloc_early(void)
{
        unsigned long stack;

        stack = (unsigned long)memblock_alloc(THREAD_SIZE, THREAD_SIZE);
        if (!stack) {
                panic("%s: Failed to allocate %lu bytes align=0x%lx\n",
                      __func__, THREAD_SIZE, THREAD_SIZE);
        }
        return stack;
}

static void __init setup_lowcore(void)
{
        struct lowcore *lc, *abs_lc;

        /*
         * Set up the lowcore for the boot CPU.
         */
        BUILD_BUG_ON(sizeof(struct lowcore) != LC_PAGES * PAGE_SIZE);
        lc = memblock_alloc_low(sizeof(*lc), sizeof(*lc));
        if (!lc)
                panic("%s: Failed to allocate %zu bytes align=%zx\n",
                      __func__, sizeof(*lc), sizeof(*lc));

        lc->restart_psw.mask = PSW_KERNEL_BITS & ~PSW_MASK_DAT;
        lc->restart_psw.addr = __pa(restart_int_handler);
        lc->external_new_psw.mask = PSW_KERNEL_BITS | PSW_MASK_MCHECK;
        lc->external_new_psw.addr = (unsigned long) ext_int_handler;
        lc->svc_new_psw.mask = PSW_KERNEL_BITS | PSW_MASK_MCHECK;
        lc->svc_new_psw.addr = (unsigned long) system_call;
        lc->program_new_psw.mask = PSW_KERNEL_BITS | PSW_MASK_MCHECK;
        lc->program_new_psw.addr = (unsigned long) pgm_check_handler;
        lc->mcck_new_psw.mask = PSW_KERNEL_BITS;
        lc->mcck_new_psw.addr = (unsigned long) mcck_int_handler;
        lc->io_new_psw.mask = PSW_KERNEL_BITS | PSW_MASK_MCHECK;
        lc->io_new_psw.addr = (unsigned long) io_int_handler;
        lc->clock_comparator = clock_comparator_max;
        lc->current_task = (unsigned long)&init_task;
        lc->lpp = LPP_MAGIC;
        lc->machine_flags = S390_lowcore.machine_flags;
        lc->preempt_count = S390_lowcore.preempt_count;
        nmi_alloc_mcesa_early(&lc->mcesad);
        lc->sys_enter_timer = S390_lowcore.sys_enter_timer;
        lc->exit_timer = S390_lowcore.exit_timer;
        lc->user_timer = S390_lowcore.user_timer;
        lc->system_timer = S390_lowcore.system_timer;
        lc->steal_timer = S390_lowcore.steal_timer;
        lc->last_update_timer = S390_lowcore.last_update_timer;
        lc->last_update_clock = S390_lowcore.last_update_clock;
        /*
         * Allocate the global restart stack which is the same for
         * all CPUs in case *one* of them does a PSW restart.
         */
        restart_stack = (void *)(stack_alloc_early() + STACK_INIT_OFFSET);
        lc->mcck_stack = stack_alloc_early() + STACK_INIT_OFFSET;
        lc->async_stack = stack_alloc_early() + STACK_INIT_OFFSET;
        lc->nodat_stack = stack_alloc_early() + STACK_INIT_OFFSET;
        lc->kernel_stack = S390_lowcore.kernel_stack;
        /*
         * Set up PSW restart to call ipl.c:do_restart(). Copy the relevant
         * restart data to the absolute zero lowcore. This is necessary if
         * PSW restart is done on an offline CPU that has lowcore zero.
         */
        lc->restart_stack = (unsigned long) restart_stack;
        lc->restart_fn = (unsigned long) do_restart;
        lc->restart_data = 0;
        lc->restart_source = -1U;
        __ctl_store(lc->cregs_save_area, 0, 15);
        lc->spinlock_lockval = arch_spin_lockval(0);
        lc->spinlock_index = 0;
        arch_spin_lock_setup(0);
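        /*
         * Precompute the lpswe instructions used to return to the
         * interrupted context via the lowcore return PSWs.
         */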
        lc->return_lpswe = gen_lpswe(__LC_RETURN_PSW);
        lc->return_mcck_lpswe = gen_lpswe(__LC_RETURN_MCCK_PSW);
        lc->preempt_count = PREEMPT_DISABLED;
        lc->kernel_asce = S390_lowcore.kernel_asce;
        lc->user_asce = S390_lowcore.user_asce;

        abs_lc = get_abs_lowcore();
        abs_lc->restart_stack = lc->restart_stack;
        abs_lc->restart_fn = lc->restart_fn;
        abs_lc->restart_data = lc->restart_data;
        abs_lc->restart_source = lc->restart_source;
        abs_lc->restart_psw = lc->restart_psw;
        abs_lc->restart_flags = RESTART_FLAG_CTLREGS;
        memcpy(abs_lc->cregs_save_area, lc->cregs_save_area, sizeof(abs_lc->cregs_save_area));
        abs_lc->program_new_psw = lc->program_new_psw;
        abs_lc->mcesad = lc->mcesad;
        put_abs_lowcore(abs_lc);

        set_prefix(__pa(lc));
        lowcore_ptr[0] = lc;
        if (abs_lowcore_map(0, lowcore_ptr[0], false))
                panic("Couldn't setup absolute lowcore");
}

static struct resource code_resource = {
        .name  = "Kernel code",
        .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM,
};

static struct resource data_resource = {
        .name = "Kernel data",
        .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM,
};

static struct resource bss_resource = {
        .name = "Kernel bss",
        .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM,
};

static struct resource __initdata *standard_resources[] = {
        &code_resource,
        &data_resource,
        &bss_resource,
};

static void __init setup_resources(void)
{
        struct resource *res, *std_res, *sub_res;
        phys_addr_t start, end;
        int j;
        u64 i;

        code_resource.start = (unsigned long) _text;
        code_resource.end = (unsigned long) _etext - 1;
        data_resource.start = (unsigned long) _etext;
        data_resource.end = (unsigned long) _edata - 1;
        bss_resource.start = (unsigned long) __bss_start;
        bss_resource.end = (unsigned long) __bss_stop - 1;

        for_each_mem_range(i, &start, &end) {
                res = memblock_alloc(sizeof(*res), 8);
                if (!res)
                        panic("%s: Failed to allocate %zu bytes align=0x%x\n",
                              __func__, sizeof(*res), 8);
                res->flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM;

                res->name = "System RAM";
                res->start = start;
                /*
                 * In memblock, end points to the first byte after the
                 * range, while in resources, end points to the last byte
                 * in the range.
                 */
                res->end = end - 1;
                request_resource(&iomem_resource, res);

                for (j = 0; j < ARRAY_SIZE(standard_resources); j++) {
                        std_res = standard_resources[j];
                        if (std_res->start < res->start ||
                            std_res->start > res->end)
                                continue;
                        if (std_res->end > res->end) {
                                sub_res = memblock_alloc(sizeof(*sub_res), 8);
                                if (!sub_res)
                                        panic("%s: Failed to allocate %zu bytes align=0x%x\n",
                                              __func__, sizeof(*sub_res), 8);
                                *sub_res = *std_res;
                                sub_res->end = res->end;
                                std_res->start = res->end + 1;
                                request_resource(res, sub_res);
                        } else {
                                request_resource(res, std_res);
                        }
                }
        }
#ifdef CONFIG_CRASH_DUMP
        /*
         * Re-add removed crash kernel memory as reserved memory. This makes
         * sure it will be mapped with the identity mapping and struct pages
         * will be created, so it can be resized later on.
         * However, add it later, since the crash kernel resource should not
         * be part of the System RAM resource.
         */
        if (crashk_res.end) {
                memblock_add_node(crashk_res.start, resource_size(&crashk_res),
                                  0, MEMBLOCK_NONE);
                memblock_reserve(crashk_res.start, resource_size(&crashk_res));
                insert_resource(&iomem_resource, &crashk_res);
        }
#endif
}

static void __init setup_memory_end(void)
{
        max_pfn = max_low_pfn = PFN_DOWN(ident_map_size);
        pr_notice("The maximum memory size is %luMB\n", ident_map_size >> 20);
}

#ifdef CONFIG_CRASH_DUMP

/*
 * When kdump is enabled, we have to ensure that no memory from the area
 * [0 - crashkernel memory size] is set offline - it will be exchanged with
 * the crashkernel memory region when kdump is triggered. The crashkernel
 * memory region can never get offlined (pages are unmovable).
 */
static int kdump_mem_notifier(struct notifier_block *nb,
                              unsigned long action, void *data)
{
        struct memory_notify *arg = data;

        if (action != MEM_GOING_OFFLINE)
                return NOTIFY_OK;
        if (arg->start_pfn < PFN_DOWN(resource_size(&crashk_res)))
                return NOTIFY_BAD;
        return NOTIFY_OK;
}

static struct notifier_block kdump_mem_nb = {
        .notifier_call = kdump_mem_notifier,
};

#endif

/*
 * Reserve page tables created by the decompressor
 */
static void __init reserve_pgtables(void)
{
        unsigned long start, end;
        struct reserved_range *range;

        for_each_physmem_reserved_type_range(RR_VMEM, range, &start, &end)
                memblock_reserve(start, end - start);
}

/*
 * Reserve memory for the kdump kernel to be loaded with kexec
 */
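/*
 * For example, "crashkernel=256M" on the kernel command line reserves
 * 256 MB at a kernel-chosen base address, while "crashkernel=256M@1G"
 * requests a fixed base address (both values are illustrative).
 */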
static void __init reserve_crashkernel(void)
{
#ifdef CONFIG_CRASH_DUMP
        unsigned long long crash_base, crash_size;
        phys_addr_t low, high;
        int rc;

        rc = parse_crashkernel(boot_command_line, ident_map_size, &crash_size,
                               &crash_base);

        crash_base = ALIGN(crash_base, KEXEC_CRASH_MEM_ALIGN);
        crash_size = ALIGN(crash_size, KEXEC_CRASH_MEM_ALIGN);
        if (rc || crash_size == 0)
                return;

        if (memblock.memory.regions[0].size < crash_size) {
                pr_info("crashkernel reservation failed: %s\n",
                        "first memory chunk must be at least crashkernel size");
                return;
        }

        low = crash_base ?: oldmem_data.start;
        high = low + crash_size;
        if (low >= oldmem_data.start && high <= oldmem_data.start + oldmem_data.size) {
                /* The crashkernel fits into OLDMEM, reuse OLDMEM */
                crash_base = low;
        } else {
                /* Find suitable area in free memory */
                low = max_t(unsigned long, crash_size, sclp.hsa_size);
                high = crash_base ? crash_base + crash_size : ULONG_MAX;

                if (crash_base && crash_base < low) {
                        pr_info("crashkernel reservation failed: %s\n",
                                "crash_base too low");
                        return;
                }
                low = crash_base ?: low;
                crash_base = memblock_phys_alloc_range(crash_size,
                                                       KEXEC_CRASH_MEM_ALIGN,
                                                       low, high);
        }

        if (!crash_base) {
                pr_info("crashkernel reservation failed: %s\n",
                        "no suitable area found");
                return;
        }

        if (register_memory_notifier(&kdump_mem_nb)) {
                memblock_phys_free(crash_base, crash_size);
                return;
        }

        if (!oldmem_data.start && MACHINE_IS_VM)
                diag10_range(PFN_DOWN(crash_base), PFN_DOWN(crash_size));
        crashk_res.start = crash_base;
        crashk_res.end = crash_base + crash_size - 1;
        memblock_remove(crash_base, crash_size);
        pr_info("Reserving %lluMB of memory at %lluMB "
                "for crashkernel (System RAM: %luMB)\n",
                crash_size >> 20, crash_base >> 20,
                (unsigned long)memblock.memory.total_size >> 20);
        os_info_crashkernel_add(crash_base, crash_size);
#endif
}

/*
 * Reserve the initrd memory so that it cannot be reused by memblock
 */
static void __init reserve_initrd(void)
{
        unsigned long addr, size;

        if (!IS_ENABLED(CONFIG_BLK_DEV_INITRD) || !get_physmem_reserved(RR_INITRD, &addr, &size))
                return;
        initrd_start = (unsigned long)__va(addr);
        initrd_end = initrd_start + size;
        memblock_reserve(addr, size);
}

/*
 * Reserve the memory area used to pass the certificate lists
 */
static void __init reserve_certificate_list(void)
{
        if (ipl_cert_list_addr)
                memblock_reserve(ipl_cert_list_addr, ipl_cert_list_size);
}

static void __init reserve_physmem_info(void)
{
        unsigned long addr, size;

        if (get_physmem_reserved(RR_MEM_DETECT_EXTENDED, &addr, &size))
                memblock_reserve(addr, size);
}

static void __init free_physmem_info(void)
{
        unsigned long addr, size;

        if (get_physmem_reserved(RR_MEM_DETECT_EXTENDED, &addr, &size))
                memblock_phys_free(addr, size);
}

static void __init memblock_add_physmem_info(void)
{
        unsigned long start, end;
        int i;

        pr_debug("physmem info source: %s (%hhd)\n",
                 get_physmem_info_source(), physmem_info.info_source);
        /* keep memblock lists close to the kernel */
        memblock_set_bottom_up(true);
        for_each_physmem_usable_range(i, &start, &end)
                memblock_add(start, end - start);
        for_each_physmem_online_range(i, &start, &end)
                memblock_physmem_add(start, end - start);
        memblock_set_bottom_up(false);
        memblock_set_node(0, ULONG_MAX, &memblock.memory, 0);
}

/*
 * Reserve memory used for lowcore/command line/kernel image.
 */
static void __init reserve_kernel(void)
{
        memblock_reserve(0, STARTUP_NORMAL_OFFSET);
        memblock_reserve(OLDMEM_BASE, sizeof(unsigned long));
        memblock_reserve(OLDMEM_SIZE, sizeof(unsigned long));
        memblock_reserve(physmem_info.reserved[RR_AMODE31].start, __eamode31 - __samode31);
        memblock_reserve(__pa(sclp_early_sccb), EXT_SCCB_READ_SCP);
        memblock_reserve(__pa(_stext), _end - _stext);
}

static void __init setup_memory(void)
{
        phys_addr_t start, end;
        u64 i;

        /*
         * Init storage key for present memory
         */
        for_each_mem_range(i, &start, &end)
                storage_key_init_range(start, end);

        psw_set_key(PAGE_DEFAULT_KEY);
}

static void __init relocate_amode31_section(void)
{
        unsigned long amode31_size = __eamode31 - __samode31;
        long amode31_offset = physmem_info.reserved[RR_AMODE31].start - __samode31;
        long *ptr;

        pr_info("Relocating AMODE31 section of size 0x%08lx\n", amode31_size);

        /* Move original AMODE31 section to the new one */
        memmove((void *)physmem_info.reserved[RR_AMODE31].start, (void *)__samode31, amode31_size);
        /* Zero out the old AMODE31 section to catch invalid accesses within it */
        memset((void *)__samode31, 0, amode31_size);

        /* Update all AMODE31 region references */
        for (ptr = _start_amode31_refs; ptr != _end_amode31_refs; ptr++)
                *ptr += amode31_offset;
}

/* This must be called after AMODE31 relocation */
static void __init setup_cr(void)
{
        union ctlreg2 cr2;
        union ctlreg5 cr5;
        union ctlreg15 cr15;

        __ctl_duct[1] = (unsigned long)__ctl_aste;
        __ctl_duct[2] = (unsigned long)__ctl_aste;
        __ctl_duct[4] = (unsigned long)__ctl_duald;

        /* Update control registers CR2, CR5 and CR15 */
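        /*
         * The ducto and pasteo fields hold the DUCT origin shifted right
         * by six bits (64-byte alignment), the lsea field holds the
         * linkage stack entry address shifted right by three bits.
         */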
        __ctl_store(cr2.val, 2, 2);
        __ctl_store(cr5.val, 5, 5);
        __ctl_store(cr15.val, 15, 15);
        cr2.ducto = (unsigned long)__ctl_duct >> 6;
        cr5.pasteo = (unsigned long)__ctl_duct >> 6;
        cr15.lsea = (unsigned long)__ctl_linkage_stack >> 3;
        __ctl_load(cr2.val, 2, 2);
        __ctl_load(cr5.val, 5, 5);
        __ctl_load(cr15.val, 15, 15);
}

/*
 * Add system information as device randomness
 */
static void __init setup_randomness(void)
{
        struct sysinfo_3_2_2 *vmms;

        vmms = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
        if (!vmms)
                panic("Failed to allocate memory for sysinfo structure\n");
        if (stsi(vmms, 3, 2, 2) == 0 && vmms->count)
                add_device_randomness(&vmms->vm, sizeof(vmms->vm[0]) * vmms->count);
        memblock_free(vmms, PAGE_SIZE);

        if (cpacf_query_func(CPACF_PRNO, CPACF_PRNO_TRNG))
                static_branch_enable(&s390_arch_random_available);
}

/*
 * Find the correct size for the task_struct. This depends on
 * the size of the struct fpu at the end of the thread_struct,
 * which is embedded in the task_struct.
 */
static void __init setup_task_size(void)
{
        int task_size = sizeof(struct task_struct);

        if (!MACHINE_HAS_VX) {
                task_size -= sizeof(__vector128) * __NUM_VXRS;
                task_size += sizeof(freg_t) * __NUM_FPRS;
        }
        arch_task_struct_size = task_size;
}

/*
 * Issue diagnose 318 to set the control program name and
 * version codes.
 */
static void __init setup_control_program_code(void)
{
        union diag318_info diag318_info = {
                .cpnc = CPNC_LINUX,
                .cpvc = 0,
        };

        if (!sclp.has_diag318)
                return;

        diag_stat_inc(DIAG_STAT_X318);
        asm volatile("diag %0,0,0x318\n" : : "d" (diag318_info.val));
}

/*
 * Print the component list from the IPL report
 */
static void __init log_component_list(void)
{
        struct ipl_rb_component_entry *ptr, *end;
        char *str;

        if (!early_ipl_comp_list_addr)
                return;
        if (ipl_block.hdr.flags & IPL_PL_FLAG_SIPL)
                pr_info("Linux is running with Secure-IPL enabled\n");
        else
                pr_info("Linux is running with Secure-IPL disabled\n");
        ptr = (void *) early_ipl_comp_list_addr;
        end = (void *) ptr + early_ipl_comp_list_size;
        pr_info("The IPL report contains the following components:\n");
        while (ptr < end) {
                if (ptr->flags & IPL_RB_COMPONENT_FLAG_SIGNED) {
                        if (ptr->flags & IPL_RB_COMPONENT_FLAG_VERIFIED)
                                str = "signed, verified";
                        else
                                str = "signed, verification failed";
                } else {
                        str = "not signed";
                }
                pr_info("%016llx - %016llx (%s)\n",
                        ptr->addr, ptr->addr + ptr->len, str);
                ptr++;
        }
}

/*
 * Setup function called from init/main.c just after the banner
 * was printed.
 */

void __init setup_arch(char **cmdline_p)
{
        /*
         * print what head.S has found out about the machine
         */
        if (MACHINE_IS_VM)
                pr_info("Linux is running as a z/VM "
                        "guest operating system in 64-bit mode\n");
        else if (MACHINE_IS_KVM)
                pr_info("Linux is running under KVM in 64-bit mode\n");
        else if (MACHINE_IS_LPAR)
                pr_info("Linux is running natively in 64-bit mode\n");
        else
                pr_info("Linux is running as a guest in 64-bit mode\n");

        log_component_list();

        /* Have one command line that is parsed and saved in /proc/cmdline */
        /* boot_command_line has already been set up in early.c */
        *cmdline_p = boot_command_line;

        ROOT_DEV = Root_RAM0;

        setup_initial_init_mm(_text, _etext, _edata, _end);

        if (IS_ENABLED(CONFIG_EXPOLINE_AUTO))
                nospec_auto_detect();

        jump_label_init();
        parse_early_param();
#ifdef CONFIG_CRASH_DUMP
        /* Deactivate elfcorehdr= kernel parameter */
        elfcorehdr_addr = ELFCORE_ADDR_MAX;
#endif

        os_info_init();
        setup_ipl();
        setup_task_size();
        setup_control_program_code();

        /* Do some memory reservations *before* memory is added to memblock */
        reserve_pgtables();
        reserve_kernel();
        reserve_initrd();
        reserve_certificate_list();
        reserve_physmem_info();
        memblock_set_current_limit(ident_map_size);
        memblock_allow_resize();

        /* Get information about *all* installed memory */
        memblock_add_physmem_info();

        free_physmem_info();
        setup_memory_end();
        memblock_dump_all();
        setup_memory();

        relocate_amode31_section();
        setup_cr();
        setup_uv();
        dma_contiguous_reserve(ident_map_size);
        vmcp_cma_reserve();
        if (MACHINE_HAS_EDAT2)
                hugetlb_cma_reserve(PUD_SHIFT - PAGE_SHIFT);

        reserve_crashkernel();
#ifdef CONFIG_CRASH_DUMP
        /*
         * Be aware that smp_save_dump_secondary_cpus() triggers a system reset.
         * Therefore CPU and device initialization should be done afterwards.
         */
        smp_save_dump_secondary_cpus();
#endif

        setup_resources();
        setup_lowcore();
        smp_fill_possible_mask();
        cpu_detect_mhz_feature();
        cpu_init();
        numa_setup();
        smp_detect_cpus();
        topology_init_early();

        if (test_facility(193))
                static_branch_enable(&cpu_has_bear);

        /*
         * Create kernel page tables.
         */
        paging_init();

        /*
         * After paging_init created the kernel page table, the new PSWs
         * in lowcore can now run with DAT enabled.
         */
#ifdef CONFIG_CRASH_DUMP
        smp_save_dump_ipl_cpu();
#endif

        /* Setup default console */
        conmode_default();
        set_preferred_console();

        apply_alternative_instructions();
        if (IS_ENABLED(CONFIG_EXPOLINE))
                nospec_init_branches();

        /* Setup zfcp/nvme dump support */
        setup_zfcpdump();

        /* Add system specific data to the random pool */
        setup_randomness();
}