config HOTPLUG_SMT
bool
+# Selected by HOTPLUG_CORE_SYNC_DEAD or HOTPLUG_CORE_SYNC_FULL
+config HOTPLUG_CORE_SYNC
+ bool
+
+# Basic CPU dead synchronization selected by architecture
+config HOTPLUG_CORE_SYNC_DEAD
+ bool
+ select HOTPLUG_CORE_SYNC
+
+# Full CPU synchronization with alive state selected by architecture
+config HOTPLUG_CORE_SYNC_FULL
+ bool
+ select HOTPLUG_CORE_SYNC_DEAD if HOTPLUG_CPU
+ select HOTPLUG_CORE_SYNC
+
+config HOTPLUG_SPLIT_STARTUP
+ bool
+ select HOTPLUG_CORE_SYNC_FULL
+
+config HOTPLUG_PARALLEL
+ bool
+ select HOTPLUG_SPLIT_STARTUP
+
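A quick illustration of how an architecture consumes these new symbols (a minimal sketch, assuming the cpuhp_ap_report_dead()/arch_cpuhp_cleanup_dead_cpu() hooks this series introduces; the example_* name is hypothetical):

	#include <linux/cpu.h>

	/* Hypothetical arch code: runs on the CPU that is going down. */
	void __noreturn example_arch_cpu_die(void)
	{
		/* Replaces the old cpu_report_death() handshake. */
		cpuhp_ap_report_dead();

		while (1)
			cpu_relax();	/* the arch would enter a low-power wait here */
	}

	/*
	 * Invoked by the hotplug core on the control CPU once the dead state
	 * has been synchronized; final arch-specific teardown goes here.
	 */
	void arch_cpuhp_cleanup_dead_cpu(unsigned int cpu)
	{
	}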
config GENERIC_ENTRY
bool
config ARCH_HAS_DMA_CLEAR_UNCACHED
bool
+config ARCH_HAS_CPU_FINALIZE_INIT
+ bool
+
# Select if arch init_task must go in the __init_task_data section
config ARCH_TASK_STRUCT_ON_STACK
bool
The arch chooses to use the generic perf-NMI-based hardlockup
detector. Must define HAVE_PERF_EVENTS_NMI.
-config HAVE_NMI_WATCHDOG
- depends on HAVE_NMI
- bool
- help
- The arch provides a low level NMI watchdog. It provides
- asm/nmi.h, and defines its own arch_touch_nmi_watchdog().
-
config HAVE_HARDLOCKUP_DETECTOR_ARCH
bool
- select HAVE_NMI_WATCHDOG
help
- The arch chooses to provide its own hardlockup detector, which is
- a superset of the HAVE_NMI_WATCHDOG. It also conforms to config
- interfaces and parameters provided by hardlockup detector subsystem.
+ The arch provides its own hardlockup detector implementation instead
+ of the generic ones.
+
+ It uses the same command line parameters and sysctl interface as the
+ generic hardlockup detectors.
config HAVE_PERF_REGS
bool
config ARCH_NO_PREEMPT
bool
-config ARCH_EPHEMERAL_INODES
- def_bool n
- help
- An arch should select this symbol if it doesn't keep track of inode
- instances on its own, but instead relies on something else (e.g. the
- host kernel for an UML kernel).
-
config ARCH_SUPPORTS_RT
bool
select CRC32
select DCACHE_WORD_ACCESS
select DYNAMIC_FTRACE if FUNCTION_TRACER
+ select DMA_BOUNCE_UNALIGNED_KMALLOC
select DMA_DIRECT_REMAP
select EDAC_SUPPORT
select FRAME_POINTER
select HAVE_FUNCTION_ERROR_INJECTION
select HAVE_FUNCTION_GRAPH_TRACER
select HAVE_GCC_PLUGINS
+ select HAVE_HARDLOCKUP_DETECTOR_PERF if PERF_EVENTS && \
+ HW_PERF_EVENTS && HAVE_PERF_EVENTS_NMI
select HAVE_HW_BREAKPOINT if PERF_EVENTS
select HAVE_IOREMAP_PROT
select HAVE_IRQ_TIME_ACCOUNTING
select HAVE_KVM
+ select HAVE_MOD_ARCH_SPECIFIC
select HAVE_NMI
select HAVE_PERF_EVENTS
+ select HAVE_PERF_EVENTS_NMI if ARM64_PSEUDO_NMI
select HAVE_PERF_REGS
select HAVE_PERF_USER_STACK_DUMP
select HAVE_PREEMPT_DYNAMIC_KEY
select HAVE_KPROBES
select HAVE_KRETPROBES
select HAVE_GENERIC_VDSO
+ select HOTPLUG_CORE_SYNC_DEAD if HOTPLUG_CPU
select IRQ_DOMAIN
select IRQ_FORCED_THREADING
select KASAN_VMALLOC if KASAN
config ARM64_ERRATUM_843419
bool "Cortex-A53: 843419: A load or store might access an incorrect address"
default y
- select ARM64_MODULE_PLTS if MODULES
help
This option links the kernel with '--fix-cortex-a53-843419' and
enables PLT support to replace certain ADRP instructions, which can
# 16K | 27 | 14 | 13 | 11 |
# 64K | 29 | 16 | 13 | 13 |
config ARCH_FORCE_MAX_ORDER
- int "Order of maximal physically contiguous allocations" if EXPERT && (ARM64_4K_PAGES || ARM64_16K_PAGES)
+ int
default "13" if ARM64_64K_PAGES
default "11" if ARM64_16K_PAGES
default "10"
When this option is enabled, user applications can opt in to a
relaxed ABI via prctl() allowing tagged addresses to be passed
to system calls as pointer arguments. For details, see
- Documentation/arm64/tagged-address-abi.rst.
+ Documentation/arch/arm64/tagged-address-abi.rst.
menuconfig COMPAT
bool "Kernel support for 32-bit EL0"
the system. This permits binaries to be run on ARMv4 through
to ARMv8 without modification.
- See Documentation/arm/kernel_user_helpers.rst for details.
+ See Documentation/arch/arm/kernel_user_helpers.rst for details.
However, the fixed address nature of these helpers can be used
by ROP (return orientated programming) authors when creating
explicitly opt in. The mechanism for the userspace is
described in:
- Documentation/arm64/memory-tagging-extension.rst.
+ Documentation/arch/arm64/memory-tagging-extension.rst.
endmenu # "ARMv8.5 architectural features"
register state capable of holding two dimensional matrix tiles to
enable various matrix operations.
-config ARM64_MODULE_PLTS
- bool "Use PLTs to allow module memory to spill over into vmalloc area"
- depends on MODULES
- select HAVE_MOD_ARCH_SPECIFIC
- help
- Allocate PLTs when loading modules so that jumps and calls whose
- targets are too far away for their relative offsets to be encoded
- in the instructions themselves can be bounced via veneers in the
- module's PLT. This allows modules to be allocated in the generic
- vmalloc area after the dedicated module memory area has been
- exhausted.
-
- When running with address space randomization (KASLR), the module
- region itself may be too far away for ordinary relative jumps and
- calls, and so in that case, module PLTs are required and cannot be
- disabled.
-
- Specific errata workaround(s) might also force module PLTs to be
- enabled (ARM64_ERRATUM_843419).
-
config ARM64_PSEUDO_NMI
bool "Support for NMI-like interrupts"
select ARM_GIC_V3
config RANDOMIZE_BASE
bool "Randomize the address of the kernel image"
- select ARM64_MODULE_PLTS if MODULES
select RELOCATABLE
help
Randomizes the virtual address at which the kernel image is
When this option is not set, the module region will be randomized over
a limited range that contains the [_stext, _etext] interval of the
core kernel, so branch relocations are almost always in range unless
- ARM64_MODULE_PLTS is enabled and the region is exhausted. In this
- particular case of region exhaustion, modules might be able to fall
- back to a larger 2GB area.
+ the region is exhausted. In this particular case of region
+ exhaustion, modules might be able to fall back to a larger 2GB area.
config CC_HAVE_STACKPROTECTOR_SYSREG
def_bool $(cc-option,-mstack-protector-guard=sysreg -mstack-protector-guard-reg=sp_el0 -mstack-protector-guard-offset=0)
obj-$(CONFIG_COMPAT_ALIGNMENT_FIXUPS) += compat_alignment.o
obj-$(CONFIG_KUSER_HELPERS) += kuser32.o
obj-$(CONFIG_FUNCTION_TRACER) += ftrace.o entry-ftrace.o
-obj-$(CONFIG_MODULES) += module.o
-obj-$(CONFIG_ARM64_MODULE_PLTS) += module-plts.o
+obj-$(CONFIG_MODULES) += module.o module-plts.o
obj-$(CONFIG_PERF_EVENTS) += perf_regs.o perf_callchain.o
+obj-$(CONFIG_HARDLOCKUP_DETECTOR_PERF) += watchdog_hld.o
obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o
obj-$(CONFIG_CPU_PM) += sleep.o suspend.o
obj-$(CONFIG_CPU_IDLE) += cpuidle.o
static void
smp_cpu_init(int cpunum)
{
- extern void init_IRQ(void); /* arch/parisc/kernel/irq.c */
extern void start_cpu_itimer(void); /* arch/parisc/kernel/time.c */
/* Set modes and Enable floating point coprocessor */
void __cpu_die(unsigned int cpu)
{
pdc_cpu_rendezvous_lock();
+}
- if (!cpu_wait_death(cpu, 5)) {
- pr_crit("CPU%u: cpu didn't die\n", cpu);
- return;
- }
+void arch_cpuhp_cleanup_dead_cpu(unsigned int cpu)
+{
pr_info("CPU%u: is shutting down\n", cpu);
/* set task's state to interruptible sleep */
select ARCH_WANT_IPC_PARSE_VERSION
select GENERIC_PCI_IOMAP
select HAS_IOPORT
- select HAVE_NMI_WATCHDOG if SPARC64
+ select HAVE_HARDLOCKUP_DETECTOR_SPARC64 if SPARC64
select HAVE_CBPF_JIT if SPARC32
select HAVE_EBPF_JIT if SPARC64
select HAVE_DEBUG_BUGVERBOSE
config SPARC32
def_bool !64BIT
select ARCH_32BIT_OFF_T
+ select ARCH_HAS_CPU_FINALIZE_INIT if !SMP
select ARCH_HAS_SYNC_DMA_FOR_CPU
select CLZ_TAB
select DMA_DIRECT_REMAP
extern void tsc_early_init(void);
extern void tsc_init(void);
-extern unsigned long calibrate_delay_is_known(void);
extern void mark_tsc_unstable(char *reason);
extern int unsynchronized_tsc(void);
extern int check_tsc_unstable(void);
#ifdef CONFIG_X86_TSC
extern bool tsc_store_and_check_tsc_adjust(bool bootcpu);
extern void tsc_verify_tsc_adjust(bool resume);
-extern void check_tsc_sync_source(int cpu);
extern void check_tsc_sync_target(void);
#else
static inline bool tsc_store_and_check_tsc_adjust(bool bootcpu) { return false; }
static inline void tsc_verify_tsc_adjust(bool resume) { }
-static inline void check_tsc_sync_source(int cpu) { }
static inline void check_tsc_sync_target(void) { }
#endif
{
if (event->hw.flags & ARMPMU_EVT_64BIT)
return GENMASK_ULL(63, 0);
+ else if (event->hw.flags & ARMPMU_EVT_63BIT)
+ return GENMASK_ULL(62, 0);
else if (event->hw.flags & ARMPMU_EVT_47BIT)
return GENMASK_ULL(46, 0);
else
return per_cpu(hw_events->irq, cpu);
}
+bool arm_pmu_irq_is_nmi(void)
+{
+ return has_nmi;
+}
+
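For context, a minimal sketch of the intended consumer, assuming the arm64 perf-based hardlockup detector added elsewhere in this series:

	/*
	 * Only report the perf hardlockup detector as usable when PMU
	 * interrupts are real NMIs (pseudo-NMI enabled); a plain IRQ cannot
	 * fire while a CPU is hard-locked.
	 */
	bool __init arch_perf_nmi_is_available(void)
	{
		return arm_pmu_irq_is_nmi();
	}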
/*
* PMU hardware loses all context when a CPU goes offline.
* When a CPU is hotplugged back in, since some hardware registers are
#include <linux/platform_device.h>
#include <linux/sched_clock.h>
#include <linux/smp.h>
+#include <linux/nmi.h>
#include <asm/arm_pmuv3.h>
return value;
}
+static void update_pmuserenr(u64 val)
+{
+ lockdep_assert_irqs_disabled();
+
+ /*
+ * The current PMUSERENR_EL0 value might be the value for the guest.
+ * If that's the case, have KVM keep tracking of the register value
+ * for the host EL0 so that KVM can restore it before returning to
+ * the host EL0. Otherwise, update the register now.
+ */
+ if (kvm_set_pmuserenr(val))
+ return;
+
+ write_pmuserenr(val);
+}
+
static void armv8pmu_disable_user_access(void)
{
- write_pmuserenr(0);
+ update_pmuserenr(0);
}
static void armv8pmu_enable_user_access(struct arm_pmu *cpu_pmu)
armv8pmu_write_evcntr(i, 0);
}
- write_pmuserenr(0);
- write_pmuserenr(ARMV8_PMU_USERENR_ER | ARMV8_PMU_USERENR_CR);
+ update_pmuserenr(ARMV8_PMU_USERENR_ER | ARMV8_PMU_USERENR_CR);
}
static void armv8pmu_enable_event(struct perf_event *event)
static int __init armv8_pmu_driver_init(void)
{
+ int ret;
+
if (acpi_disabled)
- return platform_driver_register(&armv8_pmu_driver);
+ ret = platform_driver_register(&armv8_pmu_driver);
else
- return arm_pmu_acpi_probe(armv8_pmuv3_pmu_init);
+ ret = arm_pmu_acpi_probe(armv8_pmuv3_pmu_init);
+
+ if (!ret)
+ lockup_detector_retry_init();
+
+ return ret;
}
device_initcall(armv8_pmu_driver_init)
DEFINE_OCFS2_FILE_OPS(ocfs2_file_write_iter);
-DEFINE_OCFS2_FILE_OPS(ocfs2_file_splice_write);
-
DEFINE_OCFS2_FILE_OPS(ocfs2_file_read_iter);
+DEFINE_OCFS2_FILE_OPS(ocfs2_file_splice_read);
+
DEFINE_OCFS2_ULL_ULL_ULL_EVENT(ocfs2_truncate_file);
DEFINE_OCFS2_ULL_ULL_EVENT(ocfs2_truncate_file_error);
);
DEFINE_OCFS2_INT_EVENT(generic_file_read_iter_ret);
+DEFINE_OCFS2_INT_EVENT(filemap_splice_read_ret);
/* End of trace events for fs/ocfs2/file.c. */
extern void acpi_early_init(void);
extern void acpi_subsystem_init(void);
-extern void arch_post_acpi_subsys_init(void);
extern int acpi_nvs_register(__u64 start, __u64 size);
#endif /* !CONFIG_ACPI */
+extern void arch_post_acpi_subsys_init(void);
+
#ifdef CONFIG_ACPI_HOTPLUG_IOAPIC
int acpi_ioapic_add(acpi_handle root);
#else
}
#endif
+#ifdef CONFIG_ARM64
+void acpi_arm_init(void);
+#else
+static inline void acpi_arm_init(void) { }
+#endif
+
#ifdef CONFIG_ACPI_PCC
void acpi_init_pcc(void);
#else
#else /* CONFIG_PER_VMA_LOCK */
-static inline void vma_init_lock(struct vm_area_struct *vma) {}
static inline bool vma_start_read(struct vm_area_struct *vma)
{ return false; }
static inline void vma_end_read(struct vm_area_struct *vma) {}
return mas_find(&vmi->mas, ULONG_MAX);
}
+static inline
+struct vm_area_struct *vma_iter_next_range(struct vma_iterator *vmi)
+{
+ return mas_next_range(&vmi->mas, ULONG_MAX);
+}
+
static inline struct vm_area_struct *vma_prev(struct vma_iterator *vmi)
{
return mas_prev(&vmi->mas, 0);
}
+static inline
+struct vm_area_struct *vma_iter_prev_range(struct vma_iterator *vmi)
+{
+ return mas_prev_range(&vmi->mas, 0);
+}
+
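A minimal usage sketch for the two *_range iterators above (the example_ name is hypothetical; the mm is assumed to be locked for reading):

	/* Return the entry in the tree range after @start's VMA, if any. */
	struct vm_area_struct *example_next_range(struct mm_struct *mm,
						  unsigned long start)
	{
		VMA_ITERATOR(vmi, mm, start);

		if (!vma_find(&vmi, ULONG_MAX))	/* position on the first VMA */
			return NULL;
		/* Step exactly one tree range; NULL means an unmapped gap. */
		return vma_iter_next_range(&vmi);
	}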
static inline unsigned long vma_iter_addr(struct vma_iterator *vmi)
{
return vmi->mas.index;
#endif
NR_COMPOUND_DTORS,
};
-extern compound_page_dtor * const compound_page_dtors[NR_COMPOUND_DTORS];
-
-static inline void set_compound_page_dtor(struct page *page,
- enum compound_dtor_id compound_dtor)
-{
- struct folio *folio = (struct folio *)page;
-
- VM_BUG_ON_PAGE(compound_dtor >= NR_COMPOUND_DTORS, page);
- VM_BUG_ON_PAGE(!PageHead(page), page);
- folio->_folio_dtor = compound_dtor;
-}
static inline void folio_set_compound_dtor(struct folio *folio,
enum compound_dtor_id compound_dtor)
void destroy_large_folio(struct folio *folio);
-static inline void set_compound_order(struct page *page, unsigned int order)
-{
- struct folio *folio = (struct folio *)page;
-
- folio->_folio_order = order;
-#ifdef CONFIG_64BIT
- folio->_folio_nr_pages = 1U << order;
-#endif
-}
-
/* Returns the number of bytes in this potentially compound page. */
static inline unsigned long page_size(struct page *page)
{
return page_maybe_dma_pinned(page);
}
-/* MIGRATE_CMA and ZONE_MOVABLE do not allow pin pages */
+/**
+ * is_zero_page - Query if a page is a zero page
+ * @page: The page to query
+ *
+ * This returns true if @page is one of the permanent zero pages.
+ */
+static inline bool is_zero_page(const struct page *page)
+{
+ return is_zero_pfn(page_to_pfn(page));
+}
+
+/**
+ * is_zero_folio - Query if a folio is a zero page
+ * @folio: The folio to query
+ *
+ * This returns true if @folio is one of the permanent zero pages.
+ */
+static inline bool is_zero_folio(const struct folio *folio)
+{
+ return is_zero_page(&folio->page);
+}
+
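A minimal sketch of how pinning code is expected to use these predicates (simplified; the example_ name is hypothetical):

	void example_unpin_folio(struct folio *folio)
	{
		/* The permanent zero pages are never really pinned. */
		if (is_zero_folio(folio))
			return;

		folio_put_refs(folio, 1);	/* simplified release step */
	}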
+/* MIGRATE_CMA and ZONE_MOVABLE do not allow pinning folios */
#ifdef CONFIG_MIGRATION
-static inline bool is_longterm_pinnable_page(struct page *page)
+static inline bool folio_is_longterm_pinnable(struct folio *folio)
{
#ifdef CONFIG_CMA
- int mt = get_pageblock_migratetype(page);
+ int mt = folio_migratetype(folio);
if (mt == MIGRATE_CMA || mt == MIGRATE_ISOLATE)
return false;
#endif
- /* The zero page may always be pinned */
- if (is_zero_pfn(page_to_pfn(page)))
+ /* The zero page can be "pinned" but gets special handling. */
+ if (is_zero_folio(folio))
return true;
/* Coherent device memory must always allow eviction. */
- if (is_device_coherent_page(page))
+ if (folio_is_device_coherent(folio))
return false;
- /* Otherwise, non-movable zone pages can be pinned. */
- return !is_zone_movable_page(page);
+ /* Otherwise, non-movable zone folios can be pinned. */
+ return !folio_is_zone_movable(folio);
}
#else
-static inline bool is_longterm_pinnable_page(struct page *page)
+static inline bool folio_is_longterm_pinnable(struct folio *folio)
{
return true;
}
#endif
-static inline bool folio_is_longterm_pinnable(struct folio *folio)
-{
- return is_longterm_pinnable_page(&folio->page);
-}
-
static inline void set_page_zone(struct page *page, enum zone_type zone)
{
page->flags &= ~(ZONES_MASK << ZONES_PGSHIFT);
unmap_mapping_range(mapping, holebegin, holelen, 0);
}
+static inline struct vm_area_struct *vma_lookup(struct mm_struct *mm,
+ unsigned long addr);
+
extern int access_process_vm(struct task_struct *tsk, unsigned long addr,
void *buf, int len, unsigned int gup_flags);
extern int access_remote_vm(struct mm_struct *mm, unsigned long addr,
void *buf, int len, unsigned int gup_flags);
long get_user_pages_remote(struct mm_struct *mm,
- unsigned long start, unsigned long nr_pages,
- unsigned int gup_flags, struct page **pages,
- struct vm_area_struct **vmas, int *locked);
+ unsigned long start, unsigned long nr_pages,
+ unsigned int gup_flags, struct page **pages,
+ int *locked);
long pin_user_pages_remote(struct mm_struct *mm,
unsigned long start, unsigned long nr_pages,
unsigned int gup_flags, struct page **pages,
- struct vm_area_struct **vmas, int *locked);
+ int *locked);
+
+static inline struct page *get_user_page_vma_remote(struct mm_struct *mm,
+ unsigned long addr,
+ int gup_flags,
+ struct vm_area_struct **vmap)
+{
+ struct page *page;
+ struct vm_area_struct *vma;
+ int got = get_user_pages_remote(mm, addr, 1, gup_flags, &page, NULL);
+
+ if (got < 0)
+ return ERR_PTR(got);
+ if (got == 0)
+ return NULL;
+
+ vma = vma_lookup(mm, addr);
+ if (WARN_ON_ONCE(!vma)) {
+ put_page(page);
+ return ERR_PTR(-EINVAL);
+ }
+
+ *vmap = vma;
+ return page;
+}
+
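A caller-side sketch for the helper above (hypothetical function; mm->mmap_lock held for read, kmap_local_page() used so it also works with highmem):

	int example_peek_byte(struct mm_struct *mm, unsigned long addr, u8 *val)
	{
		struct vm_area_struct *vma;
		struct page *page = get_user_page_vma_remote(mm, addr, 0, &vma);
		u8 *kaddr;

		if (IS_ERR(page))
			return PTR_ERR(page);
		if (!page)
			return -EFAULT;	/* hypothetical policy for "no page" */

		kaddr = kmap_local_page(page);
		*val = kaddr[offset_in_page(addr)];
		kunmap_local(kaddr);
		put_page(page);
		return 0;
	}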
long get_user_pages(unsigned long start, unsigned long nr_pages,
- unsigned int gup_flags, struct page **pages,
- struct vm_area_struct **vmas);
+ unsigned int gup_flags, struct page **pages);
long pin_user_pages(unsigned long start, unsigned long nr_pages,
- unsigned int gup_flags, struct page **pages,
- struct vm_area_struct **vmas);
+ unsigned int gup_flags, struct page **pages);
long get_user_pages_unlocked(unsigned long start, unsigned long nr_pages,
struct page **pages, unsigned int gup_flags);
long pin_user_pages_unlocked(unsigned long start, unsigned long nr_pages,
unsigned int gup_flags, struct page **pages);
int pin_user_pages_fast(unsigned long start, int nr_pages,
unsigned int gup_flags, struct page **pages);
+void folio_add_pin(struct folio *folio);
int account_locked_vm(struct mm_struct *mm, unsigned long pages, bool inc);
int __account_locked_vm(struct mm_struct *mm, unsigned long pages, bool inc,
#define MM_CP_UFFD_WP_ALL (MM_CP_UFFD_WP | \
MM_CP_UFFD_WP_RESOLVE)
+bool vma_needs_dirty_tracking(struct vm_area_struct *vma);
int vma_wants_writenotify(struct vm_area_struct *vma, pgprot_t vm_page_prot);
static inline bool vma_wants_manual_pte_write_upgrade(struct vm_area_struct *vma)
{
dec_lruvec_page_state(page, NR_PAGETABLE);
}
-#define pte_offset_map_lock(mm, pmd, address, ptlp) \
-({ \
- spinlock_t *__ptl = pte_lockptr(mm, pmd); \
- pte_t *__pte = pte_offset_map(pmd, address); \
- *(ptlp) = __ptl; \
- spin_lock(__ptl); \
- __pte; \
-})
+pte_t *__pte_offset_map(pmd_t *pmd, unsigned long addr, pmd_t *pmdvalp);
+static inline pte_t *pte_offset_map(pmd_t *pmd, unsigned long addr)
+{
+ return __pte_offset_map(pmd, addr, NULL);
+}
+
+pte_t *__pte_offset_map_lock(struct mm_struct *mm, pmd_t *pmd,
+ unsigned long addr, spinlock_t **ptlp);
+static inline pte_t *pte_offset_map_lock(struct mm_struct *mm, pmd_t *pmd,
+ unsigned long addr, spinlock_t **ptlp)
+{
+ pte_t *pte;
+
+ __cond_lock(*ptlp, pte = __pte_offset_map_lock(mm, pmd, addr, ptlp));
+ return pte;
+}
+
+pte_t *pte_offset_map_nolock(struct mm_struct *mm, pmd_t *pmd,
+ unsigned long addr, spinlock_t **ptlp);
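Since pte_offset_map_lock() can now fail, a caller sketch with the new mandatory NULL check (hypothetical helper; -EAGAIN as an assumed retry convention):

	int example_touch_pte(struct mm_struct *mm, pmd_t *pmd, unsigned long addr)
	{
		spinlock_t *ptl;
		pte_t *pte = pte_offset_map_lock(mm, pmd, addr, &ptl);

		if (!pte)
			return -EAGAIN;	/* pmd changed under us; caller retries */

		/* ... inspect or modify *pte under the PTE lock ... */
		pte_unmap_unlock(pte, ptl);
		return 0;
	}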
#define pte_unmap_unlock(pte, ptl) do { \
spin_unlock(ptl); \
extern void adjust_managed_page_count(struct page *page, long count);
-extern void reserve_bootmem_region(phys_addr_t start, phys_addr_t end);
+extern void reserve_bootmem_region(phys_addr_t start,
+ phys_addr_t end, int nid);
/* Free the reserved page into the buddy system, so it gets managed. */
static inline void free_reserved_page(struct page *page)
#endif
extern void set_dma_reserve(unsigned long new_dma_reserve);
-extern void memmap_init_range(unsigned long, int, unsigned long,
- unsigned long, unsigned long, enum meminit_context,
- struct vmem_altmap *, int migratetype);
-extern void setup_per_zone_wmarks(void);
-extern void calculate_min_free_kbytes(void);
-extern int __meminit init_per_zone_wmark_min(void);
extern void mem_init(void);
extern void __init mmap_init(void);
extern void setup_per_cpu_pageset(void);
-/* page_alloc.c */
-extern int min_free_kbytes;
-extern int watermark_boost_factor;
-extern int watermark_scale_factor;
-
/* nommu.c */
extern atomic_long_t mmap_pages_allocated;
extern int nommu_shrink_inode_mappings(struct inode *, size_t, size_t);
return static_branch_unlikely(&_debug_pagealloc_enabled);
}
-#ifdef CONFIG_DEBUG_PAGEALLOC
/*
* To support DEBUG_PAGEALLOC architecture must ensure that
* __kernel_map_pages() never fails
*/
extern void __kernel_map_pages(struct page *page, int numpages, int enable);
-
+#ifdef CONFIG_DEBUG_PAGEALLOC
static inline void debug_pagealloc_map_pages(struct page *page, int numpages)
{
if (debug_pagealloc_enabled_static())
if (debug_pagealloc_enabled_static())
__kernel_map_pages(page, numpages, 0);
}
+
+extern unsigned int _debug_guardpage_minorder;
+DECLARE_STATIC_KEY_FALSE(_debug_guardpage_enabled);
+
+static inline unsigned int debug_guardpage_minorder(void)
+{
+ return _debug_guardpage_minorder;
+}
+
+static inline bool debug_guardpage_enabled(void)
+{
+ return static_branch_unlikely(&_debug_guardpage_enabled);
+}
+
+static inline bool page_is_guard(struct page *page)
+{
+ if (!debug_guardpage_enabled())
+ return false;
+
+ return PageGuard(page);
+}
+
+bool __set_page_guard(struct zone *zone, struct page *page, unsigned int order,
+ int migratetype);
+static inline bool set_page_guard(struct zone *zone, struct page *page,
+ unsigned int order, int migratetype)
+{
+ if (!debug_guardpage_enabled())
+ return false;
+ return __set_page_guard(zone, page, order, migratetype);
+}
+
+void __clear_page_guard(struct zone *zone, struct page *page, unsigned int order,
+ int migratetype);
+static inline void clear_page_guard(struct zone *zone, struct page *page,
+ unsigned int order, int migratetype)
+{
+ if (!debug_guardpage_enabled())
+ return;
+ __clear_page_guard(zone, page, order, migratetype);
+}
+
#else /* CONFIG_DEBUG_PAGEALLOC */
static inline void debug_pagealloc_map_pages(struct page *page, int numpages) {}
static inline void debug_pagealloc_unmap_pages(struct page *page, int numpages) {}
+static inline unsigned int debug_guardpage_minorder(void) { return 0; }
+static inline bool debug_guardpage_enabled(void) { return false; }
+static inline bool page_is_guard(struct page *page) { return false; }
+static inline bool set_page_guard(struct zone *zone, struct page *page,
+ unsigned int order, int migratetype) { return false; }
+static inline void clear_page_guard(struct zone *zone, struct page *page,
+ unsigned int order, int migratetype) {}
#endif /* CONFIG_DEBUG_PAGEALLOC */
#ifdef __HAVE_ARCH_GATE_AREA
extern atomic_long_t num_poisoned_pages __read_mostly;
extern int soft_offline_page(unsigned long pfn, int flags);
#ifdef CONFIG_MEMORY_FAILURE
+/*
+ * Sysfs entries for memory failure handling statistics.
+ */
+extern const struct attribute_group memory_failure_attr_group;
extern void memory_failure_queue(unsigned long pfn, int flags);
extern int __get_huge_page_for_hwpoison(unsigned long pfn, int flags,
bool *migratable_cleared);
MF_MSG_UNKNOWN,
};
-/*
- * Sysfs entries for memory failure handling statistics.
- */
-extern const struct attribute_group memory_failure_attr_group;
-
#if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_HUGETLBFS)
extern void clear_huge_page(struct page *page,
unsigned long addr_hint,
#endif /* CONFIG_TRANSPARENT_HUGEPAGE || CONFIG_HUGETLBFS */
-#ifdef CONFIG_DEBUG_PAGEALLOC
-extern unsigned int _debug_guardpage_minorder;
-DECLARE_STATIC_KEY_FALSE(_debug_guardpage_enabled);
-
-static inline unsigned int debug_guardpage_minorder(void)
-{
- return _debug_guardpage_minorder;
-}
-
-static inline bool debug_guardpage_enabled(void)
-{
- return static_branch_unlikely(&_debug_guardpage_enabled);
-}
-
-static inline bool page_is_guard(struct page *page)
-{
- if (!debug_guardpage_enabled())
- return false;
-
- return PageGuard(page);
-}
-#else
-static inline unsigned int debug_guardpage_minorder(void) { return 0; }
-static inline bool debug_guardpage_enabled(void) { return false; }
-static inline bool page_is_guard(struct page *page) { return false; }
-#endif /* CONFIG_DEBUG_PAGEALLOC */
-
#if MAX_NUMNODES > 1
void __init setup_nr_node_ids(void);
#else
}
#endif
+#ifdef CONFIG_UNACCEPTED_MEMORY
+
+bool range_contains_unaccepted_memory(phys_addr_t start, phys_addr_t end);
+void accept_memory(phys_addr_t start, phys_addr_t end);
+
+#else
+
+static inline bool range_contains_unaccepted_memory(phys_addr_t start,
+ phys_addr_t end)
+{
+ return false;
+}
+
+static inline void accept_memory(phys_addr_t start, phys_addr_t end)
+{
+}
+
+#endif
+
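A minimal sketch of the intended call pattern (hypothetical consumer; the stubs above make the check free on configs without unaccepted memory):

	void example_make_range_usable(phys_addr_t start, phys_addr_t end)
	{
		/*
		 * Acceptance may involve a guest<->hypervisor handshake, so
		 * only do it when the range really is unaccepted.
		 */
		if (range_contains_unaccepted_memory(start, end))
			accept_memory(start, end);
	}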
#endif /* _LINUX_MM_H */
extern void mnt_set_expiry(struct vfsmount *mnt, struct list_head *expiry_list);
extern void mark_mounts_for_expiry(struct list_head *mounts);
-extern dev_t name_to_dev_t(const char *name);
extern bool path_is_mountpoint(const struct path *path);
extern bool our_mnt(struct vfsmount *mnt);
struct vfsmount *);
extern void kern_unmount_array(struct vfsmount *mnt[], unsigned int num);
+extern int cifs_root_data(char **dev, char **opts);
+
#endif /* _LINUX_MOUNT_H */
*/
#define ARMPMU_EVT_64BIT 0x00001 /* Event uses a 64bit counter */
#define ARMPMU_EVT_47BIT 0x00002 /* Event uses a 47bit counter */
+#define ARMPMU_EVT_63BIT 0x00004 /* Event uses a 63bit counter */
static_assert((PERF_EVENT_FLAG_ARCH & ARMPMU_EVT_64BIT) == ARMPMU_EVT_64BIT);
static_assert((PERF_EVENT_FLAG_ARCH & ARMPMU_EVT_47BIT) == ARMPMU_EVT_47BIT);
+static_assert((PERF_EVENT_FLAG_ARCH & ARMPMU_EVT_63BIT) == ARMPMU_EVT_63BIT);
#define HW_OP_UNSUPPORTED 0xFFFF
#define C(_x) PERF_COUNT_HW_CACHE_##_x
#define kvm_host_pmu_init(x) do { } while(0)
#endif
+bool arm_pmu_irq_is_nmi(void);
+
/* Internal functions only for core arm_pmu code */
struct arm_pmu *armpmu_alloc(void);
void armpmu_free(struct arm_pmu *pmu);
#define DECLARE_BITMAP(name,bits) \
unsigned long name[BITS_TO_LONGS(bits)]
+#ifdef __SIZEOF_INT128__
+typedef __s128 s128;
+typedef __u128 u128;
+#endif
+
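These types are only available when the compiler provides __int128 (hence the guard); a tiny example with a hypothetical helper name:

	/* Exact 64x64 -> 128 bit multiply; cannot overflow. */
	static inline u128 example_mul_u64_u64(u64 a, u64 b)
	{
		return (u128)a * b;
	}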
typedef u32 __kernel_dev_t;
typedef __kernel_fd_set fd_set;
typedef __kernel_gid16_t gid16_t;
typedef unsigned long uintptr_t;
+typedef long intptr_t;
#ifdef CONFIG_HAVE_UID16
/* This is defined by include/asm-{arch}/posix_types.h */
#include "do_mounts.h"
int root_mountflags = MS_RDONLY | MS_SILENT;
-static char * __initdata root_device_name;
static char __initdata saved_root_name[64];
static int root_wait;
__setup("ro", readonly);
__setup("rw", readwrite);
-#ifdef CONFIG_BLOCK
-struct uuidcmp {
- const char *uuid;
- int len;
-};
-
-/**
- * match_dev_by_uuid - callback for finding a partition using its uuid
- * @dev: device passed in by the caller
- * @data: opaque pointer to the desired struct uuidcmp to match
- *
- * Returns 1 if the device matches, and 0 otherwise.
- */
-static int match_dev_by_uuid(struct device *dev, const void *data)
-{
- struct block_device *bdev = dev_to_bdev(dev);
- const struct uuidcmp *cmp = data;
-
- if (!bdev->bd_meta_info ||
- strncasecmp(cmp->uuid, bdev->bd_meta_info->uuid, cmp->len))
- return 0;
- return 1;
-}
-
-/**
- * devt_from_partuuid - looks up the dev_t of a partition by its UUID
- * @uuid_str: char array containing ascii UUID
- *
- * The function will return the first partition which contains a matching
- * UUID value in its partition_meta_info struct. This does not search
- * by filesystem UUIDs.
- *
- * If @uuid_str is followed by a "/PARTNROFF=%d", then the number will be
- * extracted and used as an offset from the partition identified by the UUID.
- *
- * Returns the matching dev_t on success or 0 on failure.
- */
-static dev_t devt_from_partuuid(const char *uuid_str)
-{
- struct uuidcmp cmp;
- struct device *dev = NULL;
- dev_t devt = 0;
- int offset = 0;
- char *slash;
-
- cmp.uuid = uuid_str;
-
- slash = strchr(uuid_str, '/');
- /* Check for optional partition number offset attributes. */
- if (slash) {
- char c = 0;
-
- /* Explicitly fail on poor PARTUUID syntax. */
- if (sscanf(slash + 1, "PARTNROFF=%d%c", &offset, &c) != 1)
- goto clear_root_wait;
- cmp.len = slash - uuid_str;
- } else {
- cmp.len = strlen(uuid_str);
- }
-
- if (!cmp.len)
- goto clear_root_wait;
-
- dev = class_find_device(&block_class, NULL, &cmp, &match_dev_by_uuid);
- if (!dev)
- return 0;
-
- if (offset) {
- /*
- * Attempt to find the requested partition by adding an offset
- * to the partition number found by UUID.
- */
- devt = part_devt(dev_to_disk(dev),
- dev_to_bdev(dev)->bd_partno + offset);
- } else {
- devt = dev->devt;
- }
-
- put_device(dev);
- return devt;
-
-clear_root_wait:
- pr_err("VFS: PARTUUID= is invalid.\n"
- "Expected PARTUUID=<valid-uuid-id>[/PARTNROFF=%%d]\n");
- if (root_wait)
- pr_err("Disabling rootwait; root= is invalid.\n");
- root_wait = 0;
- return 0;
-}
-
-/**
- * match_dev_by_label - callback for finding a partition using its label
- * @dev: device passed in by the caller
- * @data: opaque pointer to the label to match
- *
- * Returns 1 if the device matches, and 0 otherwise.
- */
-static int match_dev_by_label(struct device *dev, const void *data)
-{
- struct block_device *bdev = dev_to_bdev(dev);
- const char *label = data;
-
- if (!bdev->bd_meta_info || strcmp(label, bdev->bd_meta_info->volname))
- return 0;
- return 1;
-}
-
-static dev_t devt_from_partlabel(const char *label)
-{
- struct device *dev;
- dev_t devt = 0;
-
- dev = class_find_device(&block_class, NULL, label, &match_dev_by_label);
- if (dev) {
- devt = dev->devt;
- put_device(dev);
- }
-
- return devt;
-}
-
-static dev_t devt_from_devname(const char *name)
-{
- dev_t devt = 0;
- int part;
- char s[32];
- char *p;
-
- if (strlen(name) > 31)
- return 0;
- strcpy(s, name);
- for (p = s; *p; p++) {
- if (*p == '/')
- *p = '!';
- }
-
- devt = blk_lookup_devt(s, 0);
- if (devt)
- return devt;
-
- /*
- * Try non-existent, but valid partition, which may only exist after
- * opening the device, like partitioned md devices.
- */
- while (p > s && isdigit(p[-1]))
- p--;
- if (p == s || !*p || *p == '0')
- return 0;
-
- /* try disk name without <part number> */
- part = simple_strtoul(p, NULL, 10);
- *p = '\0';
- devt = blk_lookup_devt(s, part);
- if (devt)
- return devt;
-
- /* try disk name without p<part number> */
- if (p < s + 2 || !isdigit(p[-2]) || p[-1] != 'p')
- return 0;
- p[-1] = '\0';
- return blk_lookup_devt(s, part);
-}
-#endif /* CONFIG_BLOCK */
-
-static dev_t devt_from_devnum(const char *name)
-{
- unsigned maj, min, offset;
- dev_t devt = 0;
- char *p, dummy;
-
- if (sscanf(name, "%u:%u%c", &maj, &min, &dummy) == 2 ||
- sscanf(name, "%u:%u:%u:%c", &maj, &min, &offset, &dummy) == 3) {
- devt = MKDEV(maj, min);
- if (maj != MAJOR(devt) || min != MINOR(devt))
- return 0;
- } else {
- devt = new_decode_dev(simple_strtoul(name, &p, 16));
- if (*p)
- return 0;
- }
-
- return devt;
-}
-
-/*
- * Convert a name into device number. We accept the following variants:
- *
- * 1) <hex_major><hex_minor> device number in hexadecimal represents itself
- * no leading 0x, for example b302.
- * 2) /dev/nfs represents Root_NFS (0xff)
- * 3) /dev/<disk_name> represents the device number of disk
- * 4) /dev/<disk_name><decimal> represents the device number
- * of partition - device number of disk plus the partition number
- * 5) /dev/<disk_name>p<decimal> - same as the above, that form is
- * used when disk name of partitioned disk ends on a digit.
- * 6) PARTUUID=00112233-4455-6677-8899-AABBCCDDEEFF representing the
- * unique id of a partition if the partition table provides it.
- * The UUID may be either an EFI/GPT UUID, or refer to an MSDOS
- * partition using the format SSSSSSSS-PP, where SSSSSSSS is a zero-
- * filled hex representation of the 32-bit "NT disk signature", and PP
- * is a zero-filled hex representation of the 1-based partition number.
- * 7) PARTUUID=<UUID>/PARTNROFF=<int> to select a partition in relation to
- * a partition with a known unique id.
- * 8) <major>:<minor> major and minor number of the device separated by
- * a colon.
- * 9) PARTLABEL=<name> with name being the GPT partition label.
- * MSDOS partitions do not support labels!
- * 10) /dev/cifs represents Root_CIFS (0xfe)
- *
- * If name doesn't have fall into the categories above, we return (0,0).
- * block_class is used to check if something is a disk name. If the disk
- * name contains slashes, the device name has them replaced with
- * bangs.
- */
-dev_t name_to_dev_t(const char *name)
-{
- if (strcmp(name, "/dev/nfs") == 0)
- return Root_NFS;
- if (strcmp(name, "/dev/cifs") == 0)
- return Root_CIFS;
- if (strcmp(name, "/dev/ram") == 0)
- return Root_RAM0;
-#ifdef CONFIG_BLOCK
- if (strncmp(name, "PARTUUID=", 9) == 0)
- return devt_from_partuuid(name + 9);
- if (strncmp(name, "PARTLABEL=", 10) == 0)
- return devt_from_partlabel(name + 10);
- if (strncmp(name, "/dev/", 5) == 0)
- return devt_from_devname(name + 5);
-#endif
- return devt_from_devnum(name);
-}
-EXPORT_SYMBOL_GPL(name_to_dev_t);
-
static int __init root_dev_setup(char *line)
{
strscpy(saved_root_name, line, sizeof(saved_root_name));
__setup("rootdelay=", root_delay_setup);
/* This can return zero length strings. Caller should check */
-static int __init split_fs_names(char *page, size_t size, char *names)
+static int __init split_fs_names(char *page, size_t size)
{
int count = 1;
char *p = page;
return ret;
}
-void __init mount_block_root(char *name, int flags)
+void __init mount_root_generic(char *name, char *pretty_name, int flags)
{
struct page *page = alloc_page(GFP_KERNEL);
char *fs_names = page_address(page);
scnprintf(b, BDEVNAME_SIZE, "unknown-block(%u,%u)",
MAJOR(ROOT_DEV), MINOR(ROOT_DEV));
if (root_fs_names)
- num_fs = split_fs_names(fs_names, PAGE_SIZE, root_fs_names);
+ num_fs = split_fs_names(fs_names, PAGE_SIZE);
else
num_fs = list_bdev_fs_names(fs_names, PAGE_SIZE);
retry:
* and give them a list of the available devices
*/
printk("VFS: Cannot open root device \"%s\" or %s: error %d\n",
- root_device_name, b, err);
+ pretty_name, b, err);
printk("Please append a correct \"root=\" boot option; here are the available partitions:\n");
-
printk_all_partitions();
+
+ if (root_fs_names)
+ num_fs = list_bdev_fs_names(fs_names, PAGE_SIZE);
+ if (!num_fs)
+ pr_err("Can't find any bdev filesystem to be used for mount!\n");
+ else {
+ pr_err("List of all bdev filesystems:\n");
+ for (i = 0, p = fs_names; i < num_fs; i++, p += strlen(p)+1)
+ pr_err(" %s", p);
+ pr_err("\n");
+ }
+
panic("VFS: Unable to mount root fs on %s", b);
}
if (!(flags & SB_RDONLY)) {
#define NFSROOT_TIMEOUT_MAX 30
#define NFSROOT_RETRY_MAX 5
-static int __init mount_nfs_root(void)
+static void __init mount_nfs_root(void)
{
char *root_dev, *root_data;
unsigned int timeout;
- int try, err;
+ int try;
- err = nfs_root_data(&root_dev, &root_data);
- if (err != 0)
- return 0;
+ if (nfs_root_data(&root_dev, &root_data))
+ goto fail;
/*
* The server or network may not be ready, so try several
*/
timeout = NFSROOT_TIMEOUT_MIN;
for (try = 1; ; try++) {
- err = do_mount_root(root_dev, "nfs",
- root_mountflags, root_data);
- if (err == 0)
- return 1;
+ if (!do_mount_root(root_dev, "nfs", root_mountflags, root_data))
+ return;
if (try > NFSROOT_RETRY_MAX)
break;
if (timeout > NFSROOT_TIMEOUT_MAX)
timeout = NFSROOT_TIMEOUT_MAX;
}
- return 0;
+fail:
+ pr_err("VFS: Unable to mount root fs via NFS.\n");
+}
+#else
+static inline void mount_nfs_root(void)
+{
}
-#endif
+#endif /* CONFIG_ROOT_NFS */
#ifdef CONFIG_CIFS_ROOT
-extern int cifs_root_data(char **dev, char **opts);
-
#define CIFSROOT_TIMEOUT_MIN 5
#define CIFSROOT_TIMEOUT_MAX 30
#define CIFSROOT_RETRY_MAX 5
-static int __init mount_cifs_root(void)
+static void __init mount_cifs_root(void)
{
char *root_dev, *root_data;
unsigned int timeout;
- int try, err;
+ int try;
- err = cifs_root_data(&root_dev, &root_data);
- if (err != 0)
- return 0;
+ if (cifs_root_data(&root_dev, &root_data))
+ goto fail;
timeout = CIFSROOT_TIMEOUT_MIN;
for (try = 1; ; try++) {
- err = do_mount_root(root_dev, "cifs", root_mountflags,
- root_data);
- if (err == 0)
- return 1;
+ if (!do_mount_root(root_dev, "cifs", root_mountflags,
+ root_data))
+ return;
if (try > CIFSROOT_RETRY_MAX)
break;
if (timeout > CIFSROOT_TIMEOUT_MAX)
timeout = CIFSROOT_TIMEOUT_MAX;
}
- return 0;
+fail:
+ pr_err("VFS: Unable to mount root fs via SMB.\n");
}
-#endif
+#else
+static inline void mount_cifs_root(void)
+{
+}
+#endif /* CONFIG_CIFS_ROOT */
static bool __init fs_is_nodev(char *fstype)
{
return ret;
}
-static int __init mount_nodev_root(void)
+static int __init mount_nodev_root(char *root_device_name)
{
char *fs_names, *fstype;
int err = -EINVAL;
fs_names = (void *)__get_free_page(GFP_KERNEL);
if (!fs_names)
return -EINVAL;
- num_fs = split_fs_names(fs_names, PAGE_SIZE, root_fs_names);
+ num_fs = split_fs_names(fs_names, PAGE_SIZE);
for (i = 0, fstype = fs_names; i < num_fs;
i++, fstype += strlen(fstype) + 1) {
return err;
}
-void __init mount_root(void)
+#ifdef CONFIG_BLOCK
+static void __init mount_block_root(char *root_device_name)
{
-#ifdef CONFIG_ROOT_NFS
- if (ROOT_DEV == Root_NFS) {
- if (!mount_nfs_root())
- printk(KERN_ERR "VFS: Unable to mount root fs via NFS.\n");
- return;
+ int err = create_dev("/dev/root", ROOT_DEV);
+
+ if (err < 0)
+ pr_emerg("Failed to create /dev/root: %d\n", err);
+ mount_root_generic("/dev/root", root_device_name, root_mountflags);
+}
+#else
+static inline void mount_block_root(char *root_device_name)
+{
+}
+#endif /* CONFIG_BLOCK */
+
+void __init mount_root(char *root_device_name)
+{
+ switch (ROOT_DEV) {
+ case Root_NFS:
+ mount_nfs_root();
+ break;
+ case Root_CIFS:
+ mount_cifs_root();
+ break;
+ case Root_Generic:
+ mount_root_generic(root_device_name, root_device_name,
+ root_mountflags);
+ break;
+ case 0:
+ if (root_device_name && root_fs_names &&
+ mount_nodev_root(root_device_name) == 0)
+ break;
+ fallthrough;
+ default:
+ mount_block_root(root_device_name);
+ break;
}
-#endif
-#ifdef CONFIG_CIFS_ROOT
- if (ROOT_DEV == Root_CIFS) {
- if (!mount_cifs_root())
- printk(KERN_ERR "VFS: Unable to mount root fs via SMB.\n");
+}
+
+/* wait for any asynchronous scanning to complete */
+static void __init wait_for_root(char *root_device_name)
+{
+ if (ROOT_DEV != 0)
return;
- }
-#endif
- if (ROOT_DEV == 0 && root_device_name && root_fs_names) {
- if (mount_nodev_root() == 0)
- return;
- }
-#ifdef CONFIG_BLOCK
- {
- int err = create_dev("/dev/root", ROOT_DEV);
- if (err < 0)
- pr_emerg("Failed to create /dev/root: %d\n", err);
- mount_block_root("/dev/root", root_mountflags);
+ pr_info("Waiting for root device %s...\n", root_device_name);
+
+ while (!driver_probe_done() ||
+ early_lookup_bdev(root_device_name, &ROOT_DEV) < 0)
+ msleep(5);
+ async_synchronize_full();
+}
+
+static dev_t __init parse_root_device(char *root_device_name)
+{
+ int error;
+ dev_t dev;
+
+ if (!strncmp(root_device_name, "mtd", 3) ||
+ !strncmp(root_device_name, "ubi", 3))
+ return Root_Generic;
+ if (strcmp(root_device_name, "/dev/nfs") == 0)
+ return Root_NFS;
+ if (strcmp(root_device_name, "/dev/cifs") == 0)
+ return Root_CIFS;
+ if (strcmp(root_device_name, "/dev/ram") == 0)
+ return Root_RAM0;
+
+ error = early_lookup_bdev(root_device_name, &dev);
+ if (error) {
+ if (error == -EINVAL && root_wait) {
+ pr_err("Disabling rootwait; root= is invalid.\n");
+ root_wait = 0;
+ }
+ return 0;
}
-#endif
+ return dev;
}
/*
md_run_setup();
- if (saved_root_name[0]) {
- root_device_name = saved_root_name;
- if (!strncmp(root_device_name, "mtd", 3) ||
- !strncmp(root_device_name, "ubi", 3)) {
- mount_block_root(root_device_name, root_mountflags);
- goto out;
- }
- ROOT_DEV = name_to_dev_t(root_device_name);
- if (strncmp(root_device_name, "/dev/", 5) == 0)
- root_device_name += 5;
- }
+ if (saved_root_name[0])
+ ROOT_DEV = parse_root_device(saved_root_name);
- if (initrd_load())
+ if (initrd_load(saved_root_name))
goto out;
- /* wait for any asynchronous scanning to complete */
- if ((ROOT_DEV == 0) && root_wait) {
- printk(KERN_INFO "Waiting for root device %s...\n",
- saved_root_name);
- while (driver_probe_done() != 0 ||
- (ROOT_DEV = name_to_dev_t(saved_root_name)) == 0)
- msleep(5);
- async_synchronize_full();
- }
-
- mount_root();
+ if (root_wait)
+ wait_for_root(saved_root_name);
+ mount_root(saved_root_name);
out:
devtmpfs_mount();
init_mount(".", "/", NULL, MS_MOVE, NULL);
#include <linux/cache.h>
#include <linux/rodata_test.h>
#include <linux/jump_label.h>
-#include <linux/mem_encrypt.h>
#include <linux/kcsan.h>
#include <linux/init_syscalls.h>
#include <linux/stackdepot.h>
#include <net/net_namespace.h>
#include <asm/io.h>
-#include <asm/bugs.h>
#include <asm/setup.h>
#include <asm/sections.h>
#include <asm/cacheflush.h>
static int kernel_init(void *);
-extern void init_IRQ(void);
-extern void radix_tree_init(void);
-extern void maple_tree_init(void);
-
/*
* Debug helper: via this flag we know that we are in 'early bootup code'
* where only the boot processor is running with IRQ disabled. This means
#define MAX_INIT_ARGS CONFIG_INIT_ENV_ARG_LIMIT
#define MAX_INIT_ENVS CONFIG_INIT_ENV_ARG_LIMIT
-extern void time_init(void);
/* Default late time init is NULL. archs can override this later. */
void (*__initdata late_time_init)(void);
const char *envp_init[MAX_INIT_ENVS+2] = { "HOME=/", "TERM=linux", NULL, };
static const char *panic_later, *panic_param;
-extern const struct obs_kernel_param __setup_start[], __setup_end[];
-
static bool __init obsolete_checksetup(char *line)
{
const struct obs_kernel_param *p;
}
#endif
-void __init __weak mem_encrypt_init(void) { }
-
void __init __weak poking_init(void) { }
void __init __weak pgtable_cache_init(void) { }
memblock_free(unknown_options, len);
}
-asmlinkage __visible void __init __no_sanitize_address __noreturn start_kernel(void)
+asmlinkage __visible __init __no_sanitize_address __noreturn __no_stack_protector
+void start_kernel(void)
{
char *command_line;
char *after_dashes;
sched_clock_init();
calibrate_delay();
- /*
- * This needs to be called before any devices perform DMA
- * operations that might use the SWIOTLB bounce buffers. It will
- * mark the bounce buffers as decrypted so that their usage will
- * not cause "plain-text" data to be decrypted when accessed. It
- * must be called after late_time_init() so that Hyper-V x86/x64
- * hypercalls work when the SWIOTLB bounce buffers are decrypted.
- */
- mem_encrypt_init();
+ arch_cpu_finalize_init();
pid_idr_init();
anon_vma_init();
taskstats_init_early();
delayacct_init();
- check_bugs();
-
acpi_subsystem_init();
arch_post_acpi_subsys_init();
kcsan_init();
/* Do the rest non-__init'ed, we're now alive */
arch_call_rest_init();
+ /*
+ * Avoid stack canaries in callers of boot_init_stack_canary for gcc-10
+ * and older.
+ */
+#if !__has_attribute(__no_stack_protector__)
prevent_tail_call_optimization();
+#endif
}
/* Call all constructor functions linked into the kernel. */
}
-extern initcall_entry_t __initcall_start[];
-extern initcall_entry_t __initcall0_start[];
-extern initcall_entry_t __initcall1_start[];
-extern initcall_entry_t __initcall2_start[];
-extern initcall_entry_t __initcall3_start[];
-extern initcall_entry_t __initcall4_start[];
-extern initcall_entry_t __initcall5_start[];
-extern initcall_entry_t __initcall6_start[];
-extern initcall_entry_t __initcall7_start[];
-extern initcall_entry_t __initcall_end[];
-
static initcall_entry_t *initcall_levels[] __initdata = {
__initcall0_start,
__initcall1_start,
{
int i;
int ret;
+ int nr_charged = 0;
- BUILD_BUG_ON(IS_ENABLED(CONFIG_VMAP_STACK) && PAGE_SIZE % 1024 != 0);
BUG_ON(vm->nr_pages != THREAD_SIZE / PAGE_SIZE);
for (i = 0; i < THREAD_SIZE / PAGE_SIZE; i++) {
ret = memcg_kmem_charge_page(vm->pages[i], GFP_KERNEL, 0);
if (ret)
goto err;
+ nr_charged++;
}
return 0;
err:
- /*
- * If memcg_kmem_charge_page() fails, page's memory cgroup pointer is
- * NULL, and memcg_kmem_uncharge_page() in free_thread_stack() will
- * ignore this page.
- */
- for (i = 0; i < THREAD_SIZE / PAGE_SIZE; i++)
+ for (i = 0; i < nr_charged; i++)
memcg_kmem_uncharge_page(vm->pages[i], 0);
return ret;
}
arch_release_task_struct(tsk);
if (tsk->flags & PF_KTHREAD)
free_kthread_struct(tsk);
+ bpf_task_storage_free(tsk);
free_task_struct(tsk);
}
EXPORT_SYMBOL(free_task);
cgroup_free(tsk);
task_numa_free(tsk, true);
security_task_free(tsk);
- bpf_task_storage_free(tsk);
exit_creds(tsk);
delayacct_tsk_free(tsk);
put_signal_struct(tsk->signal);
p->flags &= ~PF_KTHREAD;
if (args->kthread)
p->flags |= PF_KTHREAD;
- if (args->user_worker)
- p->flags |= PF_USER_WORKER;
- if (args->io_thread) {
+ if (args->user_worker) {
/*
- * Mark us an IO worker, and block any signal that isn't
+ * Mark us a user worker, and block any signal that isn't
* fatal or STOP
*/
- p->flags |= PF_IO_WORKER;
+ p->flags |= PF_USER_WORKER;
siginitsetinv(&p->blocked, sigmask(SIGKILL)|sigmask(SIGSTOP));
}
+ if (args->io_thread)
+ p->flags |= PF_IO_WORKER;
if (args->name)
strscpy_pad(p->comm, args->name, sizeof(p->comm));
if (retval)
goto bad_fork_cleanup_io;
- if (args->ignore_signals)
- ignore_signals(p);
-
stackleak_task_init(p);
if (pid != &init_struct_pid) {
{
unsigned long bss_addr;
unsigned long offset;
+ size_t sechdrs_size;
Elf_Shdr *sechdrs;
int i;
* The section headers in kexec_purgatory are read-only. In order to
* have them modifiable make a temporary copy.
*/
- sechdrs = vzalloc(array_size(sizeof(Elf_Shdr), pi->ehdr->e_shnum));
+ sechdrs_size = array_size(sizeof(Elf_Shdr), pi->ehdr->e_shnum);
+ sechdrs = vzalloc(sechdrs_size);
if (!sechdrs)
return -ENOMEM;
- memcpy(sechdrs, (void *)pi->ehdr + pi->ehdr->e_shoff,
- pi->ehdr->e_shnum * sizeof(Elf_Shdr));
+ memcpy(sechdrs, (void *)pi->ehdr + pi->ehdr->e_shoff, sechdrs_size);
pi->sechdrs = sechdrs;
offset = 0;
}
offset = ALIGN(offset, align);
+
+ /*
+ * Check if the segment contains the entry point, if so,
+ * calculate the value of image->start based on it.
+ * If the compiler has produced more than one .text section
+ * (e.g. .text.hot), they are generally after the main .text
+ * section, and they shall not be used to calculate
+ * image->start. So do not re-calculate image->start if it
+ * is not set to the initial value, and warn the user so they
+ * have a chance to fix their purgatory's linker script.
+ */
if (sechdrs[i].sh_flags & SHF_EXECINSTR &&
pi->ehdr->e_entry >= sechdrs[i].sh_addr &&
pi->ehdr->e_entry < (sechdrs[i].sh_addr
- + sechdrs[i].sh_size)) {
+ + sechdrs[i].sh_size) &&
+ !WARN_ON(kbuf->image->start != pi->ehdr->e_entry)) {
kbuf->image->start -= sechdrs[i].sh_addr;
kbuf->image->start += kbuf->mem + offset;
}
}
EXPORT_SYMBOL_GPL(kthread_should_park);
+bool kthread_should_stop_or_park(void)
+{
+ struct kthread *kthread = __to_kthread(current);
+
+ if (!kthread)
+ return false;
+
+ return kthread->flags & (BIT(KTHREAD_SHOULD_STOP) | BIT(KTHREAD_SHOULD_PARK));
+}
+
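A usage sketch for the combined check (all example_ names hypothetical): a worker can sleep on a single condition and then dispatch to the existing stop/park helpers:

	#include <linux/kthread.h>
	#include <linux/wait.h>

	struct example_ctx {
		wait_queue_head_t wq;
		bool has_work;
	};

	int example_thread_fn(void *data)
	{
		struct example_ctx *ctx = data;

		for (;;) {
			wait_event_interruptible(ctx->wq,
				ctx->has_work || kthread_should_stop_or_park());
			if (kthread_should_stop())
				break;
			if (kthread_should_park()) {
				kthread_parkme();	/* blocks while parked */
				continue;
			}
			/* ... consume ctx->has_work ... */
		}
		return 0;
	}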
/**
* kthread_freezable_should_stop - should this freezable kthread return now?
* @was_frozen: optional out parameter, indicates whether %current was frozen
* @comp: Completion to complete
* @code: The integer value to return to kthread_stop().
*
- * If present complete @comp and the reuturn code to kthread_stop().
+ * If present, complete @comp and then return @code to kthread_stop().
*
* A kernel thread whose module may be removed after the completion of
- * @comp can use this function exit safely.
+ * @comp can use this function to exit safely.
*
* Does not return.
*/
Say N if unsure.
-config HARDLOCKUP_DETECTOR_PERF
+config HAVE_HARDLOCKUP_DETECTOR_BUDDY
bool
- select SOFTLOCKUP_DETECTOR
+ depends on SMP
+ default y
#
-# Enables a timestamp based low pass filter to compensate for perf based
-# hard lockup detection which runs too fast due to turbo modes.
+# Global switch for whether to build a hardlockup detector at all. It is
+# available only when the architecture supports at least one implementation.
+# There are two exceptions. The hardlockup detector is never enabled on:
#
-config HARDLOCKUP_CHECK_TIMESTAMP
- bool
-
+# s390: it reported many false positives there
#
-# arch/ can define HAVE_HARDLOCKUP_DETECTOR_ARCH to provide their own hard
-# lockup detector rather than the perf based detector.
+# sparc64: has a custom implementation which does not use the common
+# hardlockup command line options and sysctl interface.
#
config HARDLOCKUP_DETECTOR
bool "Detect Hard Lockups"
- depends on DEBUG_KERNEL && !S390
- depends on HAVE_HARDLOCKUP_DETECTOR_PERF || HAVE_HARDLOCKUP_DETECTOR_ARCH
+ depends on DEBUG_KERNEL && !S390 && !HARDLOCKUP_DETECTOR_SPARC64
+ depends on HAVE_HARDLOCKUP_DETECTOR_PERF || HAVE_HARDLOCKUP_DETECTOR_BUDDY || HAVE_HARDLOCKUP_DETECTOR_ARCH
+ imply HARDLOCKUP_DETECTOR_PERF
+ imply HARDLOCKUP_DETECTOR_BUDDY
+ imply HARDLOCKUP_DETECTOR_ARCH
select LOCKUP_DETECTOR
- select HARDLOCKUP_DETECTOR_PERF if HAVE_HARDLOCKUP_DETECTOR_PERF
+
help
Say Y here to enable the kernel to act as a watchdog to detect
hard lockups.
chance to run. The current stack trace is displayed upon detection
and the system will stay locked up.
+#
+# Note that arch-specific variants are always preferred.
+#
+config HARDLOCKUP_DETECTOR_PREFER_BUDDY
+ bool "Prefer the buddy CPU hardlockup detector"
+ depends on HARDLOCKUP_DETECTOR
+ depends on HAVE_HARDLOCKUP_DETECTOR_PERF && HAVE_HARDLOCKUP_DETECTOR_BUDDY
+ depends on !HAVE_HARDLOCKUP_DETECTOR_ARCH
+ help
+ Say Y here to prefer the buddy hardlockup detector over the perf one.
+
+ With the buddy detector, each CPU uses its softlockup hrtimer
+ to check that the next CPU is processing hrtimer interrupts by
+ verifying that a counter is increasing.
+
+ This hardlockup detector is useful on systems that don't have
+ an arch-specific hardlockup detector or if resources needed
+ for the hardlockup detector are better used for other things.
+
+config HARDLOCKUP_DETECTOR_PERF
+ bool
+ depends on HARDLOCKUP_DETECTOR
+ depends on HAVE_HARDLOCKUP_DETECTOR_PERF && !HARDLOCKUP_DETECTOR_PREFER_BUDDY
+ depends on !HAVE_HARDLOCKUP_DETECTOR_ARCH
+ select HARDLOCKUP_DETECTOR_COUNTS_HRTIMER
+
+config HARDLOCKUP_DETECTOR_BUDDY
+ bool
+ depends on HARDLOCKUP_DETECTOR
+ depends on HAVE_HARDLOCKUP_DETECTOR_BUDDY
+ depends on !HAVE_HARDLOCKUP_DETECTOR_PERF || HARDLOCKUP_DETECTOR_PREFER_BUDDY
+ depends on !HAVE_HARDLOCKUP_DETECTOR_ARCH
+ select HARDLOCKUP_DETECTOR_COUNTS_HRTIMER
+
+config HARDLOCKUP_DETECTOR_ARCH
+ bool
+ depends on HARDLOCKUP_DETECTOR
+ depends on HAVE_HARDLOCKUP_DETECTOR_ARCH
+ help
+ The arch-specific implementation of the hardlockup detector will
+ be used.
+
+#
+# Both the "perf" and "buddy" hardlockup detectors count hrtimer
+# interrupts. This config enables functions managing this common code.
+#
+config HARDLOCKUP_DETECTOR_COUNTS_HRTIMER
+ bool
+ select SOFTLOCKUP_DETECTOR
+
+#
+# Enables a timestamp based low pass filter to compensate for perf based
+# hard lockup detection which runs too fast due to turbo modes.
+#
+config HARDLOCKUP_CHECK_TIMESTAMP
+ bool
+
config BOOTPARAM_HARDLOCKUP_PANIC
bool "Panic (Reboot) On Hard Lockups"
depends on HARDLOCKUP_DETECTOR
state. This can be configured through kernel parameter
"workqueue.watchdog_thresh" and its sysfs counterpart.
+config WQ_CPU_INTENSIVE_REPORT
+ bool "Report per-cpu work items which hog CPU for too long"
+ depends on DEBUG_KERNEL
+ help
+ Say Y here to enable reporting of concurrency-managed per-cpu work
+ items that hog CPUs for longer than
+ workqueue.cpu_intensive_threshold_us. Workqueue automatically
+ detects and excludes them from concurrency management to prevent
+ them from stalling other per-cpu work items. Occasional
+ triggering may not necessarily indicate a problem. Repeated
+ triggering likely indicates that the work item should be switched
+ to use an unbound workqueue.
+
config TEST_LOCKUP
tristate "Test module to generate lockups"
depends on m
tristate "Test the XArray code at runtime"
config TEST_MAPLE_TREE
- depends on DEBUG_KERNEL
- select DEBUG_MAPLE_TREE
- tristate "Test the Maple Tree code at runtime"
+ tristate "Test the Maple Tree code at runtime or module load"
+ help
+ Enable this option to test the maple tree code functions at boot, or
+ when the module is loaded. Enabling "Debug Maple Trees" will enable
+ more verbose output on failures.
+
+ If unsure, say N.
config TEST_RHASHTABLE
tristate "Perform selftest on resizable hash table"
If unsure, say N.
+config CHECKSUM_KUNIT
+ tristate "KUnit test checksum functions at runtime" if !KUNIT_ALL_TESTS
+ depends on KUNIT
+ default KUNIT_ALL_TESTS
+ help
+ Enable this option to test the checksum functions at boot.
+
+ KUnit tests run during boot and output the results to the debug log
+ in TAP format (http://testanything.org/). Only useful for kernel devs
+ running the KUnit test harness, and not intended for inclusion into a
+ production build.
+
+ For more information on KUnit and unit tests in general please refer
+ to the KUnit documentation in Documentation/dev-tools/kunit/.
+
+ If unsure, say N.
+
config HASH_KUNIT_TEST
tristate "KUnit Test for integer hash functions" if !KUNIT_ALL_TESTS
depends on KUNIT
config FORTIFY_KUNIT_TEST
tristate "Test fortified str*() and mem*() function internals at runtime" if !KUNIT_ALL_TESTS
- depends on KUNIT && FORTIFY_SOURCE
+ depends on KUNIT
default KUNIT_ALL_TESTS
help
Builds unit tests for checking internals of FORTIFY_SOURCE as used
If unsure, say N.
+config STRCAT_KUNIT_TEST
+ tristate "Test strcat() family of functions at runtime" if !KUNIT_ALL_TESTS
+ depends on KUNIT
+ default KUNIT_ALL_TESTS
+
config STRSCPY_KUNIT_TEST
tristate "Test strscpy*() family of functions at runtime" if !KUNIT_ALL_TESTS
depends on KUNIT
return rc;
}
#else
-struct page * __meminit populate_section_memmap(unsigned long pfn,
+static struct page * __meminit populate_section_memmap(unsigned long pfn,
unsigned long nr_pages, int nid, struct vmem_altmap *altmap,
struct dev_pagemap *pgmap)
{
return 0;
}
-void sparse_remove_section(struct mem_section *ms, unsigned long pfn,
- unsigned long nr_pages, unsigned long map_offset,
- struct vmem_altmap *altmap)
+void sparse_remove_section(unsigned long pfn, unsigned long nr_pages,
+ struct vmem_altmap *altmap)
{
+ struct mem_section *ms = __pfn_to_section(pfn);
+
+ if (WARN_ON_ONCE(!valid_section(ms)))
+ return;
+
section_deactivate(pfn, nr_pages, altmap);
}
#endif /* CONFIG_MEMORY_HOTPLUG */