high limit is used and monitored properly, this limit's
utility is limited to providing the final safety net.
+ memory.oom.group
+ A read-write single value file which exists on non-root
+ cgroups. The default value is "0".
+
+ Determines whether the cgroup should be treated as
+ an indivisible workload by the OOM killer. If set,
+ all tasks belonging to the cgroup or to its descendants
+ (if the memory cgroup is not a leaf cgroup) are killed
+ together or not at all. This can be used to avoid
+	  partial kills and thus guarantee workload integrity.
+
+	  Tasks with OOM protection (oom_score_adj set to -1000)
+ are treated as an exception and are never killed.
+
+	  If the OOM killer is invoked in a cgroup, it will not
+	  kill any tasks outside of this cgroup, regardless of
+	  the memory.oom.group values of ancestor cgroups.
+
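As a minimal userspace sketch (the cgroup path "/sys/fs/cgroup/workload" is hypothetical and error handling is kept to a bare check), opting a cgroup into group OOM handling is a single write:

  #include <fcntl.h>
  #include <stdio.h>
  #include <unistd.h>

  /* Sketch: opt a hypothetical cgroup into group OOM kills. */
  int main(void)
  {
          int fd = open("/sys/fs/cgroup/workload/memory.oom.group", O_WRONLY);

          if (fd < 0) {
                  perror("open");
                  return 1;
          }
          if (write(fd, "1", 1) != 1)
                  perror("write");
          close(fd);
          return 0;
  }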
memory.events
A read-only flat-keyed file which exists on non-root cgroups.
The following entries are defined. Unless specified
on: enable the feature
page_poison= [KNL] Boot-time parameter changing the state of
- poisoning on the buddy allocator.
- off: turn off poisoning
+ poisoning on the buddy allocator, available with
+ CONFIG_PAGE_POISONING=y.
+ off: turn off poisoning (default)
on: turn on poisoning
panic= [KNL] Kernel behaviour on panic: delay <timeout>
VmallocTotal: 112216 kB
VmallocUsed: 428 kB
VmallocChunk: 111088 kB
+Percpu: 62080 kB
HardwareCorrupted: 0 kB
AnonHugePages: 49152 kB
ShmemHugePages: 0 kB
VmallocTotal: total size of vmalloc memory area
VmallocUsed: amount of vmalloc area which is used
VmallocChunk: largest contiguous block of vmalloc area which is free
+ Percpu: Memory allocated to the percpu allocator used to back percpu
+ allocations. This stat excludes the cost of metadata.
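A quick sketch of reading the new field back, assuming nothing beyond the /proc/meminfo format documented above:

  #include <stdio.h>
  #include <string.h>

  /* Sketch: print the Percpu line from /proc/meminfo. */
  int main(void)
  {
          char line[256];
          FILE *f = fopen("/proc/meminfo", "r");

          if (!f)
                  return 1;
          while (fgets(line, sizeof(line), f))
                  if (!strncmp(line, "Percpu:", 7))
                          fputs(line, stdout);
          fclose(f);
          return 0;
  }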
..............................................................................
- hung_task_panic
- hung_task_check_count
- hung_task_timeout_secs
+- hung_task_check_interval_secs
- hung_task_warnings
- hyperv_record_panic_msg
- kexec_load_disabled
hung_task_timeout_secs:
-Check interval. When a task in D state did not get scheduled
+When a task in D state has not been scheduled
for more than this value, report a warning.
This file shows up if CONFIG_DETECT_HUNG_TASK is enabled.
==============================================================
+hung_task_check_interval_secs:
+
+Hung task check interval. If hung task checking is enabled
+(see hung_task_timeout_secs), the check is done every
+hung_task_check_interval_secs seconds.
+This file shows up if CONFIG_DETECT_HUNG_TASK is enabled.
+
+0 (default): use hung_task_timeout_secs as the checking interval.
+Possible values to set are in range {0..LONG_MAX/HZ}.
+
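+For example, a minimal sketch that shortens the check interval to 10
+seconds by writing the sysctl's file under the standard /proc/sys path:
+
+  #include <stdio.h>
+
+  /* Sketch: check every 10s while keeping the warning timeout as-is. */
+  int main(void)
+  {
+          FILE *f = fopen("/proc/sys/kernel/hung_task_check_interval_secs", "w");
+
+          if (!f) {
+                  perror("fopen");
+                  return 1;
+          }
+          fprintf(f, "10\n");
+          fclose(f);
+          return 0;
+  }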
+==============================================================
+
hung_task_warnings:
The maximum number of warnings to report. During a check interval
1) the kernel doesn't guarantee that a new object will have the desired id,
so it's up to userspace how to handle an object with a "wrong" id.
2) A toggle with a non-default value will be set back to -1 by the kernel after
-successful IPC object allocation.
+successful IPC object allocation. If an IPC object allocation syscall
+fails, it is undefined whether the value remains unmodified or is reset to -1.
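A sketch of the intended flow, assuming a kernel built with CONFIG_CHECKPOINT_RESTORE (which provides msg_next_id and friends) and root privileges: request a specific id, create the object, and handle a "wrong" id in userspace:

  #include <stdio.h>
  #include <sys/ipc.h>
  #include <sys/msg.h>

  /* Sketch: ask for message queue id 42 via msg_next_id, then verify. */
  int main(void)
  {
          FILE *f = fopen("/proc/sys/kernel/msg_next_id", "w");

          if (!f)
                  return 1;
          fprintf(f, "42\n");
          fclose(f);

          int id = msgget(IPC_PRIVATE, 0600 | IPC_CREAT);
          if (id != 42)
                  fprintf(stderr, "got id %d, not 42 -- up to us to cope\n", id);
          return 0;
  }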
==============================================================
The default value is 0.
See Documentation/vm/overcommit-accounting.rst and
-mm/mmap.c::__vm_enough_memory() for more information.
+mm/util.c::__vm_enough_memory() for more information.
==============================================================
against various use-after-free conditions that can be used in
security flaw exploits.
+config HAVE_ARCH_PREL32_RELOCATIONS
+ bool
+ help
+ May be selected by an architecture if it supports place-relative
+ 32-bit relocations, both in the toolchain and in the module loader,
+ in which case relative references can be used in special sections
+	  for PCI fixup, initcalls etc., which are only half the size on 64-bit
+ architectures, and don't require runtime relocation on relocatable
+ kernels.
+
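The idea, in a minimal sketch: the section stores a signed 32-bit offset relative to the field's own address, and the consumer adds the offset back to recover the full pointer at runtime. The helper name below is illustrative; it mirrors the offset_to_ptr() call used in the PCI fixup hunk later in this series.

  /* Sketch: resolve a place-relative 32-bit reference at runtime. */
  static inline void *prel32_to_ptr(const int *off)
  {
          return (void *)((unsigned long)off + *off);
  }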
source "kernel/gcov/Kconfig"
source "scripts/gcc-plugins/Kconfig"
* atomic helpers. Insert it into the gate_vma so that it is visible
* through ptrace and /proc/<pid>/mem.
*/
-static struct vm_area_struct gate_vma = {
- .vm_start = 0xffff0000,
- .vm_end = 0xffff0000 + PAGE_SIZE,
- .vm_flags = VM_READ | VM_EXEC | VM_MAYREAD | VM_MAYEXEC,
-};
+static struct vm_area_struct gate_vma;
static int __init gate_vma_init(void)
{
vma_init(&gate_vma, NULL);
gate_vma.vm_page_prot = PAGE_READONLY_EXEC;
+ gate_vma.vm_start = 0xffff0000;
+ gate_vma.vm_end = 0xffff0000 + PAGE_SIZE;
+ gate_vma.vm_flags = VM_READ | VM_EXEC | VM_MAYREAD | VM_MAYEXEC;
return 0;
}
arch_initcall(gate_vma_init);
select HAVE_ARCH_KGDB
select HAVE_ARCH_MMAP_RND_BITS
select HAVE_ARCH_MMAP_RND_COMPAT_BITS if COMPAT
+ select HAVE_ARCH_PREL32_RELOCATIONS
select HAVE_ARCH_SECCOMP_FILTER
select HAVE_ARCH_STACKLEAK
select HAVE_ARCH_THREAD_STRUCT_WHITELIST
select HAVE_ARCH_KGDB
select HAVE_ARCH_MMAP_RND_BITS
select HAVE_ARCH_MMAP_RND_COMPAT_BITS if COMPAT
+ select HAVE_ARCH_PREL32_RELOCATIONS
select HAVE_ARCH_SECCOMP_FILTER
select HAVE_ARCH_TRACEHOOK
select HAVE_CBPF_JIT if !PPC64
select HAVE_ARCH_MMAP_RND_BITS if MMU
select HAVE_ARCH_MMAP_RND_COMPAT_BITS if MMU && COMPAT
select HAVE_ARCH_COMPAT_MMAP_BASES if MMU && COMPAT
+ select HAVE_ARCH_PREL32_RELOCATIONS
select HAVE_ARCH_SECCOMP_FILTER
select HAVE_ARCH_THREAD_STRUCT_WHITELIST
select HAVE_ARCH_TRACEHOOK
* _ctype[] in lib/ctype.c is needed by isspace() of linux/ctype.h.
* While both lib/ctype.c and lib/cmdline.c will bring EXPORT_SYMBOL
* which is meaningless and will cause compiling error in some cases.
- * So do not include linux/export.h and define EXPORT_SYMBOL(sym)
- * as empty.
*/
-#define _LINUX_EXPORT_H
-#define EXPORT_SYMBOL(sym)
+#define __DISABLE_EXPORTS
#include "misc.h"
#include "error.h"
generic-y += dma-contiguous.h
generic-y += early_ioremap.h
+generic-y += export.h
generic-y += mcs_spinlock.h
generic-y += mm-arch-hooks.h
+++ /dev/null
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifdef CONFIG_64BIT
-#define KSYM_ALIGN 16
-#endif
-#include <asm-generic/export.h>
kvm_x86_ops->load_eoi_exitmap(vcpu, eoi_exit_bitmap);
}
-void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm,
- unsigned long start, unsigned long end)
+int kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm,
+ unsigned long start, unsigned long end,
+ bool blockable)
{
unsigned long apic_address;
apic_address = gfn_to_hva(kvm, APIC_DEFAULT_PHYS_BASE >> PAGE_SHIFT);
if (start <= apic_address && apic_address < end)
kvm_make_all_cpus_request(kvm, KVM_REQ_APIC_PAGE_RELOAD);
+
+ return 0;
}
void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu)
struct device_attribute *attr, const char *buf, size_t len)
{
char *file_name;
+ size_t sz;
struct file *backing_dev = NULL;
struct inode *inode;
struct address_space *mapping;
goto out;
}
- strlcpy(file_name, buf, len);
+ strlcpy(file_name, buf, PATH_MAX);
+ /* ignore trailing newline */
+ sz = strlen(file_name);
+ if (sz > 0 && file_name[sz - 1] == '\n')
+ file_name[sz - 1] = 0x00;
backing_dev = filp_open(file_name, O_RDWR|O_LARGEFILE, 0);
if (IS_ERR(backing_dev)) {
-D__NO_FORTIFY \
$(call cc-option,-ffreestanding) \
$(call cc-option,-fno-stack-protector) \
+ -D__DISABLE_EXPORTS
GCOV_PROFILE := n
KASAN_SANITIZE := n
*
* @amn: our notifier
*/
-static void amdgpu_mn_read_lock(struct amdgpu_mn *amn)
+static int amdgpu_mn_read_lock(struct amdgpu_mn *amn, bool blockable)
{
- mutex_lock(&amn->read_lock);
+ if (blockable)
+ mutex_lock(&amn->read_lock);
+ else if (!mutex_trylock(&amn->read_lock))
+ return -EAGAIN;
+
if (atomic_inc_return(&amn->recursion) == 1)
down_read_non_owner(&amn->lock);
mutex_unlock(&amn->read_lock);
+
+ return 0;
}
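The same acquire-or-bail pattern recurs in the notifier hunks below; a standalone sketch with illustrative names:

  #include <linux/mutex.h>

  /* Sketch: take the lock if sleeping is allowed; otherwise try once
   * and let the caller retry after seeing -EAGAIN. */
  static int example_read_lock(struct mutex *lock, bool blockable)
  {
          if (blockable)
                  mutex_lock(lock);
          else if (!mutex_trylock(lock))
                  return -EAGAIN;
          return 0;
  }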
/**
* Block for operations on BOs to finish and mark pages as accessed and
* potentially dirty.
*/
-static void amdgpu_mn_invalidate_range_start_gfx(struct mmu_notifier *mn,
+static int amdgpu_mn_invalidate_range_start_gfx(struct mmu_notifier *mn,
struct mm_struct *mm,
unsigned long start,
- unsigned long end)
+ unsigned long end,
+ bool blockable)
{
struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn);
struct interval_tree_node *it;
/* notification is exclusive, but interval is inclusive */
end -= 1;
- amdgpu_mn_read_lock(amn);
+ /* TODO we should be able to split locking for interval tree and
+ * amdgpu_mn_invalidate_node
+ */
+ if (amdgpu_mn_read_lock(amn, blockable))
+ return -EAGAIN;
it = interval_tree_iter_first(&amn->objects, start, end);
while (it) {
struct amdgpu_mn_node *node;
+ if (!blockable) {
+ amdgpu_mn_read_unlock(amn);
+ return -EAGAIN;
+ }
+
node = container_of(it, struct amdgpu_mn_node, it);
it = interval_tree_iter_next(it, start, end);
amdgpu_mn_invalidate_node(node, start, end);
}
+
+ return 0;
}
/**
* necessitates evicting all user-mode queues of the process. The BOs
 * are restored in amdgpu_mn_invalidate_range_end_hsa.
*/
-static void amdgpu_mn_invalidate_range_start_hsa(struct mmu_notifier *mn,
+static int amdgpu_mn_invalidate_range_start_hsa(struct mmu_notifier *mn,
struct mm_struct *mm,
unsigned long start,
- unsigned long end)
+ unsigned long end,
+ bool blockable)
{
struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn);
struct interval_tree_node *it;
/* notification is exclusive, but interval is inclusive */
end -= 1;
- amdgpu_mn_read_lock(amn);
+ if (amdgpu_mn_read_lock(amn, blockable))
+ return -EAGAIN;
it = interval_tree_iter_first(&amn->objects, start, end);
while (it) {
struct amdgpu_mn_node *node;
struct amdgpu_bo *bo;
+ if (!blockable) {
+ amdgpu_mn_read_unlock(amn);
+ return -EAGAIN;
+ }
+
node = container_of(it, struct amdgpu_mn_node, it);
it = interval_tree_iter_next(it, start, end);
amdgpu_amdkfd_evict_userptr(mem, mm);
}
}
+
+ return 0;
}
/**
mo->attached = false;
}
-static void i915_gem_userptr_mn_invalidate_range_start(struct mmu_notifier *_mn,
+static int i915_gem_userptr_mn_invalidate_range_start(struct mmu_notifier *_mn,
struct mm_struct *mm,
unsigned long start,
- unsigned long end)
+ unsigned long end,
+ bool blockable)
{
struct i915_mmu_notifier *mn =
container_of(_mn, struct i915_mmu_notifier, mn);
LIST_HEAD(cancelled);
if (RB_EMPTY_ROOT(&mn->objects.rb_root))
- return;
+ return 0;
/* interval ranges are inclusive, but invalidate range is exclusive */
end--;
spin_lock(&mn->lock);
it = interval_tree_iter_first(&mn->objects, start, end);
while (it) {
+ if (!blockable) {
+ spin_unlock(&mn->lock);
+ return -EAGAIN;
+ }
/* The mmu_object is released late when destroying the
* GEM object so it is entirely possible to gain a
* reference on an object in the process of being freed
if (!list_empty(&cancelled))
flush_workqueue(mn->wq);
+
+ return 0;
}
static const struct mmu_notifier_ops i915_gem_userptr_notifier = {
* We block for all BOs between start and end to be idle and
* unmap them by move them into system domain again.
*/
-static void radeon_mn_invalidate_range_start(struct mmu_notifier *mn,
+static int radeon_mn_invalidate_range_start(struct mmu_notifier *mn,
struct mm_struct *mm,
unsigned long start,
- unsigned long end)
+ unsigned long end,
+ bool blockable)
{
struct radeon_mn *rmn = container_of(mn, struct radeon_mn, mn);
struct ttm_operation_ctx ctx = { false, false };
struct interval_tree_node *it;
+ int ret = 0;
/* notification is exclusive, but interval is inclusive */
end -= 1;
- mutex_lock(&rmn->lock);
+ /* TODO we should be able to split locking for interval tree and
+ * the tear down.
+ */
+ if (blockable)
+ mutex_lock(&rmn->lock);
+ else if (!mutex_trylock(&rmn->lock))
+ return -EAGAIN;
it = interval_tree_iter_first(&rmn->objects, start, end);
while (it) {
struct radeon_bo *bo;
long r;
+ if (!blockable) {
+ ret = -EAGAIN;
+ goto out_unlock;
+ }
+
node = container_of(it, struct radeon_mn_node, it);
it = interval_tree_iter_next(it, start, end);
}
}
+out_unlock:
mutex_unlock(&rmn->lock);
+
+ return ret;
}
static const struct mmu_notifier_ops radeon_mn_ops = {
rbt_ib_umem_for_each_in_range(&context->umem_tree, 0,
ULLONG_MAX,
ib_umem_notifier_release_trampoline,
+ true,
NULL);
up_read(&context->umem_rwsem);
}
return 0;
}
-static void ib_umem_notifier_invalidate_range_start(struct mmu_notifier *mn,
+static int ib_umem_notifier_invalidate_range_start(struct mmu_notifier *mn,
struct mm_struct *mm,
unsigned long start,
- unsigned long end)
+ unsigned long end,
+ bool blockable)
{
struct ib_ucontext *context = container_of(mn, struct ib_ucontext, mn);
+ int ret;
if (!context->invalidate_range)
- return;
+ return 0;
+
+ if (blockable)
+ down_read(&context->umem_rwsem);
+ else if (!down_read_trylock(&context->umem_rwsem))
+ return -EAGAIN;
ib_ucontext_notifier_start_account(context);
- down_read(&context->umem_rwsem);
- rbt_ib_umem_for_each_in_range(&context->umem_tree, start,
+ ret = rbt_ib_umem_for_each_in_range(&context->umem_tree, start,
end,
- invalidate_range_start_trampoline, NULL);
+ invalidate_range_start_trampoline,
+ blockable, NULL);
up_read(&context->umem_rwsem);
+
+ return ret;
}
static int invalidate_range_end_trampoline(struct ib_umem *item, u64 start,
if (!context->invalidate_range)
return;
+ /*
+ * TODO: we currently bail out if there is any sleepable work to be done
+ * in ib_umem_notifier_invalidate_range_start so we shouldn't really block
+ * here. But this is ugly and fragile.
+ */
down_read(&context->umem_rwsem);
rbt_ib_umem_for_each_in_range(&context->umem_tree, start,
end,
- invalidate_range_end_trampoline, NULL);
+ invalidate_range_end_trampoline, true, NULL);
up_read(&context->umem_rwsem);
ib_ucontext_notifier_end_account(context);
}
int rbt_ib_umem_for_each_in_range(struct rb_root_cached *root,
u64 start, u64 last,
umem_call_back cb,
+ bool blockable,
void *cookie)
{
int ret_val = 0;
for (node = rbt_ib_umem_iter_first(root, start, last - 1);
node; node = next) {
+ /* TODO move the blockable decision up to the callback */
+ if (!blockable)
+ return -EAGAIN;
next = rbt_ib_umem_iter_next(node, start, last - 1);
umem = container_of(node, struct ib_umem_odp, interval_tree);
ret_val = cb(umem->umem, start, last, cookie) || ret_val;
static unsigned long mmu_node_start(struct mmu_rb_node *);
static unsigned long mmu_node_last(struct mmu_rb_node *);
-static void mmu_notifier_range_start(struct mmu_notifier *,
+static int mmu_notifier_range_start(struct mmu_notifier *,
struct mm_struct *,
- unsigned long, unsigned long);
+ unsigned long, unsigned long, bool);
static struct mmu_rb_node *__mmu_rb_search(struct mmu_rb_handler *,
unsigned long, unsigned long);
static void do_remove(struct mmu_rb_handler *handler,
handler->ops->remove(handler->ops_arg, node);
}
-static void mmu_notifier_range_start(struct mmu_notifier *mn,
+static int mmu_notifier_range_start(struct mmu_notifier *mn,
struct mm_struct *mm,
unsigned long start,
- unsigned long end)
+ unsigned long end,
+ bool blockable)
{
struct mmu_rb_handler *handler =
container_of(mn, struct mmu_rb_handler, mn);
if (added)
queue_work(handler->wq, &handler->del_work);
+
+ return 0;
}
/*
down_read(&ctx->umem_rwsem);
rbt_ib_umem_for_each_in_range(&ctx->umem_tree, 0, ULLONG_MAX,
- mr_leaf_free, imr);
+ mr_leaf_free, true, imr);
up_read(&ctx->umem_rwsem);
wait_event(imr->q_leaf_free, !atomic_read(&imr->num_leaf_free));
config BCACHE
tristate "Block device as cache"
+ select CRC64
---help---
Allows a block device to be used as cache for other devices; uses
a btree for indexing and the layout is optimized for SSDs.
return 0;
}
-
-/*
- * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group (Any
- * use permitted, subject to terms of PostgreSQL license; see.)
-
- * If we have a 64-bit integer type, then a 64-bit CRC looks just like the
- * usual sort of implementation. (See Ross Williams' excellent introduction
- * A PAINLESS GUIDE TO CRC ERROR DETECTION ALGORITHMS, available from
- * ftp://ftp.rocksoft.com/papers/crc_v3.txt or several other net sites.)
- * If we have no working 64-bit type, then fake it with two 32-bit registers.
- *
- * The present implementation is a normal (not "reflected", in Williams'
- * terms) 64-bit CRC, using initial all-ones register contents and a final
- * bit inversion. The chosen polynomial is borrowed from the DLT1 spec
- * (ECMA-182, available from http://www.ecma.ch/ecma1/STAND/ECMA-182.HTM):
- *
- * x^64 + x^62 + x^57 + x^55 + x^54 + x^53 + x^52 + x^47 + x^46 + x^45 +
- * x^40 + x^39 + x^38 + x^37 + x^35 + x^33 + x^32 + x^31 + x^29 + x^27 +
- * x^24 + x^23 + x^22 + x^21 + x^19 + x^17 + x^13 + x^12 + x^10 + x^9 +
- * x^7 + x^4 + x + 1
-*/
-
-static const uint64_t crc_table[256] = {
- 0x0000000000000000ULL, 0x42F0E1EBA9EA3693ULL, 0x85E1C3D753D46D26ULL,
- 0xC711223CFA3E5BB5ULL, 0x493366450E42ECDFULL, 0x0BC387AEA7A8DA4CULL,
- 0xCCD2A5925D9681F9ULL, 0x8E224479F47CB76AULL, 0x9266CC8A1C85D9BEULL,
- 0xD0962D61B56FEF2DULL, 0x17870F5D4F51B498ULL, 0x5577EEB6E6BB820BULL,
- 0xDB55AACF12C73561ULL, 0x99A54B24BB2D03F2ULL, 0x5EB4691841135847ULL,
- 0x1C4488F3E8F96ED4ULL, 0x663D78FF90E185EFULL, 0x24CD9914390BB37CULL,
- 0xE3DCBB28C335E8C9ULL, 0xA12C5AC36ADFDE5AULL, 0x2F0E1EBA9EA36930ULL,
- 0x6DFEFF5137495FA3ULL, 0xAAEFDD6DCD770416ULL, 0xE81F3C86649D3285ULL,
- 0xF45BB4758C645C51ULL, 0xB6AB559E258E6AC2ULL, 0x71BA77A2DFB03177ULL,
- 0x334A9649765A07E4ULL, 0xBD68D2308226B08EULL, 0xFF9833DB2BCC861DULL,
- 0x388911E7D1F2DDA8ULL, 0x7A79F00C7818EB3BULL, 0xCC7AF1FF21C30BDEULL,
- 0x8E8A101488293D4DULL, 0x499B3228721766F8ULL, 0x0B6BD3C3DBFD506BULL,
- 0x854997BA2F81E701ULL, 0xC7B97651866BD192ULL, 0x00A8546D7C558A27ULL,
- 0x4258B586D5BFBCB4ULL, 0x5E1C3D753D46D260ULL, 0x1CECDC9E94ACE4F3ULL,
- 0xDBFDFEA26E92BF46ULL, 0x990D1F49C77889D5ULL, 0x172F5B3033043EBFULL,
- 0x55DFBADB9AEE082CULL, 0x92CE98E760D05399ULL, 0xD03E790CC93A650AULL,
- 0xAA478900B1228E31ULL, 0xE8B768EB18C8B8A2ULL, 0x2FA64AD7E2F6E317ULL,
- 0x6D56AB3C4B1CD584ULL, 0xE374EF45BF6062EEULL, 0xA1840EAE168A547DULL,
- 0x66952C92ECB40FC8ULL, 0x2465CD79455E395BULL, 0x3821458AADA7578FULL,
- 0x7AD1A461044D611CULL, 0xBDC0865DFE733AA9ULL, 0xFF3067B657990C3AULL,
- 0x711223CFA3E5BB50ULL, 0x33E2C2240A0F8DC3ULL, 0xF4F3E018F031D676ULL,
- 0xB60301F359DBE0E5ULL, 0xDA050215EA6C212FULL, 0x98F5E3FE438617BCULL,
- 0x5FE4C1C2B9B84C09ULL, 0x1D14202910527A9AULL, 0x93366450E42ECDF0ULL,
- 0xD1C685BB4DC4FB63ULL, 0x16D7A787B7FAA0D6ULL, 0x5427466C1E109645ULL,
- 0x4863CE9FF6E9F891ULL, 0x0A932F745F03CE02ULL, 0xCD820D48A53D95B7ULL,
- 0x8F72ECA30CD7A324ULL, 0x0150A8DAF8AB144EULL, 0x43A04931514122DDULL,
- 0x84B16B0DAB7F7968ULL, 0xC6418AE602954FFBULL, 0xBC387AEA7A8DA4C0ULL,
- 0xFEC89B01D3679253ULL, 0x39D9B93D2959C9E6ULL, 0x7B2958D680B3FF75ULL,
- 0xF50B1CAF74CF481FULL, 0xB7FBFD44DD257E8CULL, 0x70EADF78271B2539ULL,
- 0x321A3E938EF113AAULL, 0x2E5EB66066087D7EULL, 0x6CAE578BCFE24BEDULL,
- 0xABBF75B735DC1058ULL, 0xE94F945C9C3626CBULL, 0x676DD025684A91A1ULL,
- 0x259D31CEC1A0A732ULL, 0xE28C13F23B9EFC87ULL, 0xA07CF2199274CA14ULL,
- 0x167FF3EACBAF2AF1ULL, 0x548F120162451C62ULL, 0x939E303D987B47D7ULL,
- 0xD16ED1D631917144ULL, 0x5F4C95AFC5EDC62EULL, 0x1DBC74446C07F0BDULL,
- 0xDAAD56789639AB08ULL, 0x985DB7933FD39D9BULL, 0x84193F60D72AF34FULL,
- 0xC6E9DE8B7EC0C5DCULL, 0x01F8FCB784FE9E69ULL, 0x43081D5C2D14A8FAULL,
- 0xCD2A5925D9681F90ULL, 0x8FDAB8CE70822903ULL, 0x48CB9AF28ABC72B6ULL,
- 0x0A3B7B1923564425ULL, 0x70428B155B4EAF1EULL, 0x32B26AFEF2A4998DULL,
- 0xF5A348C2089AC238ULL, 0xB753A929A170F4ABULL, 0x3971ED50550C43C1ULL,
- 0x7B810CBBFCE67552ULL, 0xBC902E8706D82EE7ULL, 0xFE60CF6CAF321874ULL,
- 0xE224479F47CB76A0ULL, 0xA0D4A674EE214033ULL, 0x67C58448141F1B86ULL,
- 0x253565A3BDF52D15ULL, 0xAB1721DA49899A7FULL, 0xE9E7C031E063ACECULL,
- 0x2EF6E20D1A5DF759ULL, 0x6C0603E6B3B7C1CAULL, 0xF6FAE5C07D3274CDULL,
- 0xB40A042BD4D8425EULL, 0x731B26172EE619EBULL, 0x31EBC7FC870C2F78ULL,
- 0xBFC9838573709812ULL, 0xFD39626EDA9AAE81ULL, 0x3A28405220A4F534ULL,
- 0x78D8A1B9894EC3A7ULL, 0x649C294A61B7AD73ULL, 0x266CC8A1C85D9BE0ULL,
- 0xE17DEA9D3263C055ULL, 0xA38D0B769B89F6C6ULL, 0x2DAF4F0F6FF541ACULL,
- 0x6F5FAEE4C61F773FULL, 0xA84E8CD83C212C8AULL, 0xEABE6D3395CB1A19ULL,
- 0x90C79D3FEDD3F122ULL, 0xD2377CD44439C7B1ULL, 0x15265EE8BE079C04ULL,
- 0x57D6BF0317EDAA97ULL, 0xD9F4FB7AE3911DFDULL, 0x9B041A914A7B2B6EULL,
- 0x5C1538ADB04570DBULL, 0x1EE5D94619AF4648ULL, 0x02A151B5F156289CULL,
- 0x4051B05E58BC1E0FULL, 0x87409262A28245BAULL, 0xC5B073890B687329ULL,
- 0x4B9237F0FF14C443ULL, 0x0962D61B56FEF2D0ULL, 0xCE73F427ACC0A965ULL,
- 0x8C8315CC052A9FF6ULL, 0x3A80143F5CF17F13ULL, 0x7870F5D4F51B4980ULL,
- 0xBF61D7E80F251235ULL, 0xFD913603A6CF24A6ULL, 0x73B3727A52B393CCULL,
- 0x31439391FB59A55FULL, 0xF652B1AD0167FEEAULL, 0xB4A25046A88DC879ULL,
- 0xA8E6D8B54074A6ADULL, 0xEA16395EE99E903EULL, 0x2D071B6213A0CB8BULL,
- 0x6FF7FA89BA4AFD18ULL, 0xE1D5BEF04E364A72ULL, 0xA3255F1BE7DC7CE1ULL,
- 0x64347D271DE22754ULL, 0x26C49CCCB40811C7ULL, 0x5CBD6CC0CC10FAFCULL,
- 0x1E4D8D2B65FACC6FULL, 0xD95CAF179FC497DAULL, 0x9BAC4EFC362EA149ULL,
- 0x158E0A85C2521623ULL, 0x577EEB6E6BB820B0ULL, 0x906FC95291867B05ULL,
- 0xD29F28B9386C4D96ULL, 0xCEDBA04AD0952342ULL, 0x8C2B41A1797F15D1ULL,
- 0x4B3A639D83414E64ULL, 0x09CA82762AAB78F7ULL, 0x87E8C60FDED7CF9DULL,
- 0xC51827E4773DF90EULL, 0x020905D88D03A2BBULL, 0x40F9E43324E99428ULL,
- 0x2CFFE7D5975E55E2ULL, 0x6E0F063E3EB46371ULL, 0xA91E2402C48A38C4ULL,
- 0xEBEEC5E96D600E57ULL, 0x65CC8190991CB93DULL, 0x273C607B30F68FAEULL,
- 0xE02D4247CAC8D41BULL, 0xA2DDA3AC6322E288ULL, 0xBE992B5F8BDB8C5CULL,
- 0xFC69CAB42231BACFULL, 0x3B78E888D80FE17AULL, 0x7988096371E5D7E9ULL,
- 0xF7AA4D1A85996083ULL, 0xB55AACF12C735610ULL, 0x724B8ECDD64D0DA5ULL,
- 0x30BB6F267FA73B36ULL, 0x4AC29F2A07BFD00DULL, 0x08327EC1AE55E69EULL,
- 0xCF235CFD546BBD2BULL, 0x8DD3BD16FD818BB8ULL, 0x03F1F96F09FD3CD2ULL,
- 0x41011884A0170A41ULL, 0x86103AB85A2951F4ULL, 0xC4E0DB53F3C36767ULL,
- 0xD8A453A01B3A09B3ULL, 0x9A54B24BB2D03F20ULL, 0x5D45907748EE6495ULL,
- 0x1FB5719CE1045206ULL, 0x919735E51578E56CULL, 0xD367D40EBC92D3FFULL,
- 0x1476F63246AC884AULL, 0x568617D9EF46BED9ULL, 0xE085162AB69D5E3CULL,
- 0xA275F7C11F7768AFULL, 0x6564D5FDE549331AULL, 0x279434164CA30589ULL,
- 0xA9B6706FB8DFB2E3ULL, 0xEB46918411358470ULL, 0x2C57B3B8EB0BDFC5ULL,
- 0x6EA7525342E1E956ULL, 0x72E3DAA0AA188782ULL, 0x30133B4B03F2B111ULL,
- 0xF7021977F9CCEAA4ULL, 0xB5F2F89C5026DC37ULL, 0x3BD0BCE5A45A6B5DULL,
- 0x79205D0E0DB05DCEULL, 0xBE317F32F78E067BULL, 0xFCC19ED95E6430E8ULL,
- 0x86B86ED5267CDBD3ULL, 0xC4488F3E8F96ED40ULL, 0x0359AD0275A8B6F5ULL,
- 0x41A94CE9DC428066ULL, 0xCF8B0890283E370CULL, 0x8D7BE97B81D4019FULL,
- 0x4A6ACB477BEA5A2AULL, 0x089A2AACD2006CB9ULL, 0x14DEA25F3AF9026DULL,
- 0x562E43B4931334FEULL, 0x913F6188692D6F4BULL, 0xD3CF8063C0C759D8ULL,
- 0x5DEDC41A34BBEEB2ULL, 0x1F1D25F19D51D821ULL, 0xD80C07CD676F8394ULL,
- 0x9AFCE626CE85B507ULL,
-};
-
-uint64_t bch_crc64_update(uint64_t crc, const void *_data, size_t len)
-{
- const unsigned char *data = _data;
-
- while (len--) {
- int i = ((int) (crc >> 56) ^ *data++) & 0xFF;
- crc = crc_table[i] ^ (crc << 8);
- }
-
- return crc;
-}
-
-uint64_t bch_crc64(const void *data, size_t len)
-{
- uint64_t crc = 0xffffffffffffffffULL;
-
- crc = bch_crc64_update(crc, data, len);
-
- return crc ^ 0xffffffffffffffffULL;
-}
#include <linux/ratelimit.h>
#include <linux/vmalloc.h>
#include <linux/workqueue.h>
+#include <linux/crc64.h>
#include "closure.h"
#define RB_PREV(ptr, member) \
container_of_or_null(rb_prev(&(ptr)->member), typeof(*ptr), member)
+static inline uint64_t bch_crc64(const void *p, size_t len)
+{
+ uint64_t crc = 0xffffffffffffffffULL;
+
+ crc = crc64_be(crc, p, len);
+ return crc ^ 0xffffffffffffffffULL;
+}
+
+static inline uint64_t bch_crc64_update(uint64_t crc,
+ const void *p,
+ size_t len)
+{
+ crc = crc64_be(crc, p, len);
+ return crc;
+}
+
/* Does linear interpolation between powers of two */
static inline unsigned fract_exp_two(unsigned x, unsigned fract_bits)
{
{
return bdev->bd_inode->i_size >> 9;
}
-
-uint64_t bch_crc64_update(uint64_t, const void *, size_t);
-uint64_t bch_crc64(const void *, size_t);
-
#endif /* _BCACHE_UTIL_H */
schedule_work(&scif_info.misc_work);
}
-static void scif_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn,
+static int scif_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn,
struct mm_struct *mm,
unsigned long start,
- unsigned long end)
+ unsigned long end,
+ bool blockable)
{
struct scif_mmu_notif *mmn;
mmn = container_of(mn, struct scif_mmu_notif, ep_mmu_notifier);
scif_rma_destroy_tcw(mmn, start, end - start);
+
+ return 0;
}
static void scif_mmu_notifier_invalidate_range_end(struct mmu_notifier *mn,
/*
* MMUOPS notifier callout functions
*/
-static void gru_invalidate_range_start(struct mmu_notifier *mn,
+static int gru_invalidate_range_start(struct mmu_notifier *mn,
struct mm_struct *mm,
- unsigned long start, unsigned long end)
+ unsigned long start, unsigned long end,
+ bool blockable)
{
struct gru_mm_struct *gms = container_of(mn, struct gru_mm_struct,
ms_notifier);
gru_dbg(grudev, "gms %p, start 0x%lx, end 0x%lx, act %d\n", gms,
start, end, atomic_read(&gms->ms_range_active));
gru_flush_tlb_range(gms, start, end - start);
+
+ return 0;
}
static void gru_invalidate_range_end(struct mmu_notifier *mn,
f->vendor == (u16) PCI_ANY_ID) &&
(f->device == dev->device ||
f->device == (u16) PCI_ANY_ID)) {
- calltime = fixup_debug_start(dev, f->hook);
- f->hook(dev);
- fixup_debug_report(dev, calltime, f->hook);
+ void (*hook)(struct pci_dev *dev);
+#ifdef CONFIG_HAVE_ARCH_PREL32_RELOCATIONS
+ hook = offset_to_ptr(&f->hook_offset);
+#else
+ hook = f->hook;
+#endif
+ calltime = fixup_debug_start(dev, hook);
+ hook(dev);
+ fixup_debug_report(dev, calltime, hook);
}
}
static int rio_mport_wait_for_async_dma(struct file *filp, void __user *arg)
{
struct mport_cdev_priv *priv;
- struct mport_dev *md;
struct rio_async_tx_wait w_param;
struct mport_dma_req *req;
dma_cookie_t cookie;
int ret;
priv = (struct mport_cdev_priv *)filp->private_data;
- md = priv->md;
if (unlikely(copy_from_user(&w_param, arg, sizeof(w_param))))
return -EFAULT;
/* ------------------------------------------------------------------ */
+static bool in_range(struct gntdev_grant_map *map,
+ unsigned long start, unsigned long end)
+{
+ if (!map->vma)
+ return false;
+ if (map->vma->vm_start >= end)
+ return false;
+ if (map->vma->vm_end <= start)
+ return false;
+
+ return true;
+}
+
static void unmap_if_in_range(struct gntdev_grant_map *map,
unsigned long start, unsigned long end)
{
unsigned long mstart, mend;
int err;
- if (!map->vma)
- return;
- if (map->vma->vm_start >= end)
- return;
- if (map->vma->vm_end <= start)
- return;
mstart = max(start, map->vma->vm_start);
mend = min(end, map->vma->vm_end);
pr_debug("map %d+%d (%lx %lx), range %lx %lx, mrange %lx %lx\n",
WARN_ON(err);
}
-static void mn_invl_range_start(struct mmu_notifier *mn,
+static int mn_invl_range_start(struct mmu_notifier *mn,
struct mm_struct *mm,
- unsigned long start, unsigned long end)
+ unsigned long start, unsigned long end,
+ bool blockable)
{
struct gntdev_priv *priv = container_of(mn, struct gntdev_priv, mn);
struct gntdev_grant_map *map;
+ int ret = 0;
+
+ /* TODO do we really need a mutex here? */
+ if (blockable)
+ mutex_lock(&priv->lock);
+ else if (!mutex_trylock(&priv->lock))
+ return -EAGAIN;
- mutex_lock(&priv->lock);
list_for_each_entry(map, &priv->maps, next) {
+ if (in_range(map, start, end)) {
+ ret = -EAGAIN;
+ goto out_unlock;
+ }
unmap_if_in_range(map, start, end);
}
list_for_each_entry(map, &priv->freeable_maps, next) {
+ if (in_range(map, start, end)) {
+ ret = -EAGAIN;
+ goto out_unlock;
+ }
unmap_if_in_range(map, start, end);
}
+
+out_unlock:
mutex_unlock(&priv->lock);
+
+ return ret;
}
static void mn_release(struct mmu_notifier *mn,
* of time to convert from RISC OS epoch to Unix epoch.
*/
static void
-adfs_adfs2unix_time(struct timespec *tv, struct inode *inode)
+adfs_adfs2unix_time(struct timespec64 *tv, struct inode *inode)
{
unsigned int high, low;
/* 01 Jan 1970 00:00:00 (Unix epoch) as nanoseconds since
/* convert from RISC OS to Unix epoch */
nsec -= nsec_unix_epoch_diff_risc_os_epoch;
- *tv = ns_to_timespec(nsec);
+ *tv = ns_to_timespec64(nsec);
return;
cur_time:
- *tv = timespec64_to_timespec(current_time(inode));
+ *tv = current_time(inode);
return;
too_early:
struct inode *
adfs_iget(struct super_block *sb, struct object_info *obj)
{
- struct timespec ts;
struct inode *inode;
inode = new_inode(sb);
ADFS_I(inode)->stamped = ((obj->loadaddr & 0xfff00000) == 0xfff00000);
inode->i_mode = adfs_atts2mode(sb, inode);
- ts = timespec64_to_timespec(inode->i_mtime);
- adfs_adfs2unix_time(&ts, inode);
- inode->i_mtime = timespec_to_timespec64(ts);
+ adfs_adfs2unix_time(&inode->i_mtime, inode);
inode->i_atime = inode->i_mtime;
inode->i_ctime = inode->i_mtime;
#include <linux/list.h>
#include <linux/completion.h>
#include <linux/file.h>
+#include <linux/magic.h>
/* This is the range of ioctl() numbers we claim as ours */
#define AUTOFS_IOC_FIRST AUTOFS_IOC_READY
static inline struct autofs_sb_info *autofs_sbi(struct super_block *sb)
{
- return (struct autofs_sb_info *)(sb->s_fs_info);
+ return sb->s_magic != AUTOFS_SUPER_MAGIC ?
+ NULL : (struct autofs_sb_info *)(sb->s_fs_info);
}
static inline struct autofs_info *autofs_dentry_ino(struct dentry *dentry)
struct autofs_sb_info *,
struct autofs_packet_expire __user *);
int autofs_do_expire_multi(struct super_block *sb, struct vfsmount *mnt,
- struct autofs_sb_info *sbi, int when);
+ struct autofs_sb_info *sbi, unsigned int how);
int autofs_expire_multi(struct super_block *, struct vfsmount *,
struct autofs_sb_info *, int __user *);
-struct dentry *autofs_expire_direct(struct super_block *sb,
- struct vfsmount *mnt,
- struct autofs_sb_info *sbi, int how);
-struct dentry *autofs_expire_indirect(struct super_block *sb,
- struct vfsmount *mnt,
- struct autofs_sb_info *sbi, int how);
/* Device node initialization */
#include "autofs_i.h"
-static unsigned long now;
-
/* Check if a dentry can be expired */
static inline int autofs_can_expire(struct dentry *dentry,
- unsigned long timeout, int do_now)
+ unsigned long timeout, unsigned int how)
{
struct autofs_info *ino = autofs_dentry_ino(dentry);
if (ino == NULL)
return 0;
- if (!do_now) {
+ if (!(how & AUTOFS_EXP_IMMEDIATE)) {
/* Too young to die */
- if (!timeout || time_after(ino->last_used + timeout, now))
+ if (!timeout || time_after(ino->last_used + timeout, jiffies))
return 0;
}
return 1;
}
/* Check a mount point for busyness */
-static int autofs_mount_busy(struct vfsmount *mnt, struct dentry *dentry)
+static int autofs_mount_busy(struct vfsmount *mnt,
+ struct dentry *dentry, unsigned int how)
{
struct dentry *top = dentry;
struct path path = {.mnt = mnt, .dentry = dentry};
goto done;
}
+	/* Not a submount; has a forced expire been requested? */
+ if (how & AUTOFS_EXP_FORCED) {
+ status = 0;
+ goto done;
+ }
+
/* Update the expiry counter if fs is busy */
if (!may_umount_tree(path.mnt)) {
struct autofs_info *ino;
static int autofs_direct_busy(struct vfsmount *mnt,
struct dentry *top,
unsigned long timeout,
- int do_now)
+ unsigned int how)
{
pr_debug("top %p %pd\n", top, top);
+ /* Forced expire, user space handles busy mounts */
+ if (how & AUTOFS_EXP_FORCED)
+ return 0;
+
/* If it's busy update the expiry counters */
if (!may_umount_tree(mnt)) {
struct autofs_info *ino;
}
/* Timeout of a direct mount is determined by its top dentry */
- if (!autofs_can_expire(top, timeout, do_now))
+ if (!autofs_can_expire(top, timeout, how))
return 1;
return 0;
static int autofs_tree_busy(struct vfsmount *mnt,
struct dentry *top,
unsigned long timeout,
- int do_now)
+ unsigned int how)
{
struct autofs_info *top_ino = autofs_dentry_ino(top);
struct dentry *p;
* If the fs is busy update the expiry counter.
*/
if (d_mountpoint(p)) {
- if (autofs_mount_busy(mnt, p)) {
+ if (autofs_mount_busy(mnt, p, how)) {
top_ino->last_used = jiffies;
dput(p);
return 1;
}
}
+ /* Forced expire, user space handles busy mounts */
+ if (how & AUTOFS_EXP_FORCED)
+ return 0;
+
/* Timeout of a tree mount is ultimately determined by its top dentry */
- if (!autofs_can_expire(top, timeout, do_now))
+ if (!autofs_can_expire(top, timeout, how))
return 1;
return 0;
static struct dentry *autofs_check_leaves(struct vfsmount *mnt,
struct dentry *parent,
unsigned long timeout,
- int do_now)
+ unsigned int how)
{
struct dentry *p;
if (d_mountpoint(p)) {
/* Can we umount this guy */
- if (autofs_mount_busy(mnt, p))
+ if (autofs_mount_busy(mnt, p, how))
continue;
+ /* This isn't a submount so if a forced expire
+ * has been requested, user space handles busy
+ * mounts */
+ if (how & AUTOFS_EXP_FORCED)
+ return p;
+
/* Can we expire this guy */
- if (autofs_can_expire(p, timeout, do_now))
+ if (autofs_can_expire(p, timeout, how))
return p;
}
}
}
/* Check if we can expire a direct mount (possibly a tree) */
-struct dentry *autofs_expire_direct(struct super_block *sb,
- struct vfsmount *mnt,
- struct autofs_sb_info *sbi,
- int how)
+static struct dentry *autofs_expire_direct(struct super_block *sb,
+ struct vfsmount *mnt,
+ struct autofs_sb_info *sbi,
+ unsigned int how)
{
- unsigned long timeout;
struct dentry *root = dget(sb->s_root);
- int do_now = how & AUTOFS_EXP_IMMEDIATE;
struct autofs_info *ino;
+ unsigned long timeout;
if (!root)
return NULL;
- now = jiffies;
timeout = sbi->exp_timeout;
- if (!autofs_direct_busy(mnt, root, timeout, do_now)) {
+ if (!autofs_direct_busy(mnt, root, timeout, how)) {
spin_lock(&sbi->fs_lock);
ino = autofs_dentry_ino(root);
/* No point expiring a pending mount */
ino->flags |= AUTOFS_INF_WANT_EXPIRE;
spin_unlock(&sbi->fs_lock);
synchronize_rcu();
- if (!autofs_direct_busy(mnt, root, timeout, do_now)) {
+ if (!autofs_direct_busy(mnt, root, timeout, how)) {
spin_lock(&sbi->fs_lock);
ino->flags |= AUTOFS_INF_EXPIRING;
init_completion(&ino->expire_complete);
static struct dentry *should_expire(struct dentry *dentry,
struct vfsmount *mnt,
unsigned long timeout,
- int how)
+ unsigned int how)
{
- int do_now = how & AUTOFS_EXP_IMMEDIATE;
- int exp_leaves = how & AUTOFS_EXP_LEAVES;
struct autofs_info *ino = autofs_dentry_ino(dentry);
unsigned int ino_count;
pr_debug("checking mountpoint %p %pd\n", dentry, dentry);
/* Can we umount this guy */
- if (autofs_mount_busy(mnt, dentry))
+ if (autofs_mount_busy(mnt, dentry, how))
return NULL;
+ /* This isn't a submount so if a forced expire
+ * has been requested, user space handles busy
+ * mounts */
+ if (how & AUTOFS_EXP_FORCED)
+ return dentry;
+
/* Can we expire this guy */
- if (autofs_can_expire(dentry, timeout, do_now))
+ if (autofs_can_expire(dentry, timeout, how))
return dentry;
return NULL;
}
if (d_really_is_positive(dentry) && d_is_symlink(dentry)) {
pr_debug("checking symlink %p %pd\n", dentry, dentry);
+
+ /* Forced expire, user space handles busy mounts */
+ if (how & AUTOFS_EXP_FORCED)
+ return dentry;
+
/*
* A symlink can't be "busy" in the usual sense so
* just check last used for expire timeout.
*/
- if (autofs_can_expire(dentry, timeout, do_now))
+ if (autofs_can_expire(dentry, timeout, how))
return dentry;
return NULL;
}
return NULL;
/* Case 2: tree mount, expire iff entire tree is not busy */
- if (!exp_leaves) {
- /* Path walk currently on this dentry? */
- ino_count = atomic_read(&ino->count) + 1;
- if (d_count(dentry) > ino_count)
- return NULL;
+ if (!(how & AUTOFS_EXP_LEAVES)) {
+ /* Not a forced expire? */
+ if (!(how & AUTOFS_EXP_FORCED)) {
+ /* ref-walk currently on this dentry? */
+ ino_count = atomic_read(&ino->count) + 1;
+ if (d_count(dentry) > ino_count)
+ return NULL;
+ }
- if (!autofs_tree_busy(mnt, dentry, timeout, do_now))
+ if (!autofs_tree_busy(mnt, dentry, timeout, how))
return dentry;
/*
* Case 3: pseudo direct mount, expire individual leaves
* (autofs-4.1).
*/
} else {
- /* Path walk currently on this dentry? */
struct dentry *expired;
- ino_count = atomic_read(&ino->count) + 1;
- if (d_count(dentry) > ino_count)
- return NULL;
+ /* Not a forced expire? */
+ if (!(how & AUTOFS_EXP_FORCED)) {
+ /* ref-walk currently on this dentry? */
+ ino_count = atomic_read(&ino->count) + 1;
+ if (d_count(dentry) > ino_count)
+ return NULL;
+ }
- expired = autofs_check_leaves(mnt, dentry, timeout, do_now);
+ expired = autofs_check_leaves(mnt, dentry, timeout, how);
if (expired) {
if (expired == dentry)
dput(dentry);
* - it is unused by any user process
* - it has been unused for exp_timeout time
*/
-struct dentry *autofs_expire_indirect(struct super_block *sb,
- struct vfsmount *mnt,
- struct autofs_sb_info *sbi,
- int how)
+static struct dentry *autofs_expire_indirect(struct super_block *sb,
+ struct vfsmount *mnt,
+ struct autofs_sb_info *sbi,
+ unsigned int how)
{
unsigned long timeout;
struct dentry *root = sb->s_root;
if (!root)
return NULL;
- now = jiffies;
timeout = sbi->exp_timeout;
dentry = NULL;
while ((dentry = get_next_positive_subdir(dentry, root))) {
- int flags = how;
-
spin_lock(&sbi->fs_lock);
ino = autofs_dentry_ino(dentry);
if (ino->flags & AUTOFS_INF_WANT_EXPIRE) {
}
spin_unlock(&sbi->fs_lock);
- expired = should_expire(dentry, mnt, timeout, flags);
+ expired = should_expire(dentry, mnt, timeout, how);
if (!expired)
continue;
/* Make sure a reference is not taken on found if
* things have changed.
*/
- flags &= ~AUTOFS_EXP_LEAVES;
+ how &= ~AUTOFS_EXP_LEAVES;
found = should_expire(expired, mnt, timeout, how);
if (!found || found != expired)
/* Something has changed, continue */
spin_lock(&sbi->fs_lock);
ino = autofs_dentry_ino(dentry);
/* avoid rapid-fire expire attempts if expiry fails */
- ino->last_used = now;
+ ino->last_used = jiffies;
ino->flags &= ~(AUTOFS_INF_EXPIRING|AUTOFS_INF_WANT_EXPIRE);
complete_all(&ino->expire_complete);
spin_unlock(&sbi->fs_lock);
}
int autofs_do_expire_multi(struct super_block *sb, struct vfsmount *mnt,
- struct autofs_sb_info *sbi, int when)
+ struct autofs_sb_info *sbi, unsigned int how)
{
struct dentry *dentry;
int ret = -EAGAIN;
if (autofs_type_trigger(sbi->type))
- dentry = autofs_expire_direct(sb, mnt, sbi, when);
+ dentry = autofs_expire_direct(sb, mnt, sbi, how);
else
- dentry = autofs_expire_indirect(sb, mnt, sbi, when);
+ dentry = autofs_expire_indirect(sb, mnt, sbi, how);
if (dentry) {
struct autofs_info *ino = autofs_dentry_ino(dentry);
spin_lock(&sbi->fs_lock);
/* avoid rapid-fire expire attempts if expiry fails */
- ino->last_used = now;
+ ino->last_used = jiffies;
ino->flags &= ~(AUTOFS_INF_EXPIRING|AUTOFS_INF_WANT_EXPIRE);
complete_all(&ino->expire_complete);
spin_unlock(&sbi->fs_lock);
int autofs_expire_multi(struct super_block *sb, struct vfsmount *mnt,
struct autofs_sb_info *sbi, int __user *arg)
{
- int do_now = 0;
+ unsigned int how = 0;
- if (arg && get_user(do_now, arg))
+ if (arg && get_user(how, arg))
return -EFAULT;
- return autofs_do_expire_multi(sb, mnt, sbi, do_now);
+ return autofs_do_expire_multi(sb, mnt, sbi, how);
}
#include <linux/seq_file.h>
#include <linux/pagemap.h>
#include <linux/parser.h>
-#include <linux/magic.h>
#include "autofs_i.h"
if (!autofs_oz_mode(sbi))
return -EACCES;
+ /* autofs_oz_mode() needs to allow path walks when the
+ * autofs mount is catatonic but the state of an autofs
+ * file system needs to be preserved over restarts.
+ */
+ if (sbi->catatonic)
+ return -EACCES;
+
BUG_ON(!ino);
autofs_clean_ino(ino);
struct autofs_info *ino = autofs_dentry_ino(dentry);
struct autofs_info *p_ino;
- /* This allows root to remove symlinks */
- if (!autofs_oz_mode(sbi) && !capable(CAP_SYS_ADMIN))
- return -EPERM;
+ if (!autofs_oz_mode(sbi))
+ return -EACCES;
+
+ /* autofs_oz_mode() needs to allow path walks when the
+ * autofs mount is catatonic but the state of an autofs
+ * file system needs to be preserved over restarts.
+ */
+ if (sbi->catatonic)
+ return -EACCES;
if (atomic_dec_and_test(&ino->count)) {
p_ino = autofs_dentry_ino(dentry->d_parent);
if (!autofs_oz_mode(sbi))
return -EACCES;
+ /* autofs_oz_mode() needs to allow path walks when the
+ * autofs mount is catatonic but the state of an autofs
+ * file system needs to be preserved over restarts.
+ */
+ if (sbi->catatonic)
+ return -EACCES;
+
spin_lock(&sbi->lookup_lock);
if (!simple_empty(dentry)) {
spin_unlock(&sbi->lookup_lock);
if (!autofs_oz_mode(sbi))
return -EACCES;
+ /* autofs_oz_mode() needs to allow path walks when the
+ * autofs mount is catatonic but the state of an autofs
+ * file system needs to be preserved over restarts.
+ */
+ if (sbi->catatonic)
+ return -EACCES;
+
pr_debug("dentry %p, creating %pd\n", dentry, dentry);
BUG_ON(!ino);
*
* 1) epmutex (mutex)
* 2) ep->mtx (mutex)
- * 3) ep->lock (spinlock)
+ * 3) ep->wq.lock (spinlock)
*
* The acquire order is the one listed above, from 1 to 3.
- * We need a spinlock (ep->lock) because we manipulate objects
+ * We need a spinlock (ep->wq.lock) because we manipulate objects
* from inside the poll callback, that might be triggered from
* a wake_up() that in turn might be called from IRQ context.
* So we can't sleep inside the poll callback and hence we need
* of epoll file descriptors, we use the current recursion depth as
* the lockdep subkey.
* It is possible to drop the "ep->mtx" and to use the global
- * mutex "epmutex" (together with "ep->lock") to have it working,
+ * mutex "epmutex" (together with "ep->wq.lock") to have it working,
* but having "ep->mtx" will make the interface more scalable.
* Events that require holding "epmutex" are very rare, while for
* normal operations the epoll private "ep->mtx" will guarantee
* This structure is stored inside the "private_data" member of the file
* structure and represents the main data structure for the eventpoll
* interface.
+ *
+ * Access to it is protected by the lock inside wq.
*/
struct eventpoll {
- /* Protect the access to this structure */
- spinlock_t lock;
-
/*
* This mutex is used to ensure that files are not removed
* while epoll is using them. This is held during the event
/*
* This is a single linked list that chains all the "struct epitem" that
* happened while transferring ready events to userspace w/out
- * holding ->lock.
+ * holding ->wq.lock.
*/
struct epitem *ovflist;
}
/* Tells us if the item is currently linked */
-static inline int ep_is_linked(struct list_head *p)
+static inline int ep_is_linked(struct epitem *epi)
{
- return !list_empty(p);
+ return !list_empty(&epi->rdllink);
}
static inline struct eppoll_entry *ep_pwq_from_wait(wait_queue_entry_t *p)
return ep_events_available(ep) || busy_loop_timeout(start_time);
}
-#endif /* CONFIG_NET_RX_BUSY_POLL */
/*
* Busy poll if globally on and supporting sockets found && no events,
*/
static void ep_busy_loop(struct eventpoll *ep, int nonblock)
{
-#ifdef CONFIG_NET_RX_BUSY_POLL
unsigned int napi_id = READ_ONCE(ep->napi_id);
if ((napi_id >= MIN_NAPI_ID) && net_busy_loop_on())
napi_busy_loop(napi_id, nonblock ? NULL : ep_busy_loop_end, ep);
-#endif
}
static inline void ep_reset_busy_poll_napi_id(struct eventpoll *ep)
{
-#ifdef CONFIG_NET_RX_BUSY_POLL
if (ep->napi_id)
ep->napi_id = 0;
-#endif
}
/*
*/
static inline void ep_set_busy_poll_napi_id(struct epitem *epi)
{
-#ifdef CONFIG_NET_RX_BUSY_POLL
struct eventpoll *ep;
unsigned int napi_id;
struct socket *sock;
/* record NAPI ID for use in next busy poll */
ep->napi_id = napi_id;
-#endif
}
+#else
+
+static inline void ep_busy_loop(struct eventpoll *ep, int nonblock)
+{
+}
+
+static inline void ep_reset_busy_poll_napi_id(struct eventpoll *ep)
+{
+}
+
+static inline void ep_set_busy_poll_napi_id(struct epitem *epi)
+{
+}
+
+#endif /* CONFIG_NET_RX_BUSY_POLL */
+
/**
* ep_call_nested - Perform a bound (possibly) nested call, by checking
* that the recursion limit is not exceeded, and that
{
__poll_t res;
int pwake = 0;
- unsigned long flags;
struct epitem *epi, *nepi;
LIST_HEAD(txlist);
+ lockdep_assert_irqs_enabled();
+
/*
* We need to lock this because we could be hit by
* eventpoll_release_file() and epoll_ctl().
* because we want the "sproc" callback to be able to do it
* in a lockless way.
*/
- spin_lock_irqsave(&ep->lock, flags);
+ spin_lock_irq(&ep->wq.lock);
list_splice_init(&ep->rdllist, &txlist);
ep->ovflist = NULL;
- spin_unlock_irqrestore(&ep->lock, flags);
+ spin_unlock_irq(&ep->wq.lock);
/*
* Now call the callback function.
*/
res = (*sproc)(ep, &txlist, priv);
- spin_lock_irqsave(&ep->lock, flags);
+ spin_lock_irq(&ep->wq.lock);
/*
* During the time we spent inside the "sproc" callback, some
* other events might have been queued by the poll callback.
* queued into ->ovflist but the "txlist" might already
* contain them, and the list_splice() below takes care of them.
*/
- if (!ep_is_linked(&epi->rdllink)) {
+ if (!ep_is_linked(epi)) {
list_add_tail(&epi->rdllink, &ep->rdllist);
ep_pm_stay_awake(epi);
}
if (waitqueue_active(&ep->poll_wait))
pwake++;
}
- spin_unlock_irqrestore(&ep->lock, flags);
+ spin_unlock_irq(&ep->wq.lock);
if (!ep_locked)
mutex_unlock(&ep->mtx);
*/
static int ep_remove(struct eventpoll *ep, struct epitem *epi)
{
- unsigned long flags;
struct file *file = epi->ffd.file;
+ lockdep_assert_irqs_enabled();
+
/*
- * Removes poll wait queue hooks. We _have_ to do this without holding
- * the "ep->lock" otherwise a deadlock might occur. This because of the
- * sequence of the lock acquisition. Here we do "ep->lock" then the wait
- * queue head lock when unregistering the wait queue. The wakeup callback
- * will run by holding the wait queue head lock and will call our callback
- * that will try to get "ep->lock".
+ * Removes poll wait queue hooks.
*/
ep_unregister_pollwait(ep, epi);
rb_erase_cached(&epi->rbn, &ep->rbr);
- spin_lock_irqsave(&ep->lock, flags);
- if (ep_is_linked(&epi->rdllink))
+ spin_lock_irq(&ep->wq.lock);
+ if (ep_is_linked(epi))
list_del_init(&epi->rdllink);
- spin_unlock_irqrestore(&ep->lock, flags);
+ spin_unlock_irq(&ep->wq.lock);
wakeup_source_unregister(ep_wakeup_source(epi));
/*
* Walks through the whole tree by freeing each "struct epitem". At this
* point we are sure no poll callbacks will be lingering around, and also by
* holding "epmutex" we can be sure that no file cleanup code will hit
- * us during this operation. So we can avoid the lock on "ep->lock".
+ * us during this operation. So we can avoid the lock on "ep->wq.lock".
* We do not need to lock ep->mtx, either, we only do it to prevent
* a lockdep warning.
*/
if (unlikely(!ep))
goto free_uid;
- spin_lock_init(&ep->lock);
mutex_init(&ep->mtx);
init_waitqueue_head(&ep->wq);
init_waitqueue_head(&ep->poll_wait);
__poll_t pollflags = key_to_poll(key);
int ewake = 0;
- spin_lock_irqsave(&ep->lock, flags);
+ spin_lock_irqsave(&ep->wq.lock, flags);
ep_set_busy_poll_napi_id(epi);
}
/* If this file is already in the ready list we exit soon */
- if (!ep_is_linked(&epi->rdllink)) {
+ if (!ep_is_linked(epi)) {
list_add_tail(&epi->rdllink, &ep->rdllist);
ep_pm_stay_awake_rcu(epi);
}
pwake++;
out_unlock:
- spin_unlock_irqrestore(&ep->lock, flags);
+ spin_unlock_irqrestore(&ep->wq.lock, flags);
/* We have to call this outside the lock */
if (pwake)
{
int error, pwake = 0;
__poll_t revents;
- unsigned long flags;
long user_watches;
struct epitem *epi;
struct ep_pqueue epq;
+ lockdep_assert_irqs_enabled();
+
user_watches = atomic_long_read(&ep->user->epoll_watches);
if (unlikely(user_watches >= max_user_watches))
return -ENOSPC;
goto error_remove_epi;
/* We have to drop the new item inside our item list to keep track of it */
- spin_lock_irqsave(&ep->lock, flags);
+ spin_lock_irq(&ep->wq.lock);
/* record NAPI ID of new item if present */
ep_set_busy_poll_napi_id(epi);
/* If the file is already "ready" we drop it inside the ready list */
- if (revents && !ep_is_linked(&epi->rdllink)) {
+ if (revents && !ep_is_linked(epi)) {
list_add_tail(&epi->rdllink, &ep->rdllist);
ep_pm_stay_awake(epi);
pwake++;
}
- spin_unlock_irqrestore(&ep->lock, flags);
+ spin_unlock_irq(&ep->wq.lock);
atomic_long_inc(&ep->user->epoll_watches);
* list, since that is used/cleaned only inside a section bound by "mtx".
* And ep_insert() is called with "mtx" held.
*/
- spin_lock_irqsave(&ep->lock, flags);
- if (ep_is_linked(&epi->rdllink))
+ spin_lock_irq(&ep->wq.lock);
+ if (ep_is_linked(epi))
list_del_init(&epi->rdllink);
- spin_unlock_irqrestore(&ep->lock, flags);
+ spin_unlock_irq(&ep->wq.lock);
wakeup_source_unregister(ep_wakeup_source(epi));
int pwake = 0;
poll_table pt;
+ lockdep_assert_irqs_enabled();
+
init_poll_funcptr(&pt, NULL);
/*
* 1) Flush epi changes above to other CPUs. This ensures
* we do not miss events from ep_poll_callback if an
* event occurs immediately after we call f_op->poll().
- * We need this because we did not take ep->lock while
+ * We need this because we did not take ep->wq.lock while
* changing epi above (but ep_poll_callback does take
- * ep->lock).
+ * ep->wq.lock).
*
* 2) We also need to ensure we do not miss _past_ events
* when calling f_op->poll(). This barrier also
* list, push it inside.
*/
if (ep_item_poll(epi, &pt, 1)) {
- spin_lock_irq(&ep->lock);
- if (!ep_is_linked(&epi->rdllink)) {
+ spin_lock_irq(&ep->wq.lock);
+ if (!ep_is_linked(epi)) {
list_add_tail(&epi->rdllink, &ep->rdllist);
ep_pm_stay_awake(epi);
if (waitqueue_active(&ep->poll_wait))
pwake++;
}
- spin_unlock_irq(&ep->lock);
+ spin_unlock_irq(&ep->wq.lock);
}
/* We have to call this outside the lock */
int maxevents, long timeout)
{
int res = 0, eavail, timed_out = 0;
- unsigned long flags;
u64 slack = 0;
wait_queue_entry_t wait;
ktime_t expires, *to = NULL;
+ lockdep_assert_irqs_enabled();
+
if (timeout > 0) {
struct timespec64 end_time = ep_set_mstimeout(timeout);
* caller specified a non blocking operation.
*/
timed_out = 1;
- spin_lock_irqsave(&ep->lock, flags);
+ spin_lock_irq(&ep->wq.lock);
goto check_events;
}
if (!ep_events_available(ep))
ep_busy_loop(ep, timed_out);
- spin_lock_irqsave(&ep->lock, flags);
+ spin_lock_irq(&ep->wq.lock);
if (!ep_events_available(ep)) {
/*
break;
}
- spin_unlock_irqrestore(&ep->lock, flags);
+ spin_unlock_irq(&ep->wq.lock);
if (!schedule_hrtimeout_range(to, slack, HRTIMER_MODE_ABS))
timed_out = 1;
- spin_lock_irqsave(&ep->lock, flags);
+ spin_lock_irq(&ep->wq.lock);
}
__remove_wait_queue(&ep->wq, &wait);
/* Is it worth to try to dig for events ? */
eavail = ep_events_available(ep);
- spin_unlock_irqrestore(&ep->lock, flags);
+ spin_unlock_irq(&ep->wq.lock);
/*
* Try to transfer events to user space. In case we get 0 events and
int fat_get_cluster(struct inode *inode, int cluster, int *fclus, int *dclus)
{
struct super_block *sb = inode->i_sb;
- const int limit = sb->s_maxbytes >> MSDOS_SB(sb)->cluster_bits;
+ struct msdos_sb_info *sbi = MSDOS_SB(sb);
+ const int limit = sb->s_maxbytes >> sbi->cluster_bits;
struct fat_entry fatent;
struct fat_cache_id cid;
int nr;
*fclus = 0;
*dclus = MSDOS_I(inode)->i_start;
+ if (!fat_valid_entry(sbi, *dclus)) {
+ fat_fs_error_ratelimit(sb,
+ "%s: invalid start cluster (i_pos %lld, start %08x)",
+ __func__, MSDOS_I(inode)->i_pos, *dclus);
+ return -EIO;
+ }
if (cluster == 0)
return 0;
/* prevent the infinite loop of cluster chain */
if (*fclus > limit) {
fat_fs_error_ratelimit(sb,
- "%s: detected the cluster chain loop"
- " (i_pos %lld)", __func__,
- MSDOS_I(inode)->i_pos);
+ "%s: detected the cluster chain loop (i_pos %lld)",
+ __func__, MSDOS_I(inode)->i_pos);
nr = -EIO;
goto out;
}
goto out;
else if (nr == FAT_ENT_FREE) {
fat_fs_error_ratelimit(sb,
- "%s: invalid cluster chain (i_pos %lld)",
- __func__,
- MSDOS_I(inode)->i_pos);
+ "%s: invalid cluster chain (i_pos %lld)",
+ __func__, MSDOS_I(inode)->i_pos);
nr = -EIO;
goto out;
} else if (nr == FAT_ENT_EOF) {
return err;
}
-int fat_alloc_new_dir(struct inode *dir, struct timespec *ts)
+int fat_alloc_new_dir(struct inode *dir, struct timespec64 *ts)
{
struct super_block *sb = dir->i_sb;
struct msdos_sb_info *sbi = MSDOS_SB(sb);
struct fat_slot_info *sinfo);
extern int fat_get_dotdot_entry(struct inode *dir, struct buffer_head **bh,
struct msdos_dir_entry **de);
-extern int fat_alloc_new_dir(struct inode *dir, struct timespec *ts);
+extern int fat_alloc_new_dir(struct inode *dir, struct timespec64 *ts);
extern int fat_add_entries(struct inode *dir, void *slots, int nr_slots,
struct fat_slot_info *sinfo);
extern int fat_remove_entries(struct inode *dir, struct fat_slot_info *sinfo);
fatent->fat_inode = NULL;
}
+static inline bool fat_valid_entry(struct msdos_sb_info *sbi, int entry)
+{
+ return FAT_START_ENT <= entry && entry < sbi->max_cluster;
+}
+
extern void fat_ent_access_init(struct super_block *sb);
extern int fat_ent_read(struct inode *inode, struct fat_entry *fatent,
int entry);
int nr_cluster);
extern int fat_free_clusters(struct inode *inode, int cluster);
extern int fat_count_free_clusters(struct super_block *sb);
+extern int fat_trim_fs(struct inode *inode, struct fstrim_range *range);
/* fat/file.c */
extern long fat_generic_ioctl(struct file *filp, unsigned int cmd,
} while (0)
extern int fat_clusters_flush(struct super_block *sb);
extern int fat_chain_add(struct inode *inode, int new_dclus, int nr_cluster);
-extern void fat_time_fat2unix(struct msdos_sb_info *sbi, struct timespec *ts,
+extern void fat_time_fat2unix(struct msdos_sb_info *sbi, struct timespec64 *ts,
__le16 __time, __le16 __date, u8 time_cs);
-extern void fat_time_unix2fat(struct msdos_sb_info *sbi, struct timespec *ts,
+extern void fat_time_unix2fat(struct msdos_sb_info *sbi, struct timespec64 *ts,
__le16 *time, __le16 *date, u8 *time_cs);
extern int fat_sync_bhs(struct buffer_head **bhs, int nr_bhs);
*/
#include <linux/blkdev.h>
+#include <linux/sched/signal.h>
#include "fat.h"
struct fatent_operations {
{
struct msdos_sb_info *sbi = MSDOS_SB(sb);
int bytes = entry + (entry >> 1);
- WARN_ON(entry < FAT_START_ENT || sbi->max_cluster <= entry);
+ WARN_ON(!fat_valid_entry(sbi, entry));
*offset = bytes & (sb->s_blocksize - 1);
*blocknr = sbi->fat_start + (bytes >> sb->s_blocksize_bits);
}
{
struct msdos_sb_info *sbi = MSDOS_SB(sb);
int bytes = (entry << sbi->fatent_shift);
- WARN_ON(entry < FAT_START_ENT || sbi->max_cluster <= entry);
+ WARN_ON(!fat_valid_entry(sbi, entry));
*offset = bytes & (sb->s_blocksize - 1);
*blocknr = sbi->fat_start + (bytes >> sb->s_blocksize_bits);
}
int err, offset;
sector_t blocknr;
- if (entry < FAT_START_ENT || sbi->max_cluster <= entry) {
+ if (!fat_valid_entry(sbi, entry)) {
fatent_brelse(fatent);
fat_fs_error(sb, "invalid access to FAT (entry 0x%08x)", entry);
return -EIO;
unlock_fat(sbi);
return err;
}
+
+static int fat_trim_clusters(struct super_block *sb, u32 clus, u32 nr_clus)
+{
+ struct msdos_sb_info *sbi = MSDOS_SB(sb);
+ return sb_issue_discard(sb, fat_clus_to_blknr(sbi, clus),
+ nr_clus * sbi->sec_per_clus, GFP_NOFS, 0);
+}
+
+int fat_trim_fs(struct inode *inode, struct fstrim_range *range)
+{
+ struct super_block *sb = inode->i_sb;
+ struct msdos_sb_info *sbi = MSDOS_SB(sb);
+ const struct fatent_operations *ops = sbi->fatent_ops;
+ struct fat_entry fatent;
+ u64 ent_start, ent_end, minlen, trimmed = 0;
+ u32 free = 0;
+ unsigned long reada_blocks, reada_mask, cur_block = 0;
+ int err = 0;
+
+ /*
+	 * FAT data is organized as clusters; trim at the granularity of a cluster.
+	 *
+	 * fstrim_range is in bytes, so convert the values to cluster indices.
+	 * Treat sectors before the data region as all used and do not trim them.
+ */
+ ent_start = max_t(u64, range->start>>sbi->cluster_bits, FAT_START_ENT);
+ ent_end = ent_start + (range->len >> sbi->cluster_bits) - 1;
+ minlen = range->minlen >> sbi->cluster_bits;
+
+ if (ent_start >= sbi->max_cluster || range->len < sbi->cluster_size)
+ return -EINVAL;
+ if (ent_end >= sbi->max_cluster)
+ ent_end = sbi->max_cluster - 1;
+
+ reada_blocks = FAT_READA_SIZE >> sb->s_blocksize_bits;
+ reada_mask = reada_blocks - 1;
+
+ fatent_init(&fatent);
+ lock_fat(sbi);
+ fatent_set_entry(&fatent, ent_start);
+ while (fatent.entry <= ent_end) {
+ /* readahead of fat blocks */
+ if ((cur_block & reada_mask) == 0) {
+ unsigned long rest = sbi->fat_length - cur_block;
+ fat_ent_reada(sb, &fatent, min(reada_blocks, rest));
+ }
+ cur_block++;
+
+ err = fat_ent_read_block(sb, &fatent);
+ if (err)
+ goto error;
+ do {
+ if (ops->ent_get(&fatent) == FAT_ENT_FREE) {
+ free++;
+ } else if (free) {
+ if (free >= minlen) {
+ u32 clus = fatent.entry - free;
+
+ err = fat_trim_clusters(sb, clus, free);
+ if (err && err != -EOPNOTSUPP)
+ goto error;
+ if (!err)
+ trimmed += free;
+ err = 0;
+ }
+ free = 0;
+ }
+ } while (fat_ent_next(sbi, &fatent) && fatent.entry <= ent_end);
+
+ if (fatal_signal_pending(current)) {
+ err = -ERESTARTSYS;
+ goto error;
+ }
+
+ if (need_resched()) {
+ fatent_brelse(&fatent);
+ unlock_fat(sbi);
+ cond_resched();
+ lock_fat(sbi);
+ }
+ }
+ /* handle scenario when tail entries are all free */
+ if (free && free >= minlen) {
+ u32 clus = fatent.entry - free;
+
+ err = fat_trim_clusters(sb, clus, free);
+ if (err && err != -EOPNOTSUPP)
+ goto error;
+ if (!err)
+ trimmed += free;
+ err = 0;
+ }
+
+error:
+ fatent_brelse(&fatent);
+ unlock_fat(sbi);
+
+ range->len = trimmed << sbi->cluster_bits;
+
+ return err;
+}
return put_user(sbi->vol_id, user_attr);
}
+static int fat_ioctl_fitrim(struct inode *inode, unsigned long arg)
+{
+ struct super_block *sb = inode->i_sb;
+ struct fstrim_range __user *user_range;
+ struct fstrim_range range;
+ struct request_queue *q = bdev_get_queue(sb->s_bdev);
+ int err;
+
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+ if (!blk_queue_discard(q))
+ return -EOPNOTSUPP;
+
+ user_range = (struct fstrim_range __user *)arg;
+ if (copy_from_user(&range, user_range, sizeof(range)))
+ return -EFAULT;
+
+ range.minlen = max_t(unsigned int, range.minlen,
+ q->limits.discard_granularity);
+
+ err = fat_trim_fs(inode, &range);
+ if (err < 0)
+ return err;
+
+ if (copy_to_user(user_range, &range, sizeof(range)))
+ return -EFAULT;
+
+ return 0;
+}
+
long fat_generic_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
{
struct inode *inode = file_inode(filp);
return fat_ioctl_set_attributes(filp, user_attr);
case FAT_IOCTL_GET_VOLUME_ID:
return fat_ioctl_get_volume_id(inode, user_attr);
+ case FITRIM:
+ return fat_ioctl_fitrim(inode, arg);
default:
return -ENOTTY; /* Inappropriate ioctl for device */
}
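
For reference, a minimal userspace sketch that exercises the new FITRIM case; the mount-point path is a placeholder, and the caller needs CAP_SYS_ADMIN as enforced by fat_ioctl_fitrim() above:

    #include <fcntl.h>
    #include <limits.h>
    #include <linux/fs.h>		/* FITRIM, struct fstrim_range */
    #include <stdio.h>
    #include <sys/ioctl.h>
    #include <unistd.h>

    int main(void)
    {
    	struct fstrim_range range = {
    		.start = 0,
    		.len = ULLONG_MAX,	/* whole filesystem */
    		.minlen = 0,		/* kernel raises this to the discard granularity */
    	};
    	int fd = open("/mnt/fat", O_RDONLY);	/* placeholder mount point */

    	if (fd < 0 || ioctl(fd, FITRIM, &range) < 0) {
    		perror("FITRIM");
    		return 1;
    	}
    	/* On return, range.len holds the number of bytes actually trimmed. */
    	printf("trimmed %llu bytes\n", (unsigned long long)range.len);
    	close(fd);
    	return 0;
    }
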
/* doesn't deal with root inode */
int fat_fill_inode(struct inode *inode, struct msdos_dir_entry *de)
{
- struct timespec ts;
struct msdos_sb_info *sbi = MSDOS_SB(inode->i_sb);
int error;
inode->i_blocks = ((inode->i_size + (sbi->cluster_size - 1))
& ~((loff_t)sbi->cluster_size - 1)) >> 9;
- fat_time_fat2unix(sbi, &ts, de->time, de->date, 0);
- inode->i_mtime = timespec_to_timespec64(ts);
+ fat_time_fat2unix(sbi, &inode->i_mtime, de->time, de->date, 0);
if (sbi->options.isvfat) {
- fat_time_fat2unix(sbi, &ts, de->ctime,
+ fat_time_fat2unix(sbi, &inode->i_ctime, de->ctime,
de->cdate, de->ctime_cs);
- inode->i_ctime = timespec_to_timespec64(ts);
- fat_time_fat2unix(sbi, &ts, 0, de->adate, 0);
- inode->i_atime = timespec_to_timespec64(ts);
+ fat_time_fat2unix(sbi, &inode->i_atime, 0, de->adate, 0);
} else
inode->i_ctime = inode->i_atime = inode->i_mtime;
static int __fat_write_inode(struct inode *inode, int wait)
{
- struct timespec ts;
struct super_block *sb = inode->i_sb;
struct msdos_sb_info *sbi = MSDOS_SB(sb);
struct buffer_head *bh;
raw_entry->size = cpu_to_le32(inode->i_size);
raw_entry->attr = fat_make_attrs(inode);
fat_set_start(raw_entry, MSDOS_I(inode)->i_logstart);
- ts = timespec64_to_timespec(inode->i_mtime);
- fat_time_unix2fat(sbi, &ts, &raw_entry->time,
+ fat_time_unix2fat(sbi, &inode->i_mtime, &raw_entry->time,
&raw_entry->date, NULL);
if (sbi->options.isvfat) {
__le16 atime;
- ts = timespec64_to_timespec(inode->i_ctime);
- fat_time_unix2fat(sbi, &ts, &raw_entry->ctime,
+ fat_time_unix2fat(sbi, &inode->i_ctime, &raw_entry->ctime,
&raw_entry->cdate, &raw_entry->ctime_cs);
- ts = timespec64_to_timespec(inode->i_atime);
- fat_time_unix2fat(sbi, &ts, &atime,
+ fat_time_unix2fat(sbi, &inode->i_atime, &atime,
&raw_entry->adate, NULL);
}
spin_unlock(&sbi->inode_hash_lock);
#define IS_LEAP_YEAR(y) (!((y) & 3) && (y) != YEAR_2100)
/* Linear day numbers of the respective 1sts in non-leap years. */
-static time_t days_in_year[] = {
+static long days_in_year[] = {
/* Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec */
0, 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 0, 0, 0,
};
/* Convert a FAT time/date pair to a UNIX date (seconds since 1 1 70). */
-void fat_time_fat2unix(struct msdos_sb_info *sbi, struct timespec *ts,
+void fat_time_fat2unix(struct msdos_sb_info *sbi, struct timespec64 *ts,
__le16 __time, __le16 __date, u8 time_cs)
{
u16 time = le16_to_cpu(__time), date = le16_to_cpu(__date);
- time_t second, day, leap_day, month, year;
+ time64_t second;
+ long day, leap_day, month, year;
year = date >> 9;
month = max(1, (date >> 5) & 0xf);
second = (time & 0x1f) << 1;
second += ((time >> 5) & 0x3f) * SECS_PER_MIN;
second += (time >> 11) * SECS_PER_HOUR;
- second += (year * 365 + leap_day
+ second += (time64_t)(year * 365 + leap_day
+ days_in_year[month] + day
+ DAYS_DELTA) * SECS_PER_DAY;
}
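
As a quick sanity check of the bit layout the conversion above relies on, here is a worked example (not part of the patch): the date word packs year-since-1980/month/day as 7/4/5 bits, and the time word packs hours/minutes/two-second units as 5/6/5 bits.

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
    	uint16_t date = 0x4D21;	/* (38 << 9) | (9 << 5) | 1 */
    	uint16_t time = 0x63C5;	/* (12 << 11) | (30 << 5) | 5 */

    	/* Prints "2018-09-01 12:30:10". */
    	printf("%u-%02u-%02u %02u:%02u:%02u\n",
    	       1980 + (date >> 9), (date >> 5) & 0xf, date & 0x1f,
    	       time >> 11, (time >> 5) & 0x3f, (time & 0x1f) * 2);
    	return 0;
    }
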
/* Convert linear UNIX date to a FAT time/date pair. */
-void fat_time_unix2fat(struct msdos_sb_info *sbi, struct timespec *ts,
+void fat_time_unix2fat(struct msdos_sb_info *sbi, struct timespec64 *ts,
__le16 *time, __le16 *date, u8 *time_cs)
{
struct tm tm;
- time_to_tm(ts->tv_sec,
+ time64_to_tm(ts->tv_sec,
(sbi->options.tz_set ? sbi->options.time_offset :
-sys_tz.tz_minuteswest) * SECS_PER_MIN, &tm);
/***** Creates a directory entry (name is already formatted). */
static int msdos_add_entry(struct inode *dir, const unsigned char *name,
int is_dir, int is_hid, int cluster,
- struct timespec *ts, struct fat_slot_info *sinfo)
+ struct timespec64 *ts, struct fat_slot_info *sinfo)
{
struct msdos_sb_info *sbi = MSDOS_SB(dir->i_sb);
struct msdos_dir_entry de;
if (err)
return err;
- dir->i_ctime = dir->i_mtime = timespec_to_timespec64(*ts);
+ dir->i_ctime = dir->i_mtime = *ts;
if (IS_DIRSYNC(dir))
(void)fat_sync_inode(dir);
else
struct inode *inode = NULL;
struct fat_slot_info sinfo;
struct timespec64 ts;
- struct timespec t;
unsigned char msdos_name[MSDOS_NAME];
int err, is_hid;
}
ts = current_time(dir);
- t = timespec64_to_timespec(ts);
- err = msdos_add_entry(dir, msdos_name, 0, is_hid, 0, &t, &sinfo);
+ err = msdos_add_entry(dir, msdos_name, 0, is_hid, 0, &ts, &sinfo);
if (err)
goto out;
inode = fat_build_inode(sb, sinfo.de, sinfo.i_pos);
struct inode *inode;
unsigned char msdos_name[MSDOS_NAME];
struct timespec64 ts;
- struct timespec t;
int err, is_hid, cluster;
mutex_lock(&MSDOS_SB(sb)->s_lock);
}
ts = current_time(dir);
- t = timespec64_to_timespec(ts);
- cluster = fat_alloc_new_dir(dir, &t);
+ cluster = fat_alloc_new_dir(dir, &ts);
if (cluster < 0) {
err = cluster;
goto out;
}
- err = msdos_add_entry(dir, msdos_name, 1, is_hid, cluster, &t, &sinfo);
+ err = msdos_add_entry(dir, msdos_name, 1, is_hid, cluster, &ts, &sinfo);
if (err)
goto out_free;
inc_nlink(dir);
new_i_pos = MSDOS_I(new_inode)->i_pos;
fat_detach(new_inode);
} else {
- struct timespec t = timespec64_to_timespec(ts);
err = msdos_add_entry(new_dir, new_name, is_dir, is_hid, 0,
- &t, &sinfo);
+ &ts, &sinfo);
if (err)
goto out;
new_i_pos = sinfo.i_pos;
static int vfat_build_slots(struct inode *dir, const unsigned char *name,
int len, int is_dir, int cluster,
- struct timespec *ts,
+ struct timespec64 *ts,
struct msdos_dir_slot *slots, int *nr_slots)
{
struct msdos_sb_info *sbi = MSDOS_SB(dir->i_sb);
}
static int vfat_add_entry(struct inode *dir, const struct qstr *qname,
- int is_dir, int cluster, struct timespec *ts,
+ int is_dir, int cluster, struct timespec64 *ts,
struct fat_slot_info *sinfo)
{
struct msdos_dir_slot *slots;
goto cleanup;
/* update timestamp */
- dir->i_ctime = dir->i_mtime = dir->i_atime = timespec_to_timespec64(*ts);
+ dir->i_ctime = dir->i_mtime = dir->i_atime = *ts;
if (IS_DIRSYNC(dir))
(void)fat_sync_inode(dir);
else
struct inode *inode;
struct fat_slot_info sinfo;
struct timespec64 ts;
- struct timespec t;
int err;
mutex_lock(&MSDOS_SB(sb)->s_lock);
ts = current_time(dir);
- t = timespec64_to_timespec(ts);
- err = vfat_add_entry(dir, &dentry->d_name, 0, 0, &t, &sinfo);
+ err = vfat_add_entry(dir, &dentry->d_name, 0, 0, &ts, &sinfo);
if (err)
goto out;
inode_inc_iversion(dir);
struct inode *inode;
struct fat_slot_info sinfo;
struct timespec64 ts;
- struct timespec t;
int err, cluster;
mutex_lock(&MSDOS_SB(sb)->s_lock);
ts = current_time(dir);
- t = timespec64_to_timespec(ts);
- cluster = fat_alloc_new_dir(dir, &t);
+ cluster = fat_alloc_new_dir(dir, &ts);
if (cluster < 0) {
err = cluster;
goto out;
}
- err = vfat_add_entry(dir, &dentry->d_name, 1, cluster, &t, &sinfo);
+ err = vfat_add_entry(dir, &dentry->d_name, 1, cluster, &ts, &sinfo);
if (err)
goto out_free;
inode_inc_iversion(dir);
struct inode *old_inode, *new_inode;
struct fat_slot_info old_sinfo, sinfo;
struct timespec64 ts;
- struct timespec t;
loff_t new_i_pos;
int err, is_dir, update_dotdot, corrupt = 0;
struct super_block *sb = old_dir->i_sb;
new_i_pos = MSDOS_I(new_inode)->i_pos;
fat_detach(new_inode);
} else {
- t = timespec64_to_timespec(ts);
err = vfat_add_entry(new_dir, &new_dentry->d_name, is_dir, 0,
- &t, &sinfo);
+ &ts, &sinfo);
if (err)
goto out;
new_i_pos = sinfo.i_pos;
MacOS 8. It includes all Mac specific filesystem data such as
data forks and creator codes, but it also has several UNIX
style features such as file ownership and permissions.
-
-config HFSPLUS_FS_POSIX_ACL
- bool "HFS+ POSIX Access Control Lists"
- depends on HFSPLUS_FS
- select FS_POSIX_ACL
- help
- POSIX Access Control Lists (ACLs) support permissions for users and
- groups beyond the owner/group/world scheme.
-
- It needs to understand that POSIX ACLs are treated only under
- Linux. POSIX ACLs doesn't mean something under Mac OS X.
- Mac OS X beginning with version 10.4 ("Tiger") support NFSv4 ACLs,
- which are part of the NFSv4 standard.
-
- If you don't know what Access Control Lists are, say N
hfsplus-objs := super.o options.o inode.o ioctl.o extents.o catalog.o dir.o btree.o \
bnode.o brec.o bfind.o tables.o unicode.o wrapper.o bitmap.o part_tbl.o \
attributes.o xattr.o xattr_user.o xattr_security.o xattr_trusted.o
-
-hfsplus-$(CONFIG_HFSPLUS_FS_POSIX_ACL) += posix_acl.o
+++ /dev/null
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * linux/fs/hfsplus/acl.h
- *
- * Vyacheslav Dubeyko <slava@dubeyko.com>
- *
- * Handler for Posix Access Control Lists (ACLs) support.
- */
-
-#include <linux/posix_acl_xattr.h>
-
-#ifdef CONFIG_HFSPLUS_FS_POSIX_ACL
-
-/* posix_acl.c */
-struct posix_acl *hfsplus_get_posix_acl(struct inode *inode, int type);
-int hfsplus_set_posix_acl(struct inode *inode, struct posix_acl *acl,
- int type);
-extern int hfsplus_init_posix_acl(struct inode *, struct inode *);
-
-#else /* CONFIG_HFSPLUS_FS_POSIX_ACL */
-#define hfsplus_get_posix_acl NULL
-#define hfsplus_set_posix_acl NULL
-
-static inline int hfsplus_init_posix_acl(struct inode *inode, struct inode *dir)
-{
- return 0;
-}
-#endif /* CONFIG_HFSPLUS_FS_POSIX_ACL */
#include "hfsplus_fs.h"
#include "hfsplus_raw.h"
#include "xattr.h"
-#include "acl.h"
static inline void hfsplus_instantiate(struct dentry *dentry,
struct inode *inode, u32 cnid)
if (res)
goto out_err;
- res = hfsplus_init_inode_security(inode, dir, &dentry->d_name);
+ res = hfsplus_init_security(inode, dir, &dentry->d_name);
if (res == -EOPNOTSUPP)
res = 0; /* Operation is not supported. */
else if (res) {
if (res)
goto failed_mknod;
- res = hfsplus_init_inode_security(inode, dir, &dentry->d_name);
+ res = hfsplus_init_security(inode, dir, &dentry->d_name);
if (res == -EOPNOTSUPP)
res = 0; /* Operation is not supported. */
else if (res) {
.mknod = hfsplus_mknod,
.rename = hfsplus_rename,
.listxattr = hfsplus_listxattr,
-#ifdef CONFIG_HFSPLUS_FS_POSIX_ACL
- .get_acl = hfsplus_get_posix_acl,
- .set_acl = hfsplus_set_posix_acl,
-#endif
};
const struct file_operations hfsplus_dir_operations = {
int i;
int err = 0;
+ /* Mapping the allocation file may lock the extent tree */
+ WARN_ON(mutex_is_locked(&HFSPLUS_SB(sb)->ext_tree->tree_lock));
+
hfsplus_dump_extent(extent);
for (i = 0; i < 8; extent++, i++) {
count = be32_to_cpu(extent->block_count);
if (res)
break;
start = be32_to_cpu(fd.key->ext.start_block);
- hfsplus_free_extents(sb, ext_entry,
- total_blocks - start,
- total_blocks);
hfs_brec_remove(&fd);
+
+ mutex_unlock(&fd.tree->tree_lock);
+ hfsplus_free_extents(sb, ext_entry, total_blocks - start,
+ total_blocks);
total_blocks = start;
+ mutex_lock(&fd.tree->tree_lock);
} while (total_blocks > blocks);
hfs_find_exit(&fd);
}
while (1) {
if (alloc_cnt == hip->first_blocks) {
+ mutex_unlock(&fd.tree->tree_lock);
hfsplus_free_extents(sb, hip->first_extents,
alloc_cnt, alloc_cnt - blk_cnt);
hfsplus_dump_extent(hip->first_extents);
hip->first_blocks = blk_cnt;
+ mutex_lock(&fd.tree->tree_lock);
break;
}
res = __hfsplus_ext_cache_extent(&fd, inode, alloc_cnt);
if (res)
break;
+ hfs_brec_remove(&fd);
+
+ mutex_unlock(&fd.tree->tree_lock);
start = hip->cached_start;
hfsplus_free_extents(sb, hip->cached_extents,
alloc_cnt - start, alloc_cnt - blk_cnt);
alloc_cnt = start;
hip->cached_start = hip->cached_blocks = 0;
hip->extent_state &= ~(HFSPLUS_EXT_DIRTY | HFSPLUS_EXT_NEW);
- hfs_brec_remove(&fd);
+ mutex_lock(&fd.tree->tree_lock);
}
hfs_find_exit(&fd);
#define DBG_EXTENT 0x00000020
#define DBG_BITMAP 0x00000040
#define DBG_ATTR_MOD 0x00000080
-#define DBG_ACL_MOD 0x00000100
#if 0
#define DBG_MASK (DBG_EXTENT|DBG_INODE|DBG_BNODE_MOD)
#include "hfsplus_fs.h"
#include "hfsplus_raw.h"
#include "xattr.h"
-#include "acl.h"
static int hfsplus_readpage(struct file *file, struct page *page)
{
setattr_copy(inode, attr);
mark_inode_dirty(inode);
- if (attr->ia_valid & ATTR_MODE) {
- error = posix_acl_chmod(inode, inode->i_mode);
- if (unlikely(error))
- return error;
- }
-
return 0;
}
static const struct inode_operations hfsplus_file_inode_operations = {
.setattr = hfsplus_setattr,
.listxattr = hfsplus_listxattr,
-#ifdef CONFIG_HFSPLUS_FS_POSIX_ACL
- .get_acl = hfsplus_get_posix_acl,
- .set_acl = hfsplus_set_posix_acl,
-#endif
};
static const struct file_operations hfsplus_file_operations = {
+++ /dev/null
-// SPDX-License-Identifier: GPL-2.0
-/*
- * linux/fs/hfsplus/posix_acl.c
- *
- * Vyacheslav Dubeyko <slava@dubeyko.com>
- *
- * Handler for Posix Access Control Lists (ACLs) support.
- */
-
-#include "hfsplus_fs.h"
-#include "xattr.h"
-#include "acl.h"
-
-struct posix_acl *hfsplus_get_posix_acl(struct inode *inode, int type)
-{
- struct posix_acl *acl;
- char *xattr_name;
- char *value = NULL;
- ssize_t size;
-
- hfs_dbg(ACL_MOD, "[%s]: ino %lu\n", __func__, inode->i_ino);
-
- switch (type) {
- case ACL_TYPE_ACCESS:
- xattr_name = XATTR_NAME_POSIX_ACL_ACCESS;
- break;
- case ACL_TYPE_DEFAULT:
- xattr_name = XATTR_NAME_POSIX_ACL_DEFAULT;
- break;
- default:
- return ERR_PTR(-EINVAL);
- }
-
- size = __hfsplus_getxattr(inode, xattr_name, NULL, 0);
-
- if (size > 0) {
- value = (char *)hfsplus_alloc_attr_entry();
- if (unlikely(!value))
- return ERR_PTR(-ENOMEM);
- size = __hfsplus_getxattr(inode, xattr_name, value, size);
- }
-
- if (size > 0)
- acl = posix_acl_from_xattr(&init_user_ns, value, size);
- else if (size == -ENODATA)
- acl = NULL;
- else
- acl = ERR_PTR(size);
-
- hfsplus_destroy_attr_entry((hfsplus_attr_entry *)value);
-
- return acl;
-}
-
-static int __hfsplus_set_posix_acl(struct inode *inode, struct posix_acl *acl,
- int type)
-{
- int err;
- char *xattr_name;
- size_t size = 0;
- char *value = NULL;
-
- hfs_dbg(ACL_MOD, "[%s]: ino %lu\n", __func__, inode->i_ino);
-
- switch (type) {
- case ACL_TYPE_ACCESS:
- xattr_name = XATTR_NAME_POSIX_ACL_ACCESS;
- break;
-
- case ACL_TYPE_DEFAULT:
- xattr_name = XATTR_NAME_POSIX_ACL_DEFAULT;
- if (!S_ISDIR(inode->i_mode))
- return acl ? -EACCES : 0;
- break;
-
- default:
- return -EINVAL;
- }
-
- if (acl) {
- size = posix_acl_xattr_size(acl->a_count);
- if (unlikely(size > HFSPLUS_MAX_INLINE_DATA_SIZE))
- return -ENOMEM;
- value = (char *)hfsplus_alloc_attr_entry();
- if (unlikely(!value))
- return -ENOMEM;
- err = posix_acl_to_xattr(&init_user_ns, acl, value, size);
- if (unlikely(err < 0))
- goto end_set_acl;
- }
-
- err = __hfsplus_setxattr(inode, xattr_name, value, size, 0);
-
-end_set_acl:
- hfsplus_destroy_attr_entry((hfsplus_attr_entry *)value);
-
- if (!err)
- set_cached_acl(inode, type, acl);
-
- return err;
-}
-
-int hfsplus_set_posix_acl(struct inode *inode, struct posix_acl *acl, int type)
-{
- int err;
-
- if (type == ACL_TYPE_ACCESS && acl) {
- err = posix_acl_update_mode(inode, &inode->i_mode, &acl);
- if (err)
- return err;
- }
- return __hfsplus_set_posix_acl(inode, acl, type);
-}
-
-int hfsplus_init_posix_acl(struct inode *inode, struct inode *dir)
-{
- int err = 0;
- struct posix_acl *default_acl, *acl;
-
- hfs_dbg(ACL_MOD,
- "[%s]: ino %lu, dir->ino %lu\n",
- __func__, inode->i_ino, dir->i_ino);
-
- if (S_ISLNK(inode->i_mode))
- return 0;
-
- err = posix_acl_create(dir, &inode->i_mode, &default_acl, &acl);
- if (err)
- return err;
-
- if (default_acl) {
- err = __hfsplus_set_posix_acl(inode, default_acl,
- ACL_TYPE_DEFAULT);
- posix_acl_release(default_acl);
- }
-
- if (acl) {
- if (!err)
- err = __hfsplus_set_posix_acl(inode, acl,
- ACL_TYPE_ACCESS);
- posix_acl_release(acl);
- }
- return err;
-}
goto out_put_root;
if (!hfs_brec_read(&fd, &entry, sizeof(entry))) {
hfs_find_exit(&fd);
- if (entry.type != cpu_to_be16(HFSPLUS_FOLDER))
+ if (entry.type != cpu_to_be16(HFSPLUS_FOLDER)) {
+ err = -EINVAL;
goto out_put_root;
+ }
inode = hfsplus_iget(sb, be32_to_cpu(entry.folder.id));
if (IS_ERR(inode)) {
err = PTR_ERR(inode);
goto out_put_hidden_dir;
}
- err = hfsplus_init_inode_security(sbi->hidden_dir,
- root, &str);
+ err = hfsplus_init_security(sbi->hidden_dir,
+ root, &str);
if (err == -EOPNOTSUPP)
err = 0; /* Operation is not supported. */
else if (err) {
return size;
}
-/* Decomposes a single unicode character. */
-static inline u16 *decompose_unichar(wchar_t uc, int *size)
+/* Decomposes a non-Hangul unicode character. */
+static u16 *hfsplus_decompose_nonhangul(wchar_t uc, int *size)
{
int off;
return hfsplus_decompose_table + (off / 4);
}
+/*
+ * Try to decompose a unicode character as Hangul. Return 0 if @uc is not
+ * precomposed Hangul, otherwise return the length of the decomposition.
+ *
+ * This function was adapted from the sample code in Unicode Standard
+ * Annex #15: Unicode Normalization Forms, version 3.2.0.
+ *
+ * Copyright (C) 1991-2018 Unicode, Inc. All rights reserved. Distributed
+ * under the Terms of Use in http://www.unicode.org/copyright.html.
+ */
+static int hfsplus_try_decompose_hangul(wchar_t uc, u16 *result)
+{
+ int index;
+ int l, v, t;
+
+ index = uc - Hangul_SBase;
+ if (index < 0 || index >= Hangul_SCount)
+ return 0;
+
+ l = Hangul_LBase + index / Hangul_NCount;
+ v = Hangul_VBase + (index % Hangul_NCount) / Hangul_TCount;
+ t = Hangul_TBase + index % Hangul_TCount;
+
+ result[0] = l;
+ result[1] = v;
+ if (t != Hangul_TBase) {
+ result[2] = t;
+ return 3;
+ }
+ return 2;
+}
+
+/* Decomposes a single unicode character. */
+static u16 *decompose_unichar(wchar_t uc, int *size, u16 *hangul_buffer)
+{
+ u16 *result;
+
+ /* Hangul is handled separately */
+ result = hangul_buffer;
+ *size = hfsplus_try_decompose_hangul(uc, result);
+ if (*size == 0)
+ result = hfsplus_decompose_nonhangul(uc, size);
+ return result;
+}
+
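
The Hangul arithmetic above is the standard algorithmic decomposition from UAX #15. A standalone sketch using the well-known constants, which this hunk assumes are defined elsewhere (Hangul_SBase 0xAC00, LBase 0x1100, VBase 0x1161, TBase 0x11A7, TCount 28, NCount 588, SCount 11172):

    #include <stdio.h>

    #define SBase  0xAC00
    #define LBase  0x1100
    #define VBase  0x1161
    #define TBase  0x11A7
    #define TCount 28
    #define NCount 588		/* VCount (21) * TCount (28) */
    #define SCount 11172	/* LCount (19) * NCount */

    int main(void)
    {
    	int uc = 0xD55C;	/* HANGUL SYLLABLE HAN */
    	int index = uc - SBase;

    	if (index < 0 || index >= SCount)
    		return 1;	/* not precomposed Hangul */

    	/* Leading consonant, vowel, optional trailing consonant,
    	 * mirroring hfsplus_try_decompose_hangul() above. */
    	printf("U+%04X -> U+%04X U+%04X", uc,
    	       LBase + index / NCount,
    	       VBase + (index % NCount) / TCount);
    	if (index % TCount)
    		printf(" U+%04X", TBase + index % TCount);
    	printf("\n");	/* prints: U+D55C -> U+1112 U+1161 U+11AB */
    	return 0;
    }
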
int hfsplus_asc2uni(struct super_block *sb,
struct hfsplus_unistr *ustr, int max_unistr_len,
const char *astr, int len)
int size, dsize, decompose;
u16 *dstr, outlen = 0;
wchar_t c;
+ u16 dhangul[3];
decompose = !test_bit(HFSPLUS_SB_NODECOMPOSE, &HFSPLUS_SB(sb)->flags);
while (outlen < max_unistr_len && len > 0) {
size = asc2unichar(sb, astr, len, &c);
if (decompose)
- dstr = decompose_unichar(c, &dsize);
+ dstr = decompose_unichar(c, &dsize, dhangul);
else
dstr = NULL;
if (dstr) {
unsigned long hash;
wchar_t c;
u16 c2;
+ u16 dhangul[3];
casefold = test_bit(HFSPLUS_SB_CASEFOLD, &HFSPLUS_SB(sb)->flags);
decompose = !test_bit(HFSPLUS_SB_NODECOMPOSE, &HFSPLUS_SB(sb)->flags);
len -= size;
if (decompose)
- dstr = decompose_unichar(c, &dsize);
+ dstr = decompose_unichar(c, &dsize, dhangul);
else
dstr = NULL;
if (dstr) {
const char *astr1, *astr2;
u16 c1, c2;
wchar_t c;
+ u16 dhangul_1[3], dhangul_2[3];
casefold = test_bit(HFSPLUS_SB_CASEFOLD, &HFSPLUS_SB(sb)->flags);
decompose = !test_bit(HFSPLUS_SB_NODECOMPOSE, &HFSPLUS_SB(sb)->flags);
len1 -= size;
if (decompose)
- dstr1 = decompose_unichar(c, &dsize1);
+ dstr1 = decompose_unichar(c, &dsize1,
+ dhangul_1);
if (!decompose || !dstr1) {
c1 = c;
dstr1 = &c1;
len2 -= size;
if (decompose)
- dstr2 = decompose_unichar(c, &dsize2);
+ dstr2 = decompose_unichar(c, &dsize2,
+ dhangul_2);
if (!decompose || !dstr2) {
c2 = c;
dstr2 = &c2;
*/
#include "hfsplus_fs.h"
-#include <linux/posix_acl_xattr.h>
#include <linux/nls.h>
#include "xattr.h"
-#include "acl.h"
static int hfsplus_removexattr(struct inode *inode, const char *name);
&hfsplus_xattr_osx_handler,
&hfsplus_xattr_user_handler,
&hfsplus_xattr_trusted_handler,
-#ifdef CONFIG_HFSPLUS_FS_POSIX_ACL
- &posix_acl_access_xattr_handler,
- &posix_acl_default_xattr_handler,
-#endif
&hfsplus_xattr_security_handler,
NULL
};
int hfsplus_init_security(struct inode *inode, struct inode *dir,
const struct qstr *qstr);
-int hfsplus_init_inode_security(struct inode *inode, struct inode *dir,
- const struct qstr *qstr);
-
#endif
#include "hfsplus_fs.h"
#include "xattr.h"
-#include "acl.h"
static int hfsplus_security_getxattr(const struct xattr_handler *handler,
struct dentry *unused, struct inode *inode,
&hfsplus_initxattrs, NULL);
}
-int hfsplus_init_inode_security(struct inode *inode,
- struct inode *dir,
- const struct qstr *qstr)
-{
- int err;
-
- err = hfsplus_init_posix_acl(inode, dir);
- if (!err)
- err = hfsplus_init_security(inode, dir, qstr);
- return err;
-}
-
const struct xattr_handler hfsplus_xattr_security_handler = {
.prefix = XATTR_SECURITY_PREFIX,
.get = hfsplus_security_getxattr,
int i, freed = 0;
bool truncate_op = (lend == LLONG_MAX);
- memset(&pseudo_vma, 0, sizeof(struct vm_area_struct));
vma_init(&pseudo_vma, current->mm);
pseudo_vma.vm_flags = (VM_HUGETLB | VM_MAYSHARE | VM_SHARED);
pagevec_init(&pvec);
* allocation routines. If NUMA is configured, use page index
* as input to create an allocation policy.
*/
- memset(&pseudo_vma, 0, sizeof(struct vm_area_struct));
vma_init(&pseudo_vma, mm);
pseudo_vma.vm_flags = (VM_HUGETLB | VM_MAYSHARE | VM_SHARED);
pseudo_vma.vm_file = file;
return err;
}
-static int nilfs_page_mkwrite(struct vm_fault *vmf)
+static vm_fault_t nilfs_page_mkwrite(struct vm_fault *vmf)
{
struct vm_area_struct *vma = vmf->vma;
struct page *page = vmf->page;
sbp[0]->s_max_mnt_count = cpu_to_le16(NILFS_DFL_MAX_MNT_COUNT);
sbp[0]->s_mnt_count = cpu_to_le16(mnt_count + 1);
- sbp[0]->s_mtime = cpu_to_le64(get_seconds());
+ sbp[0]->s_mtime = cpu_to_le64(ktime_get_real_seconds());
skip_mount_setup:
sbp[0]->s_state =
config PROC_KCORE
bool "/proc/kcore support" if !ARM
depends on PROC_FS && MMU
+ select CRASH_CORE
help
Provides a virtual ELF core file of the live kernel. This can
be read with gdb and other ELF tools. No modifications can be
if (!task)
return -ESRCH;
seq_puts(m, "Latency Top version : v0.1\n");
- for (i = 0; i < 32; i++) {
+ for (i = 0; i < LT_SAVECOUNT; i++) {
struct latency_record *lr = &task->latency_record[i];
if (lr->backtrace[0]) {
int q;
if (!task)
return -ESRCH;
len = snprintf(numbuf, sizeof(numbuf), "%u\n", task->fail_nth);
- len = simple_read_from_buffer(buf, count, ppos, numbuf, len);
put_task_struct(task);
-
- return len;
+ return simple_read_from_buffer(buf, count, ppos, numbuf, len);
}
static const struct file_operations proc_fail_nth_operations = {
size_t count, loff_t *ppos)
{
struct inode * inode = file_inode(file);
+ struct task_struct *task;
void *page;
- ssize_t length;
- struct task_struct *task = get_proc_task(inode);
-
- length = -ESRCH;
- if (!task)
- goto out_no_task;
+ int rv;
+ rcu_read_lock();
+ task = pid_task(proc_pid(inode), PIDTYPE_PID);
+ if (!task) {
+ rcu_read_unlock();
+ return -ESRCH;
+ }
/* A task may only write its own attributes. */
- length = -EACCES;
- if (current != task)
- goto out;
+ if (current != task) {
+ rcu_read_unlock();
+ return -EACCES;
+ }
+ rcu_read_unlock();
if (count > PAGE_SIZE)
count = PAGE_SIZE;
/* No partial writes. */
- length = -EINVAL;
if (*ppos != 0)
- goto out;
+ return -EINVAL;
page = memdup_user(buf, count);
if (IS_ERR(page)) {
- length = PTR_ERR(page);
+ rv = PTR_ERR(page);
goto out;
}
/* Guard against adverse ptrace interaction */
-	length = mutex_lock_interruptible(&current->signal->cred_guard_mutex);
-	if (length < 0)
+	rv = mutex_lock_interruptible(&current->signal->cred_guard_mutex);
+ if (rv < 0)
goto out_free;
- length = security_setprocattr(file->f_path.dentry->d_name.name,
- page, count);
+ rv = security_setprocattr(file->f_path.dentry->d_name.name, page, count);
mutex_unlock(&current->signal->cred_guard_mutex);
out_free:
kfree(page);
out:
- put_task_struct(task);
-out_no_task:
- return length;
+ return rv;
}
static const struct file_operations proc_pid_attr_operations = {
REG("cmdline", S_IRUGO, proc_pid_cmdline_ops),
ONE("stat", S_IRUGO, proc_tid_stat),
ONE("statm", S_IRUGO, proc_pid_statm),
- REG("maps", S_IRUGO, proc_tid_maps_operations),
+ REG("maps", S_IRUGO, proc_pid_maps_operations),
#ifdef CONFIG_PROC_CHILDREN
REG("children", S_IRUGO, proc_tid_children_operations),
#endif
#ifdef CONFIG_NUMA
- REG("numa_maps", S_IRUGO, proc_tid_numa_maps_operations),
+ REG("numa_maps", S_IRUGO, proc_pid_numa_maps_operations),
#endif
REG("mem", S_IRUSR|S_IWUSR, proc_mem_operations),
LNK("cwd", proc_cwd_link),
REG("mountinfo", S_IRUGO, proc_mountinfo_operations),
#ifdef CONFIG_PROC_PAGE_MONITOR
REG("clear_refs", S_IWUSR, proc_clear_refs_operations),
- REG("smaps", S_IRUGO, proc_tid_smaps_operations),
+ REG("smaps", S_IRUGO, proc_pid_smaps_operations),
REG("smaps_rollup", S_IRUGO, proc_pid_smaps_rollup_operations),
REG("pagemap", S_IRUSR, proc_pagemap_operations),
#endif
if (!dir_emit_dots(file, ctx))
return 0;
+ i = ctx->pos - 2;
read_lock(&proc_subdir_lock);
de = pde_subdir_first(de);
- i = ctx->pos - 2;
for (;;) {
if (!de) {
read_unlock(&proc_subdir_lock);
pde_put(de);
return 0;
}
- read_lock(&proc_subdir_lock);
ctx->pos++;
+ read_lock(&proc_subdir_lock);
next = pde_subdir_next(de);
pde_put(de);
de = next;
kmem_cache_create("pde_opener", sizeof(struct pde_opener), 0,
SLAB_ACCOUNT|SLAB_PANIC, NULL);
proc_dir_entry_cache = kmem_cache_create_usercopy(
- "proc_dir_entry", SIZEOF_PDE_SLOT, 0, SLAB_PANIC,
- OFFSETOF_PDE_NAME, SIZEOF_PDE_INLINE_NAME, NULL);
+ "proc_dir_entry", SIZEOF_PDE, 0, SLAB_PANIC,
+ offsetof(struct proc_dir_entry, inline_name),
+ SIZEOF_PDE_INLINE_NAME, NULL);
+ BUILD_BUG_ON(sizeof(struct proc_dir_entry) >= SIZEOF_PDE);
}
static int proc_show_options(struct seq_file *seq, struct dentry *root)
char inline_name[];
} __randomize_layout;
-#define OFFSETOF_PDE_NAME offsetof(struct proc_dir_entry, inline_name)
-#define SIZEOF_PDE_SLOT \
- (OFFSETOF_PDE_NAME + 34 <= 64 ? 64 : \
- OFFSETOF_PDE_NAME + 34 <= 128 ? 128 : \
- OFFSETOF_PDE_NAME + 34 <= 192 ? 192 : \
- OFFSETOF_PDE_NAME + 34 <= 256 ? 256 : \
- OFFSETOF_PDE_NAME + 34 <= 512 ? 512 : \
- 0)
-
-#define SIZEOF_PDE_INLINE_NAME (SIZEOF_PDE_SLOT - OFFSETOF_PDE_NAME)
+#define SIZEOF_PDE ( \
+ sizeof(struct proc_dir_entry) < 128 ? 128 : \
+ sizeof(struct proc_dir_entry) < 192 ? 192 : \
+ sizeof(struct proc_dir_entry) < 256 ? 256 : \
+ sizeof(struct proc_dir_entry) < 512 ? 512 : \
+ 0)
+#define SIZEOF_PDE_INLINE_NAME (SIZEOF_PDE - sizeof(struct proc_dir_entry))
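
The new macro rounds sizeof(struct proc_dir_entry) up to the next slab slot (128/192/256/512) and hands the slack to the inline name; the BUILD_BUG_ON added earlier guarantees the slack is non-empty. A hedged illustration with a made-up structure size:

    #include <stdio.h>

    #define SIZEOF_PDE(sz) (		\
    	(sz) < 128 ? 128 :		\
    	(sz) < 192 ? 192 :		\
    	(sz) < 256 ? 256 :		\
    	(sz) < 512 ? 512 : 0)

    int main(void)
    {
    	unsigned int sz = 150;	/* stand-in for sizeof(struct proc_dir_entry) */
    	unsigned int slot = SIZEOF_PDE(sz);

    	/* Prints "slot 192, inline name gets 42 bytes". */
    	printf("slot %u, inline name gets %u bytes\n", slot, slot - sz);
    	return 0;
    }
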
extern struct kmem_cache *proc_dir_entry_cache;
void pde_free(struct proc_dir_entry *pde);
return PDE(inode)->data;
}
-static inline struct pid *proc_pid(struct inode *inode)
+static inline struct pid *proc_pid(const struct inode *inode)
{
return PROC_I(inode)->pid;
}
-static inline struct task_struct *get_proc_task(struct inode *inode)
+static inline struct task_struct *get_proc_task(const struct inode *inode)
{
return get_pid_task(proc_pid(inode), PIDTYPE_PID);
}
struct inode *inode;
struct task_struct *task;
struct mm_struct *mm;
- struct mem_size_stats *rollup;
#ifdef CONFIG_MMU
struct vm_area_struct *tail_vma;
#endif
struct mm_struct *proc_mem_open(struct inode *inode, unsigned int mode);
extern const struct file_operations proc_pid_maps_operations;
-extern const struct file_operations proc_tid_maps_operations;
extern const struct file_operations proc_pid_numa_maps_operations;
-extern const struct file_operations proc_tid_numa_maps_operations;
extern const struct file_operations proc_pid_smaps_operations;
extern const struct file_operations proc_pid_smaps_rollup_operations;
-extern const struct file_operations proc_tid_smaps_operations;
extern const struct file_operations proc_clear_refs_operations;
extern const struct file_operations proc_pagemap_operations;
* Safe accesses to vmalloc/direct-mapped discontiguous areas, Kanoj Sarcar <kanoj@sgi.com>
*/
+#include <linux/crash_core.h>
#include <linux/mm.h>
#include <linux/proc_fs.h>
#include <linux/kcore.h>
#define kc_offset_to_vaddr(o) ((o) + PAGE_OFFSET)
#endif
-/* An ELF note in memory */
-struct memelfnote
-{
- const char *name;
- int type;
- unsigned int datasz;
- void *data;
-};
-
static LIST_HEAD(kclist_head);
-static DEFINE_RWLOCK(kclist_lock);
+static DECLARE_RWSEM(kclist_lock);
static int kcore_need_update = 1;
-void
-kclist_add(struct kcore_list *new, void *addr, size_t size, int type)
+/* This doesn't grab kclist_lock, so it should only be used at init time. */
+void __init kclist_add(struct kcore_list *new, void *addr, size_t size,
+ int type)
{
new->addr = (unsigned long)addr;
new->size = size;
new->type = type;
- write_lock(&kclist_lock);
list_add_tail(&new->list, &kclist_head);
- write_unlock(&kclist_lock);
}
-static size_t get_kcore_size(int *nphdr, size_t *elf_buflen)
+static size_t get_kcore_size(int *nphdr, size_t *phdrs_len, size_t *notes_len,
+ size_t *data_offset)
{
size_t try, size;
struct kcore_list *m;
size = try;
*nphdr = *nphdr + 1;
}
- *elf_buflen = sizeof(struct elfhdr) +
- (*nphdr + 2)*sizeof(struct elf_phdr) +
- 3 * ((sizeof(struct elf_note)) +
- roundup(sizeof(CORE_STR), 4)) +
- roundup(sizeof(struct elf_prstatus), 4) +
- roundup(sizeof(struct elf_prpsinfo), 4) +
- roundup(arch_task_struct_size, 4);
- *elf_buflen = PAGE_ALIGN(*elf_buflen);
- return size + *elf_buflen;
-}
-
-static void free_kclist_ents(struct list_head *head)
-{
- struct kcore_list *tmp, *pos;
- list_for_each_entry_safe(pos, tmp, head, list) {
- list_del(&pos->list);
- kfree(pos);
- }
+ *phdrs_len = *nphdr * sizeof(struct elf_phdr);
+ *notes_len = (4 * sizeof(struct elf_note) +
+ 3 * ALIGN(sizeof(CORE_STR), 4) +
+ VMCOREINFO_NOTE_NAME_BYTES +
+ ALIGN(sizeof(struct elf_prstatus), 4) +
+ ALIGN(sizeof(struct elf_prpsinfo), 4) +
+ ALIGN(arch_task_struct_size, 4) +
+ ALIGN(vmcoreinfo_size, 4));
+ *data_offset = PAGE_ALIGN(sizeof(struct elfhdr) + *phdrs_len +
+ *notes_len);
+ return *data_offset + size;
}
-/*
- * Replace all KCORE_RAM/KCORE_VMEMMAP information with passed list.
- */
-static void __kcore_update_ram(struct list_head *list)
-{
- int nphdr;
- size_t size;
- struct kcore_list *tmp, *pos;
- LIST_HEAD(garbage);
-
- write_lock(&kclist_lock);
- if (kcore_need_update) {
- list_for_each_entry_safe(pos, tmp, &kclist_head, list) {
- if (pos->type == KCORE_RAM
- || pos->type == KCORE_VMEMMAP)
- list_move(&pos->list, &garbage);
- }
- list_splice_tail(list, &kclist_head);
- } else
- list_splice(list, &garbage);
- kcore_need_update = 0;
- proc_root_kcore->size = get_kcore_size(&nphdr, &size);
- write_unlock(&kclist_lock);
-
- free_kclist_ents(&garbage);
-}
-
#ifdef CONFIG_HIGHMEM
/*
* because memory hole is not as big as !HIGHMEM case.
* (HIGHMEM is special because part of memory is _invisible_ from the kernel.)
*/
-static int kcore_update_ram(void)
+static int kcore_ram_list(struct list_head *head)
{
- LIST_HEAD(head);
struct kcore_list *ent;
- int ret = 0;
ent = kmalloc(sizeof(*ent), GFP_KERNEL);
if (!ent)
ent->addr = (unsigned long)__va(0);
ent->size = max_low_pfn << PAGE_SHIFT;
ent->type = KCORE_RAM;
- list_add(&ent->list, &head);
- __kcore_update_ram(&head);
- return ret;
+ list_add(&ent->list, head);
+ return 0;
}
#else /* !CONFIG_HIGHMEM */
return 1;
}
-static int kcore_update_ram(void)
+static int kcore_ram_list(struct list_head *list)
{
int nid, ret;
unsigned long end_pfn;
- LIST_HEAD(head);
/* Not initialized... update now */
/* find out "max pfn" */
end_pfn = node_end;
}
/* scan 0 to max_pfn */
- ret = walk_system_ram_range(0, end_pfn, &head, kclist_add_private);
- if (ret) {
- free_kclist_ents(&head);
+ ret = walk_system_ram_range(0, end_pfn, list, kclist_add_private);
+ if (ret)
return -ENOMEM;
- }
- __kcore_update_ram(&head);
- return ret;
+ return 0;
}
#endif /* CONFIG_HIGHMEM */
-/*****************************************************************************/
-/*
- * determine size of ELF note
- */
-static int notesize(struct memelfnote *en)
-{
- int sz;
-
- sz = sizeof(struct elf_note);
- sz += roundup((strlen(en->name) + 1), 4);
- sz += roundup(en->datasz, 4);
-
- return sz;
-} /* end notesize() */
-
-/*****************************************************************************/
-/*
- * store a note in the header buffer
- */
-static char *storenote(struct memelfnote *men, char *bufp)
+static int kcore_update_ram(void)
{
- struct elf_note en;
-
-#define DUMP_WRITE(addr,nr) do { memcpy(bufp,addr,nr); bufp += nr; } while(0)
-
- en.n_namesz = strlen(men->name) + 1;
- en.n_descsz = men->datasz;
- en.n_type = men->type;
-
- DUMP_WRITE(&en, sizeof(en));
- DUMP_WRITE(men->name, en.n_namesz);
-
- /* XXX - cast from long long to long to avoid need for libgcc.a */
- bufp = (char*) roundup((unsigned long)bufp,4);
- DUMP_WRITE(men->data, men->datasz);
- bufp = (char*) roundup((unsigned long)bufp,4);
-
-#undef DUMP_WRITE
-
- return bufp;
-} /* end storenote() */
+ LIST_HEAD(list);
+ LIST_HEAD(garbage);
+ int nphdr;
+ size_t phdrs_len, notes_len, data_offset;
+ struct kcore_list *tmp, *pos;
+ int ret = 0;
-/*
- * store an ELF coredump header in the supplied buffer
- * nphdr is the number of elf_phdr to insert
- */
-static void elf_kcore_store_hdr(char *bufp, int nphdr, int dataoff)
-{
- struct elf_prstatus prstatus; /* NT_PRSTATUS */
- struct elf_prpsinfo prpsinfo; /* NT_PRPSINFO */
- struct elf_phdr *nhdr, *phdr;
- struct elfhdr *elf;
- struct memelfnote notes[3];
- off_t offset = 0;
- struct kcore_list *m;
+ down_write(&kclist_lock);
+ if (!xchg(&kcore_need_update, 0))
+ goto out;
- /* setup ELF header */
- elf = (struct elfhdr *) bufp;
- bufp += sizeof(struct elfhdr);
- offset += sizeof(struct elfhdr);
- memcpy(elf->e_ident, ELFMAG, SELFMAG);
- elf->e_ident[EI_CLASS] = ELF_CLASS;
- elf->e_ident[EI_DATA] = ELF_DATA;
- elf->e_ident[EI_VERSION]= EV_CURRENT;
- elf->e_ident[EI_OSABI] = ELF_OSABI;
- memset(elf->e_ident+EI_PAD, 0, EI_NIDENT-EI_PAD);
- elf->e_type = ET_CORE;
- elf->e_machine = ELF_ARCH;
- elf->e_version = EV_CURRENT;
- elf->e_entry = 0;
- elf->e_phoff = sizeof(struct elfhdr);
- elf->e_shoff = 0;
- elf->e_flags = ELF_CORE_EFLAGS;
- elf->e_ehsize = sizeof(struct elfhdr);
- elf->e_phentsize= sizeof(struct elf_phdr);
- elf->e_phnum = nphdr;
- elf->e_shentsize= 0;
- elf->e_shnum = 0;
- elf->e_shstrndx = 0;
-
- /* setup ELF PT_NOTE program header */
- nhdr = (struct elf_phdr *) bufp;
- bufp += sizeof(struct elf_phdr);
- offset += sizeof(struct elf_phdr);
- nhdr->p_type = PT_NOTE;
- nhdr->p_offset = 0;
- nhdr->p_vaddr = 0;
- nhdr->p_paddr = 0;
- nhdr->p_filesz = 0;
- nhdr->p_memsz = 0;
- nhdr->p_flags = 0;
- nhdr->p_align = 0;
-
- /* setup ELF PT_LOAD program header for every area */
- list_for_each_entry(m, &kclist_head, list) {
- phdr = (struct elf_phdr *) bufp;
- bufp += sizeof(struct elf_phdr);
- offset += sizeof(struct elf_phdr);
-
- phdr->p_type = PT_LOAD;
- phdr->p_flags = PF_R|PF_W|PF_X;
- phdr->p_offset = kc_vaddr_to_offset(m->addr) + dataoff;
- phdr->p_vaddr = (size_t)m->addr;
- if (m->type == KCORE_RAM || m->type == KCORE_TEXT)
- phdr->p_paddr = __pa(m->addr);
- else
- phdr->p_paddr = (elf_addr_t)-1;
- phdr->p_filesz = phdr->p_memsz = m->size;
- phdr->p_align = PAGE_SIZE;
+ ret = kcore_ram_list(&list);
+ if (ret) {
+ /* Couldn't get the RAM list, try again next time. */
+ WRITE_ONCE(kcore_need_update, 1);
+ list_splice_tail(&list, &garbage);
+ goto out;
}
- /*
- * Set up the notes in similar form to SVR4 core dumps made
- * with info from their /proc.
- */
- nhdr->p_offset = offset;
-
- /* set up the process status */
- notes[0].name = CORE_STR;
- notes[0].type = NT_PRSTATUS;
- notes[0].datasz = sizeof(struct elf_prstatus);
- notes[0].data = &prstatus;
-
- memset(&prstatus, 0, sizeof(struct elf_prstatus));
-
-	nhdr->p_filesz = notesize(&notes[0]);
-	bufp = storenote(&notes[0], bufp);
-
- /* set up the process info */
- notes[1].name = CORE_STR;
- notes[1].type = NT_PRPSINFO;
- notes[1].datasz = sizeof(struct elf_prpsinfo);
- notes[1].data = &prpsinfo;
-
- memset(&prpsinfo, 0, sizeof(struct elf_prpsinfo));
- prpsinfo.pr_state = 0;
- prpsinfo.pr_sname = 'R';
- prpsinfo.pr_zomb = 0;
-
- strcpy(prpsinfo.pr_fname, "vmlinux");
- strlcpy(prpsinfo.pr_psargs, saved_command_line, sizeof(prpsinfo.pr_psargs));
-
-	nhdr->p_filesz += notesize(&notes[1]);
-	bufp = storenote(&notes[1], bufp);
+ list_for_each_entry_safe(pos, tmp, &kclist_head, list) {
+ if (pos->type == KCORE_RAM || pos->type == KCORE_VMEMMAP)
+ list_move(&pos->list, &garbage);
+ }
+ list_splice_tail(&list, &kclist_head);
- /* set up the task structure */
- notes[2].name = CORE_STR;
- notes[2].type = NT_TASKSTRUCT;
- notes[2].datasz = arch_task_struct_size;
- notes[2].data = current;
+	proc_root_kcore->size = get_kcore_size(&nphdr, &phdrs_len, &notes_len,
+					       &data_offset);
-	nhdr->p_filesz += notesize(&notes[2]);
-	bufp = storenote(&notes[2], bufp);
+out:
+ up_write(&kclist_lock);
+ list_for_each_entry_safe(pos, tmp, &garbage, list) {
+ list_del(&pos->list);
+ kfree(pos);
+ }
+ return ret;
+}
-} /* end elf_kcore_store_hdr() */
+static void append_kcore_note(char *notes, size_t *i, const char *name,
+ unsigned int type, const void *desc,
+ size_t descsz)
+{
+	struct elf_note *note = (struct elf_note *)&notes[*i];
+
+ note->n_namesz = strlen(name) + 1;
+ note->n_descsz = descsz;
+ note->n_type = type;
+ *i += sizeof(*note);
+	memcpy(&notes[*i], name, note->n_namesz);
+	*i = ALIGN(*i + note->n_namesz, 4);
+	memcpy(&notes[*i], desc, descsz);
+ *i = ALIGN(*i + descsz, 4);
+}
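
For readers unfamiliar with the format append_kcore_note() emits: each record is an Elf{32,64}_Nhdr followed by the NUL-terminated name and then the descriptor, each padded to a 4-byte boundary. A standalone sketch of one CORE note (illustrative only; the descriptor payload is a stand-in):

    #include <elf.h>
    #include <stdio.h>
    #include <string.h>

    int main(void)
    {
    	char buf[64] = {0};
    	size_t i = 0;
    	Elf64_Nhdr *note = (Elf64_Nhdr *)&buf[i];
    	const char name[] = "CORE";
    	unsigned int desc = 0;		/* stand-in descriptor payload */

    	note->n_namesz = sizeof(name);	/* includes the NUL */
    	note->n_descsz = sizeof(desc);
    	note->n_type = NT_PRSTATUS;
    	i += sizeof(*note);			/* header: 12 bytes */
    	memcpy(&buf[i], name, sizeof(name));
    	i = (i + sizeof(name) + 3) & ~3UL;	/* name padded: 12 -> 20 */
    	memcpy(&buf[i], &desc, sizeof(desc));
    	i = (i + sizeof(desc) + 3) & ~3UL;	/* desc padded: 20 -> 24 */

    	printf("note occupies %zu bytes\n", i);
    	return 0;
    }
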
-/*****************************************************************************/
-/*
- * read from the ELF header and then kernel memory
- */
static ssize_t
read_kcore(struct file *file, char __user *buffer, size_t buflen, loff_t *fpos)
{
char *buf = file->private_data;
- ssize_t acc = 0;
- size_t size, tsz;
- size_t elf_buflen;
+ size_t phdrs_offset, notes_offset, data_offset;
+ size_t phdrs_len, notes_len;
+ struct kcore_list *m;
+ size_t tsz;
int nphdr;
unsigned long start;
+ size_t orig_buflen = buflen;
+ int ret = 0;
- read_lock(&kclist_lock);
- size = get_kcore_size(&nphdr, &elf_buflen);
+ down_read(&kclist_lock);
+
+	get_kcore_size(&nphdr, &phdrs_len, &notes_len, &data_offset);
+ phdrs_offset = sizeof(struct elfhdr);
+ notes_offset = phdrs_offset + phdrs_len;
+
+ /* ELF file header. */
+ if (buflen && *fpos < sizeof(struct elfhdr)) {
+ struct elfhdr ehdr = {
+ .e_ident = {
+ [EI_MAG0] = ELFMAG0,
+ [EI_MAG1] = ELFMAG1,
+ [EI_MAG2] = ELFMAG2,
+ [EI_MAG3] = ELFMAG3,
+ [EI_CLASS] = ELF_CLASS,
+ [EI_DATA] = ELF_DATA,
+ [EI_VERSION] = EV_CURRENT,
+ [EI_OSABI] = ELF_OSABI,
+ },
+ .e_type = ET_CORE,
+ .e_machine = ELF_ARCH,
+ .e_version = EV_CURRENT,
+ .e_phoff = sizeof(struct elfhdr),
+ .e_flags = ELF_CORE_EFLAGS,
+ .e_ehsize = sizeof(struct elfhdr),
+ .e_phentsize = sizeof(struct elf_phdr),
+ .e_phnum = nphdr,
+ };
+
+ tsz = min_t(size_t, buflen, sizeof(struct elfhdr) - *fpos);
+ if (copy_to_user(buffer, (char *)&ehdr + *fpos, tsz)) {
+ ret = -EFAULT;
+ goto out;
+ }
- if (buflen == 0 || *fpos >= size) {
- read_unlock(&kclist_lock);
- return 0;
+ buffer += tsz;
+ buflen -= tsz;
+ *fpos += tsz;
}
- /* trim buflen to not go beyond EOF */
- if (buflen > size - *fpos)
- buflen = size - *fpos;
-
- /* construct an ELF core header if we'll need some of it */
- if (*fpos < elf_buflen) {
- char * elf_buf;
-
- tsz = elf_buflen - *fpos;
- if (buflen < tsz)
- tsz = buflen;
- elf_buf = kzalloc(elf_buflen, GFP_ATOMIC);
- if (!elf_buf) {
- read_unlock(&kclist_lock);
- return -ENOMEM;
+ /* ELF program headers. */
+ if (buflen && *fpos < phdrs_offset + phdrs_len) {
+ struct elf_phdr *phdrs, *phdr;
+
+ phdrs = kzalloc(phdrs_len, GFP_KERNEL);
+ if (!phdrs) {
+ ret = -ENOMEM;
+ goto out;
}
- elf_kcore_store_hdr(elf_buf, nphdr, elf_buflen);
- read_unlock(&kclist_lock);
- if (copy_to_user(buffer, elf_buf + *fpos, tsz)) {
- kfree(elf_buf);
- return -EFAULT;
+
+ phdrs[0].p_type = PT_NOTE;
+ phdrs[0].p_offset = notes_offset;
+ phdrs[0].p_filesz = notes_len;
+
+ phdr = &phdrs[1];
+ list_for_each_entry(m, &kclist_head, list) {
+ phdr->p_type = PT_LOAD;
+ phdr->p_flags = PF_R | PF_W | PF_X;
+ phdr->p_offset = kc_vaddr_to_offset(m->addr) + data_offset;
+ phdr->p_vaddr = (size_t)m->addr;
+ if (m->type == KCORE_RAM)
+ phdr->p_paddr = __pa(m->addr);
+ else if (m->type == KCORE_TEXT)
+ phdr->p_paddr = __pa_symbol(m->addr);
+ else
+ phdr->p_paddr = (elf_addr_t)-1;
+ phdr->p_filesz = phdr->p_memsz = m->size;
+ phdr->p_align = PAGE_SIZE;
+ phdr++;
}
- kfree(elf_buf);
+
+ tsz = min_t(size_t, buflen, phdrs_offset + phdrs_len - *fpos);
+ if (copy_to_user(buffer, (char *)phdrs + *fpos - phdrs_offset,
+ tsz)) {
+ kfree(phdrs);
+ ret = -EFAULT;
+ goto out;
+ }
+ kfree(phdrs);
+
+ buffer += tsz;
buflen -= tsz;
*fpos += tsz;
- buffer += tsz;
- acc += tsz;
+ }
+
+ /* ELF note segment. */
+ if (buflen && *fpos < notes_offset + notes_len) {
+ struct elf_prstatus prstatus = {};
+ struct elf_prpsinfo prpsinfo = {
+ .pr_sname = 'R',
+ .pr_fname = "vmlinux",
+ };
+ char *notes;
+ size_t i = 0;
+
+ strlcpy(prpsinfo.pr_psargs, saved_command_line,
+ sizeof(prpsinfo.pr_psargs));
+
+ notes = kzalloc(notes_len, GFP_KERNEL);
+ if (!notes) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ append_kcore_note(notes, &i, CORE_STR, NT_PRSTATUS, &prstatus,
+ sizeof(prstatus));
+ append_kcore_note(notes, &i, CORE_STR, NT_PRPSINFO, &prpsinfo,
+ sizeof(prpsinfo));
+ append_kcore_note(notes, &i, CORE_STR, NT_TASKSTRUCT, current,
+ arch_task_struct_size);
+ /*
+ * vmcoreinfo_size is mostly constant after init time, but it
+ * can be changed by crash_save_vmcoreinfo(). Racing here with a
+ * panic on another CPU before the machine goes down is insanely
+ * unlikely, but it's better to not leave potential buffer
+ * overflows lying around, regardless.
+ */
+ append_kcore_note(notes, &i, VMCOREINFO_NOTE_NAME, 0,
+ vmcoreinfo_data,
+ min(vmcoreinfo_size, notes_len - i));
+
+ tsz = min_t(size_t, buflen, notes_offset + notes_len - *fpos);
+ if (copy_to_user(buffer, notes + *fpos - notes_offset, tsz)) {
+ kfree(notes);
+ ret = -EFAULT;
+ goto out;
+ }
+ kfree(notes);
- /* leave now if filled buffer already */
- if (buflen == 0)
- return acc;
- } else
- read_unlock(&kclist_lock);
+ buffer += tsz;
+ buflen -= tsz;
+ *fpos += tsz;
+ }
/*
* Check to see if our file offset matches with any of
* the addresses in the elf_phdr on our list.
*/
- start = kc_offset_to_vaddr(*fpos - elf_buflen);
+ start = kc_offset_to_vaddr(*fpos - data_offset);
if ((tsz = (PAGE_SIZE - (start & ~PAGE_MASK))) > buflen)
tsz = buflen;
-
- while (buflen) {
- struct kcore_list *m;
- read_lock(&kclist_lock);
- list_for_each_entry(m, &kclist_head, list) {
- if (start >= m->addr && start < (m->addr+m->size))
- break;
+ m = NULL;
+ while (buflen) {
+ /*
+ * If this is the first iteration or the address is not within
+ * the previous entry, search for a matching entry.
+ */
+ if (!m || start < m->addr || start >= m->addr + m->size) {
+ list_for_each_entry(m, &kclist_head, list) {
+ if (start >= m->addr &&
+ start < m->addr + m->size)
+ break;
+ }
}
- read_unlock(&kclist_lock);
if (&m->list == &kclist_head) {
- if (clear_user(buffer, tsz))
- return -EFAULT;
+ if (clear_user(buffer, tsz)) {
+ ret = -EFAULT;
+ goto out;
+ }
} else if (m->type == KCORE_VMALLOC) {
vread(buf, (char *)start, tsz);
/* we have to zero-fill user buffer even if no read */
- if (copy_to_user(buffer, buf, tsz))
- return -EFAULT;
+ if (copy_to_user(buffer, buf, tsz)) {
+ ret = -EFAULT;
+ goto out;
+ }
} else if (m->type == KCORE_USER) {
/* User page is handled prior to normal kernel page: */
- if (copy_to_user(buffer, (char *)start, tsz))
- return -EFAULT;
+ if (copy_to_user(buffer, (char *)start, tsz)) {
+ ret = -EFAULT;
+ goto out;
+ }
} else {
if (kern_addr_valid(start)) {
/*
* hardened user copy kernel text checks.
*/
if (probe_kernel_read(buf, (void *) start, tsz)) {
- if (clear_user(buffer, tsz))
- return -EFAULT;
+ if (clear_user(buffer, tsz)) {
+ ret = -EFAULT;
+ goto out;
+ }
} else {
- if (copy_to_user(buffer, buf, tsz))
- return -EFAULT;
+ if (copy_to_user(buffer, buf, tsz)) {
+ ret = -EFAULT;
+ goto out;
+ }
}
} else {
- if (clear_user(buffer, tsz))
- return -EFAULT;
+ if (clear_user(buffer, tsz)) {
+ ret = -EFAULT;
+ goto out;
+ }
}
}
buflen -= tsz;
*fpos += tsz;
buffer += tsz;
- acc += tsz;
start += tsz;
tsz = (buflen > PAGE_SIZE ? PAGE_SIZE : buflen);
}
- return acc;
+out:
+ up_read(&kclist_lock);
+ if (ret)
+ return ret;
+ return orig_buflen - buflen;
}
-
static int open_kcore(struct inode *inode, struct file *filp)
{
if (!capable(CAP_SYS_RAWIO))
switch (action) {
case MEM_ONLINE:
case MEM_OFFLINE:
- write_lock(&kclist_lock);
kcore_need_update = 1;
- write_unlock(&kclist_lock);
+ break;
}
return NOTIFY_OK;
}
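
A minimal sketch of consuming the rewritten header path (Elf64 and root privileges assumed; open_kcore() above requires CAP_SYS_RAWIO): reading the first sizeof(Elf64_Ehdr) bytes is enough to see the PT_NOTE segment plus one PT_LOAD per kclist entry reflected in e_phnum.

    #include <elf.h>
    #include <fcntl.h>
    #include <stdio.h>
    #include <unistd.h>

    int main(void)
    {
    	Elf64_Ehdr ehdr;
    	int fd = open("/proc/kcore", O_RDONLY);

    	if (fd < 0 || read(fd, &ehdr, sizeof(ehdr)) != sizeof(ehdr)) {
    		perror("/proc/kcore");
    		return 1;
    	}
    	/* e_type should be ET_CORE (4); e_phoff matches sizeof(struct elfhdr). */
    	printf("type=%u phnum=%u phoff=%llu\n", ehdr.e_type, ehdr.e_phnum,
    	       (unsigned long long)ehdr.e_phoff);
    	close(fd);
    	return 0;
    }
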
#include <linux/mman.h>
#include <linux/mmzone.h>
#include <linux/proc_fs.h>
+#include <linux/percpu.h>
#include <linux/quicklist.h>
#include <linux/seq_file.h>
#include <linux/swap.h>
(unsigned long)VMALLOC_TOTAL >> 10);
show_val_kb(m, "VmallocUsed: ", 0ul);
show_val_kb(m, "VmallocChunk: ", 0ul);
+ show_val_kb(m, "Percpu: ", pcpu_nr_pages());
#ifdef CONFIG_MEMORY_FAILURE
seq_printf(m, "HardwareCorrupted: %5lu kB\n",
static int stat_open(struct inode *inode, struct file *file)
{
- size_t size = 1024 + 128 * num_online_cpus();
+ unsigned int size = 1024 + 128 * num_online_cpus();
/* minimum size to display an interrupt count : 2 bytes */
size += 2 * nr_irqs;
if (priv->mm)
mmdrop(priv->mm);
- kfree(priv->rollup);
return seq_release_private(inode, file);
}
}
static void
-show_map_vma(struct seq_file *m, struct vm_area_struct *vma, int is_pid)
+show_map_vma(struct seq_file *m, struct vm_area_struct *vma)
{
struct mm_struct *mm = vma->vm_mm;
struct file *file = vma->vm_file;
seq_putc(m, '\n');
}
-static int show_map(struct seq_file *m, void *v, int is_pid)
+static int show_map(struct seq_file *m, void *v)
{
- show_map_vma(m, v, is_pid);
+ show_map_vma(m, v);
m_cache_vma(m, v);
return 0;
}
-static int show_pid_map(struct seq_file *m, void *v)
-{
- return show_map(m, v, 1);
-}
-
-static int show_tid_map(struct seq_file *m, void *v)
-{
- return show_map(m, v, 0);
-}
-
static const struct seq_operations proc_pid_maps_op = {
.start = m_start,
.next = m_next,
.stop = m_stop,
- .show = show_pid_map
-};
-
-static const struct seq_operations proc_tid_maps_op = {
- .start = m_start,
- .next = m_next,
- .stop = m_stop,
- .show = show_tid_map
+ .show = show_map
};
static int pid_maps_open(struct inode *inode, struct file *file)
return do_maps_open(inode, file, &proc_pid_maps_op);
}
-static int tid_maps_open(struct inode *inode, struct file *file)
-{
- return do_maps_open(inode, file, &proc_tid_maps_op);
-}
-
const struct file_operations proc_pid_maps_operations = {
.open = pid_maps_open,
.read = seq_read,
.release = proc_map_release,
};
-const struct file_operations proc_tid_maps_operations = {
- .open = tid_maps_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = proc_map_release,
-};
-
/*
* Proportional Set Size(PSS): my share of RSS.
*
#ifdef CONFIG_PROC_PAGE_MONITOR
struct mem_size_stats {
- bool first;
unsigned long resident;
unsigned long shared_clean;
unsigned long shared_dirty;
unsigned long swap;
unsigned long shared_hugetlb;
unsigned long private_hugetlb;
- unsigned long first_vma_start;
u64 pss;
u64 pss_locked;
u64 swap_pss;
}
#endif /* HUGETLB_PAGE */
-#define SEQ_PUT_DEC(str, val) \
- seq_put_decimal_ull_width(m, str, (val) >> 10, 8)
-static int show_smap(struct seq_file *m, void *v, int is_pid)
+static void smap_gather_stats(struct vm_area_struct *vma,
+ struct mem_size_stats *mss)
{
- struct proc_maps_private *priv = m->private;
- struct vm_area_struct *vma = v;
- struct mem_size_stats mss_stack;
- struct mem_size_stats *mss;
struct mm_walk smaps_walk = {
.pmd_entry = smaps_pte_range,
#ifdef CONFIG_HUGETLB_PAGE
#endif
.mm = vma->vm_mm,
};
- int ret = 0;
- bool rollup_mode;
- bool last_vma;
-
- if (priv->rollup) {
- rollup_mode = true;
- mss = priv->rollup;
- if (mss->first) {
- mss->first_vma_start = vma->vm_start;
- mss->first = false;
- }
- last_vma = !m_next_vma(priv, vma);
- } else {
- rollup_mode = false;
- memset(&mss_stack, 0, sizeof(mss_stack));
- mss = &mss_stack;
- }
smaps_walk.private = mss;
walk_page_vma(vma, &smaps_walk);
if (vma->vm_flags & VM_LOCKED)
mss->pss_locked += mss->pss;
+}
- if (!rollup_mode) {
- show_map_vma(m, vma, is_pid);
- } else if (last_vma) {
- show_vma_header_prefix(
- m, mss->first_vma_start, vma->vm_end, 0, 0, 0, 0);
- seq_pad(m, ' ');
- seq_puts(m, "[rollup]\n");
- } else {
- ret = SEQ_SKIP;
- }
-
- if (!rollup_mode) {
- SEQ_PUT_DEC("Size: ", vma->vm_end - vma->vm_start);
- SEQ_PUT_DEC(" kB\nKernelPageSize: ", vma_kernel_pagesize(vma));
- SEQ_PUT_DEC(" kB\nMMUPageSize: ", vma_mmu_pagesize(vma));
- seq_puts(m, " kB\n");
- }
+#define SEQ_PUT_DEC(str, val) \
+ seq_put_decimal_ull_width(m, str, (val) >> 10, 8)
- if (!rollup_mode || last_vma) {
- SEQ_PUT_DEC("Rss: ", mss->resident);
- SEQ_PUT_DEC(" kB\nPss: ", mss->pss >> PSS_SHIFT);
- SEQ_PUT_DEC(" kB\nShared_Clean: ", mss->shared_clean);
- SEQ_PUT_DEC(" kB\nShared_Dirty: ", mss->shared_dirty);
- SEQ_PUT_DEC(" kB\nPrivate_Clean: ", mss->private_clean);
- SEQ_PUT_DEC(" kB\nPrivate_Dirty: ", mss->private_dirty);
- SEQ_PUT_DEC(" kB\nReferenced: ", mss->referenced);
- SEQ_PUT_DEC(" kB\nAnonymous: ", mss->anonymous);
- SEQ_PUT_DEC(" kB\nLazyFree: ", mss->lazyfree);
- SEQ_PUT_DEC(" kB\nAnonHugePages: ", mss->anonymous_thp);
- SEQ_PUT_DEC(" kB\nShmemPmdMapped: ", mss->shmem_thp);
- SEQ_PUT_DEC(" kB\nShared_Hugetlb: ", mss->shared_hugetlb);
- seq_put_decimal_ull_width(m, " kB\nPrivate_Hugetlb: ",
- mss->private_hugetlb >> 10, 7);
- SEQ_PUT_DEC(" kB\nSwap: ", mss->swap);
- SEQ_PUT_DEC(" kB\nSwapPss: ",
- mss->swap_pss >> PSS_SHIFT);
- SEQ_PUT_DEC(" kB\nLocked: ",
- mss->pss_locked >> PSS_SHIFT);
- seq_puts(m, " kB\n");
- }
- if (!rollup_mode) {
- if (arch_pkeys_enabled())
- seq_printf(m, "ProtectionKey: %8u\n", vma_pkey(vma));
- show_smap_vma_flags(m, vma);
- }
- m_cache_vma(m, vma);
- return ret;
+/* Show the contents common for smaps and smaps_rollup */
+static void __show_smap(struct seq_file *m, const struct mem_size_stats *mss)
+{
+ SEQ_PUT_DEC("Rss: ", mss->resident);
+ SEQ_PUT_DEC(" kB\nPss: ", mss->pss >> PSS_SHIFT);
+ SEQ_PUT_DEC(" kB\nShared_Clean: ", mss->shared_clean);
+ SEQ_PUT_DEC(" kB\nShared_Dirty: ", mss->shared_dirty);
+ SEQ_PUT_DEC(" kB\nPrivate_Clean: ", mss->private_clean);
+ SEQ_PUT_DEC(" kB\nPrivate_Dirty: ", mss->private_dirty);
+ SEQ_PUT_DEC(" kB\nReferenced: ", mss->referenced);
+ SEQ_PUT_DEC(" kB\nAnonymous: ", mss->anonymous);
+ SEQ_PUT_DEC(" kB\nLazyFree: ", mss->lazyfree);
+ SEQ_PUT_DEC(" kB\nAnonHugePages: ", mss->anonymous_thp);
+ SEQ_PUT_DEC(" kB\nShmemPmdMapped: ", mss->shmem_thp);
+ SEQ_PUT_DEC(" kB\nShared_Hugetlb: ", mss->shared_hugetlb);
+ seq_put_decimal_ull_width(m, " kB\nPrivate_Hugetlb: ",
+ mss->private_hugetlb >> 10, 7);
+ SEQ_PUT_DEC(" kB\nSwap: ", mss->swap);
+ SEQ_PUT_DEC(" kB\nSwapPss: ",
+ mss->swap_pss >> PSS_SHIFT);
+ SEQ_PUT_DEC(" kB\nLocked: ",
+ mss->pss_locked >> PSS_SHIFT);
+ seq_puts(m, " kB\n");
}
-#undef SEQ_PUT_DEC
-static int show_pid_smap(struct seq_file *m, void *v)
+static int show_smap(struct seq_file *m, void *v)
{
- return show_smap(m, v, 1);
+ struct vm_area_struct *vma = v;
+ struct mem_size_stats mss;
+
+ memset(&mss, 0, sizeof(mss));
+
+ smap_gather_stats(vma, &mss);
+
+ show_map_vma(m, vma);
+
+ SEQ_PUT_DEC("Size: ", vma->vm_end - vma->vm_start);
+ SEQ_PUT_DEC(" kB\nKernelPageSize: ", vma_kernel_pagesize(vma));
+ SEQ_PUT_DEC(" kB\nMMUPageSize: ", vma_mmu_pagesize(vma));
+ seq_puts(m, " kB\n");
+
+ __show_smap(m, &mss);
+
+ if (arch_pkeys_enabled())
+ seq_printf(m, "ProtectionKey: %8u\n", vma_pkey(vma));
+ show_smap_vma_flags(m, vma);
+
+ m_cache_vma(m, vma);
+
+ return 0;
}
-static int show_tid_smap(struct seq_file *m, void *v)
+static int show_smaps_rollup(struct seq_file *m, void *v)
{
- return show_smap(m, v, 0);
+ struct proc_maps_private *priv = m->private;
+ struct mem_size_stats mss;
+ struct mm_struct *mm;
+ struct vm_area_struct *vma;
+ unsigned long last_vma_end = 0;
+ int ret = 0;
+
+ priv->task = get_proc_task(priv->inode);
+ if (!priv->task)
+ return -ESRCH;
+
+ mm = priv->mm;
+ if (!mm || !mmget_not_zero(mm)) {
+ ret = -ESRCH;
+ goto out_put_task;
+ }
+
+ memset(&mss, 0, sizeof(mss));
+
+ down_read(&mm->mmap_sem);
+ hold_task_mempolicy(priv);
+
+ for (vma = priv->mm->mmap; vma; vma = vma->vm_next) {
+ smap_gather_stats(vma, &mss);
+ last_vma_end = vma->vm_end;
+ }
+
+ show_vma_header_prefix(m, priv->mm->mmap->vm_start,
+ last_vma_end, 0, 0, 0, 0);
+ seq_pad(m, ' ');
+ seq_puts(m, "[rollup]\n");
+
+ __show_smap(m, &mss);
+
+ release_task_mempolicy(priv);
+ up_read(&mm->mmap_sem);
+ mmput(mm);
+
+out_put_task:
+ put_task_struct(priv->task);
+ priv->task = NULL;
+
+ return ret;
}
+#undef SEQ_PUT_DEC
static const struct seq_operations proc_pid_smaps_op = {
.start = m_start,
.next = m_next,
.stop = m_stop,
- .show = show_pid_smap
-};
-
-static const struct seq_operations proc_tid_smaps_op = {
- .start = m_start,
- .next = m_next,
- .stop = m_stop,
- .show = show_tid_smap
+ .show = show_smap
};
static int pid_smaps_open(struct inode *inode, struct file *file)
return do_maps_open(inode, file, &proc_pid_smaps_op);
}
-static int pid_smaps_rollup_open(struct inode *inode, struct file *file)
+static int smaps_rollup_open(struct inode *inode, struct file *file)
{
- struct seq_file *seq;
+ int ret;
struct proc_maps_private *priv;
- int ret = do_maps_open(inode, file, &proc_pid_smaps_op);
-
- if (ret < 0)
- return ret;
- seq = file->private_data;
- priv = seq->private;
- priv->rollup = kzalloc(sizeof(*priv->rollup), GFP_KERNEL);
- if (!priv->rollup) {
- proc_map_release(inode, file);
+
+ priv = kzalloc(sizeof(*priv), GFP_KERNEL_ACCOUNT);
+ if (!priv)
return -ENOMEM;
+
+ ret = single_open(file, show_smaps_rollup, priv);
+ if (ret)
+ goto out_free;
+
+ priv->inode = inode;
+ priv->mm = proc_mem_open(inode, PTRACE_MODE_READ);
+ if (IS_ERR(priv->mm)) {
+ ret = PTR_ERR(priv->mm);
+
+ single_release(inode, file);
+ goto out_free;
}
- priv->rollup->first = true;
+
return 0;
+
+out_free:
+ kfree(priv);
+ return ret;
}
-static int tid_smaps_open(struct inode *inode, struct file *file)
+static int smaps_rollup_release(struct inode *inode, struct file *file)
{
- return do_maps_open(inode, file, &proc_tid_smaps_op);
+ struct seq_file *seq = file->private_data;
+ struct proc_maps_private *priv = seq->private;
+
+ if (priv->mm)
+ mmdrop(priv->mm);
+
+ kfree(priv);
+ return single_release(inode, file);
}
const struct file_operations proc_pid_smaps_operations = {
};
const struct file_operations proc_pid_smaps_rollup_operations = {
- .open = pid_smaps_rollup_open,
+ .open = smaps_rollup_open,
.read = seq_read,
.llseek = seq_lseek,
- .release = proc_map_release,
-};
-
-const struct file_operations proc_tid_smaps_operations = {
- .open = tid_smaps_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = proc_map_release,
+ .release = smaps_rollup_release,
};
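
With smaps_rollup now a single_open() file of its own, consuming it is a plain sequential read; a short sketch (the output fields are whatever __show_smap() prints after the "[rollup]" header line):

    #include <stdio.h>

    int main(void)
    {
    	char line[256];
    	FILE *f = fopen("/proc/self/smaps_rollup", "r");

    	if (!f) {
    		perror("smaps_rollup");
    		return 1;
    	}
    	/* First line is the "<start>-<end> ... [rollup]" header,
    	 * followed by the summed Rss:/Pss:/... counters. */
    	while (fgets(line, sizeof(line), f))
    		fputs(line, stdout);
    	fclose(f);
    	return 0;
    }
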
enum clear_refs_types {
/*
* Display pages allocated per node and memory policy via /proc.
*/
-static int show_numa_map(struct seq_file *m, void *v, int is_pid)
+static int show_numa_map(struct seq_file *m, void *v)
{
struct numa_maps_private *numa_priv = m->private;
struct proc_maps_private *proc_priv = &numa_priv->proc_maps;
return 0;
}
-static int show_pid_numa_map(struct seq_file *m, void *v)
-{
- return show_numa_map(m, v, 1);
-}
-
-static int show_tid_numa_map(struct seq_file *m, void *v)
-{
- return show_numa_map(m, v, 0);
-}
-
static const struct seq_operations proc_pid_numa_maps_op = {
.start = m_start,
.next = m_next,
.stop = m_stop,
- .show = show_pid_numa_map,
+ .show = show_numa_map,
};
-static const struct seq_operations proc_tid_numa_maps_op = {
- .start = m_start,
- .next = m_next,
- .stop = m_stop,
- .show = show_tid_numa_map,
-};
-
-static int numa_maps_open(struct inode *inode, struct file *file,
- const struct seq_operations *ops)
-{
- return proc_maps_open(inode, file, ops,
- sizeof(struct numa_maps_private));
-}
-
static int pid_numa_maps_open(struct inode *inode, struct file *file)
{
- return numa_maps_open(inode, file, &proc_pid_numa_maps_op);
-}
-
-static int tid_numa_maps_open(struct inode *inode, struct file *file)
-{
- return numa_maps_open(inode, file, &proc_tid_numa_maps_op);
+ return proc_maps_open(inode, file, &proc_pid_numa_maps_op,
+ sizeof(struct numa_maps_private));
}
const struct file_operations proc_pid_numa_maps_operations = {
.release = proc_map_release,
};
-const struct file_operations proc_tid_numa_maps_operations = {
- .open = tid_numa_maps_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = proc_map_release,
-};
#endif /* CONFIG_NUMA */
/*
* display a single VMA to a sequenced file
*/
-static int nommu_vma_show(struct seq_file *m, struct vm_area_struct *vma,
- int is_pid)
+static int nommu_vma_show(struct seq_file *m, struct vm_area_struct *vma)
{
struct mm_struct *mm = vma->vm_mm;
unsigned long ino = 0;
/*
* display mapping lines for a particular process's /proc/pid/maps
*/
-static int show_map(struct seq_file *m, void *_p, int is_pid)
+static int show_map(struct seq_file *m, void *_p)
{
struct rb_node *p = _p;
- return nommu_vma_show(m, rb_entry(p, struct vm_area_struct, vm_rb),
- is_pid);
-}
-
-static int show_pid_map(struct seq_file *m, void *_p)
-{
- return show_map(m, _p, 1);
-}
-
-static int show_tid_map(struct seq_file *m, void *_p)
-{
- return show_map(m, _p, 0);
+ return nommu_vma_show(m, rb_entry(p, struct vm_area_struct, vm_rb));
}
static void *m_start(struct seq_file *m, loff_t *pos)
.start = m_start,
.next = m_next,
.stop = m_stop,
- .show = show_pid_map
-};
-
-static const struct seq_operations proc_tid_maps_ops = {
- .start = m_start,
- .next = m_next,
- .stop = m_stop,
- .show = show_tid_map
+ .show = show_map
};
static int maps_open(struct inode *inode, struct file *file,
return maps_open(inode, file, &proc_pid_maps_ops);
}
-static int tid_maps_open(struct inode *inode, struct file *file)
-{
- return maps_open(inode, file, &proc_tid_maps_ops);
-}
-
const struct file_operations proc_pid_maps_operations = {
.open = pid_maps_open,
.read = seq_read,
.release = map_release,
};
-const struct file_operations proc_tid_maps_operations = {
- .open = tid_maps_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = map_release,
-};
-
static int uptime_proc_show(struct seq_file *m, void *v)
{
- struct timespec uptime;
+ struct timespec64 uptime;
struct timespec64 idle;
u64 nsec;
u32 rem;
for_each_possible_cpu(i)
nsec += (__force u64) kcpustat_cpu(i).cpustat[CPUTIME_IDLE];
- get_monotonic_boottime(&uptime);
+ ktime_get_boottime_ts64(&uptime);
idle.tv_sec = div_u64_rem(nsec, NSEC_PER_SEC, &rem);
idle.tv_nsec = rem;
seq_printf(m, "%lu.%02lu %lu.%02lu\n",
* On s390 the fault handler is used for memory regions that can't be mapped
* directly with remap_pfn_range().
*/
-static int mmap_vmcore_fault(struct vm_fault *vmf)
+static vm_fault_t mmap_vmcore_fault(struct vm_fault *vmf)
{
#ifdef CONFIG_S390
struct address_space *mapping = vmf->vma->vm_file->f_mapping;
return 0;
}
-static char *print_time(time_t t)
-{
- static char timebuf[256];
-
- sprintf(timebuf, "%ld", t);
- return timebuf;
-}
-
static void sd_print_item(struct item_head *ih, char *item)
{
printk("\tmode | size | nlinks | first direct | mtime\n");
if (stat_data_v1(ih)) {
struct stat_data_v1 *sd = (struct stat_data_v1 *)item;
- printk("\t0%-6o | %6u | %2u | %d | %s\n", sd_v1_mode(sd),
+ printk("\t0%-6o | %6u | %2u | %d | %u\n", sd_v1_mode(sd),
sd_v1_size(sd), sd_v1_nlink(sd),
sd_v1_first_direct_byte(sd),
- print_time(sd_v1_mtime(sd)));
+ sd_v1_mtime(sd));
} else {
struct stat_data *sd = (struct stat_data *)item;
- printk("\t0%-6o | %6llu | %2u | %d | %s\n", sd_v2_mode(sd),
+ printk("\t0%-6o | %6llu | %2u | %d | %u\n", sd_v2_mode(sd),
(unsigned long long)sd_v2_size(sd), sd_v2_nlink(sd),
- sd_v2_rdev(sd), print_time(sd_v2_mtime(sd)));
+ sd_v2_rdev(sd), sd_v2_mtime(sd));
}
}
struct reiserfs_journal_desc *desc;
unsigned int oldest_trans_id = 0;
unsigned int oldest_invalid_trans_id = 0;
- time_t start;
+ time64_t start;
unsigned long oldest_start = 0;
unsigned long cur_dblock = 0;
unsigned long newest_mount_id = 9;
cur_dblock = SB_ONDISK_JOURNAL_1st_BLOCK(sb);
reiserfs_info(sb, "checking transaction log (%pg)\n",
journal->j_dev_bd);
- start = get_seconds();
+ start = ktime_get_seconds();
/*
* step 1, read in the journal header block. Check the transaction
if (replay_count > 0) {
reiserfs_info(sb,
"replayed %d transactions in %lu seconds\n",
- replay_count, get_seconds() - start);
+ replay_count, ktime_get_seconds() - start);
}
/* needed to satisfy the locking in _update_journal_header_block */
reiserfs_write_lock(sb);
int new_alloc)
{
struct reiserfs_journal *journal = SB_JOURNAL(th->t_super);
- time_t now = get_seconds();
+ time64_t now = ktime_get_seconds();
/* cannot restart while nested */
BUG_ON(!th->t_trans_id);
if (th->t_refcount > 1)
struct super_block *sb, unsigned long nblocks,
int join)
{
- time_t now = get_seconds();
+ time64_t now = ktime_get_seconds();
unsigned int old_trans_id;
struct reiserfs_journal *journal = SB_JOURNAL(sb);
struct reiserfs_transaction_handle myth;
PROC_INFO_INC(sb, journal.journal_relock_writers);
goto relock;
}
- now = get_seconds();
+ now = ktime_get_seconds();
/*
* if there is no room in the journal OR
}
/* we are the first writer, set trans_id */
if (journal->j_trans_start_time == 0) {
- journal->j_trans_start_time = get_seconds();
+ journal->j_trans_start_time = ktime_get_seconds();
}
atomic_inc(&journal->j_wcount);
journal->j_len_alloc += nblocks;
*/
void reiserfs_flush_old_commits(struct super_block *sb)
{
- time_t now;
+ time64_t now;
struct reiserfs_transaction_handle th;
struct reiserfs_journal *journal = SB_JOURNAL(sb);
- now = get_seconds();
+ now = ktime_get_seconds();
/*
* safety check so we don't flush while we are replaying the log during
* mount
static int check_journal_end(struct reiserfs_transaction_handle *th, int flags)
{
- time_t now;
+ time64_t now;
int flush = flags & FLUSH_ALL;
int commit_now = flags & COMMIT_NOW;
int wait_on_commit = flags & WAIT;
}
/* deal with old transactions where we are the last writers */
- now = get_seconds();
+ now = ktime_get_seconds();
if ((now - journal->j_trans_start_time) > journal->j_max_trans_age) {
commit_now = 1;
journal->j_next_async_flush = 1;
return 0;
}
+static time64_t ktime_mono_to_real_seconds(time64_t mono)
+{
+ ktime_t kt = ktime_set(mono, NSEC_PER_SEC/2);
+
+ return ktime_divns(ktime_mono_to_real(kt), NSEC_PER_SEC);
+}
+
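The NSEC_PER_SEC/2 seed in the helper above places the monotonic timestamp in the middle of its second, so the final division rounds the converted value to the nearest realtime second rather than always truncating. For example (illustrative numbers):

        /* mono = 100 s, realtime - monotonic offset = 50.75 s:
         *   kt            = 100.5 s
         *   mono_to_real  = 151.25 s
         *   ktime_divns   = 151, i.e. 150.75 rounded to the nearest second
         */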
static int show_journal(struct seq_file *m, void *unused)
{
struct super_block *sb = m->private;
"j_bcount: \t%lu\n"
"j_first_unflushed_offset: \t%lu\n"
"j_last_flush_trans_id: \t%u\n"
- "j_trans_start_time: \t%li\n"
+ "j_trans_start_time: \t%lli\n"
"j_list_bitmap_index: \t%i\n"
"j_must_wait: \t%i\n"
"j_next_full_flush: \t%i\n"
JF(j_bcount),
JF(j_first_unflushed_offset),
JF(j_last_flush_trans_id),
- JF(j_trans_start_time),
+ ktime_mono_to_real_seconds(JF(j_trans_start_time)),
JF(j_list_bitmap_index),
JF(j_must_wait),
JF(j_next_full_flush),
struct mutex j_commit_mutex;
unsigned int j_trans_id;
- time_t j_timestamp;
+ time64_t j_timestamp; /* write-only but useful for crash dump analysis */
struct reiserfs_list_bitmap *j_list_bitmap;
struct buffer_head *j_commit_bh; /* commit buffer head */
struct reiserfs_journal_cnode *j_realblock;
struct buffer_head *j_header_bh;
- time_t j_trans_start_time; /* time this transaction started */
+ time64_t j_trans_start_time; /* time this transaction started */
struct mutex j_mutex;
struct mutex j_flush_mutex;
return 0;
size = namelen + 1;
if (b->buf) {
- if (size > b->size)
+ if (b->pos + size > b->size) {
+ b->pos = -ERANGE;
return -ERANGE;
+ }
memcpy(b->buf + b->pos, name, namelen);
b->buf[b->pos + namelen] = 0;
}
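
The added check accounts for the bytes already copied (b->pos), so a buffer that is too small now fails with -ERANGE as listxattr(2) requires, instead of overflowing. Userspace typically sizes the buffer with the usual two-call probe, roughly:

        #include <sys/xattr.h>
        #include <stdlib.h>

        ssize_t probe = listxattr(path, NULL, 0);  /* required size */
        char *buf = malloc(probe);
        ssize_t len = listxattr(path, buf, probe); /* fails with errno == ERANGE
                                                      if the list grew meanwhile */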
static int sysv_sync_fs(struct super_block *sb, int wait)
{
struct sysv_sb_info *sbi = SYSV_SB(sb);
- unsigned long time = get_seconds(), old_time;
+ u32 time = (u32)ktime_get_real_seconds(), old_time;
mutex_lock(&sbi->s_lock);
*/
old_time = fs32_to_cpu(sbi, *sbi->s_sb_time);
if (sbi->s_type == FSTYPE_SYSV4) {
- if (*sbi->s_sb_state == cpu_to_fs32(sbi, 0x7c269d38 - old_time))
- *sbi->s_sb_state = cpu_to_fs32(sbi, 0x7c269d38 - time);
+ if (*sbi->s_sb_state == cpu_to_fs32(sbi, 0x7c269d38u - old_time))
+ *sbi->s_sb_state = cpu_to_fs32(sbi, 0x7c269d38u - time);
*sbi->s_sb_time = cpu_to_fs32(sbi, time);
mark_buffer_dirty(sbi->s_bh2);
}
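
The hunk above preserves the on-disk "clean" invariant while switching clocks: for FSTYPE_SYSV4 the state word is the constant 0x7c269d38 minus the timestamp, so both fields must be rewritten together, and the unsigned literal plus the u32 cast keep the post-2038 wraparound well defined. As an illustrative helper (not part of the patch):

        static bool sysv4_sb_clean(u32 state, u32 time)
        {
                /* equivalently: state + time == 0x7c269d38 (mod 2^32) */
                return state == 0x7c269d38u - time;
        }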
*/
spin_lock(&ctx->fault_pending_wqh.lock);
__wake_up_locked_key(&ctx->fault_pending_wqh, TASK_NORMAL, &range);
- __wake_up_locked_key(&ctx->fault_wqh, TASK_NORMAL, &range);
+ __wake_up(&ctx->fault_wqh, TASK_NORMAL, 1, &range);
spin_unlock(&ctx->fault_pending_wqh.lock);
/* Flush pending events that may still wait on event_wqh */
* anyway.
*/
list_del(&uwq->wq.entry);
- __add_wait_queue(&ctx->fault_wqh, &uwq->wq);
+ add_wait_queue(&ctx->fault_wqh, &uwq->wq);
write_seqcount_end(&ctx->refile_seq);
__wake_up_locked_key(&ctx->fault_pending_wqh, TASK_NORMAL,
range);
if (waitqueue_active(&ctx->fault_wqh))
- __wake_up_locked_key(&ctx->fault_wqh, TASK_NORMAL, range);
+ __wake_up(&ctx->fault_wqh, TASK_NORMAL, 1, range);
spin_unlock(&ctx->fault_pending_wqh.lock);
}
/*
* WARN(), WARN_ON(), WARN_ON_ONCE, and so on can be used to report
- * significant issues that need prompt attention if they should ever
- * appear at runtime. Use the versions with printk format strings
- * to provide better diagnostics.
+ * significant kernel issues that need prompt attention if they should ever
+ * appear at runtime.
+ *
+ * Do not use these macros when checking for invalid external inputs
+ * (e.g. invalid system call arguments, or invalid data coming from
+ * network/devices), or for transient conditions like ENOMEM or EAGAIN.
+ * These macros should be used for recoverable kernel issues only.
+ * For invalid external inputs, transient conditions, etc., use
+ * pr_err[_once/_ratelimited]() followed by dump_stack(), if necessary.
+ * Do not manually include "BUG"/"WARNING" in format strings, so that
+ * these conditions remain distinguishable from real kernel issues.
+ *
+ * Use the versions with printk format strings to provide better diagnostics.
*/
#ifndef __WARN_TAINT
extern __printf(3, 4)
#define KSYM_FUNC(x) x
#endif
#ifdef CONFIG_64BIT
-#define __put .quad
#ifndef KSYM_ALIGN
#define KSYM_ALIGN 8
#endif
#else
-#define __put .long
#ifndef KSYM_ALIGN
#define KSYM_ALIGN 4
#endif
#define KCRC_ALIGN 4
#endif
+.macro __put, val, name
+#ifdef CONFIG_HAVE_ARCH_PREL32_RELOCATIONS
+ .long \val - ., \name - .
+#elif defined(CONFIG_64BIT)
+ .quad \val, \name
+#else
+ .long \val, \name
+#endif
+.endm
+
/*
* note on .section use: @progbits vs %progbits nastiness doesn't matter,
* since we immediately emit into those sections anyway.
#include <linux/timer.h>
#include <linux/workqueue.h>
#include <linux/kref.h>
+#include <linux/refcount.h>
struct page;
struct device;
*/
struct bdi_writeback_congested {
unsigned long state; /* WB_[a]sync_congested flags */
- atomic_t refcnt; /* nr of attached wb's and blkg */
+ refcount_t refcnt; /* nr of attached wb's and blkg */
#ifdef CONFIG_CGROUP_WRITEBACK
struct backing_dev_info *__bdi; /* the associated bdi, set to NULL
static inline struct bdi_writeback_congested *
wb_congested_get_create(struct backing_dev_info *bdi, int blkcg_id, gfp_t gfp)
{
- atomic_inc(&bdi->wb_congested->refcnt);
+ refcount_inc(&bdi->wb_congested->refcnt);
return bdi->wb_congested;
}
static inline void wb_congested_put(struct bdi_writeback_congested *congested)
{
- if (atomic_dec_and_test(&congested->refcnt))
+ if (refcount_dec_and_test(&congested->refcnt))
kfree(congested);
}
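
refcount_t keeps the same get/put shape as atomic_t but saturates instead of wrapping and WARNs on misuse, so an over- or underflow degrades into a leak rather than a use-after-free. The conversion pattern, in general terms:

        refcount_set(&obj->refcnt, 1);            /* was atomic_set(..., 1)    */
        refcount_inc(&obj->refcnt);               /* was atomic_inc()          */
        if (refcount_dec_and_test(&obj->refcnt))  /* was atomic_dec_and_test() */
                kfree(obj);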
#include <asm/types.h>
#include <linux/bits.h>
-#define BITS_TO_LONGS(nr) DIV_ROUND_UP(nr, BITS_PER_BYTE * sizeof(long))
+#define BITS_PER_TYPE(type) (sizeof(type) * BITS_PER_BYTE)
+#define BITS_TO_LONGS(nr) DIV_ROUND_UP(nr, BITS_PER_TYPE(long))
extern unsigned int __sw_hweight8(unsigned int w);
extern unsigned int __sw_hweight16(unsigned int w);
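
BITS_PER_TYPE() generalizes the open-coded sizeof expression; BITS_TO_LONGS() is unchanged in behavior. Concretely, on an LP64 machine:

        BITS_PER_TYPE(u8)    /*  8 */
        BITS_PER_TYPE(long)  /* 64 */
        BITS_TO_LONGS(65)    /*  2, i.e. DIV_ROUND_UP(65, 64) */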
#endif /* __KERNEL__ */
+/*
+ * Force the compiler to emit 'sym' as a symbol, so that we can reference
+ * it from inline assembler. Necessary in case 'sym' could be inlined
+ * otherwise, or eliminated entirely due to lack of references that are
+ * visible to the compiler.
+ */
+#define __ADDRESSABLE(sym) \
+ static void * __attribute__((section(".discard.addressable"), used)) \
+ __PASTE(__addressable_##sym, __LINE__) = (void *)&sym;
+
+/**
+ * offset_to_ptr - convert a relative memory offset to an absolute pointer
+ * @off: the address of the 32-bit offset value
+ */
+static inline void *offset_to_ptr(const int *off)
+{
+ return (void *)((unsigned long)off + *off);
+}
+
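A prel32 reference stores `target - &slot` in a 32-bit field, so it can only reach targets within ±2 GiB of the slot; offset_to_ptr() inverts it by adding the slot's own address back. A self-contained illustration (names are made up):

        static int target;

        static void demo(void)
        {
                int slot = (int)((unsigned long)&target - (unsigned long)&slot);
                void *p = offset_to_ptr(&slot);   /* p == &target */
        }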
#endif /* __ASSEMBLY__ */
#ifndef __optimize
#ifdef __OPTIMIZE__
# define __compiletime_assert(condition, msg, prefix, suffix) \
do { \
- bool __cond = !(condition); \
+ int __cond = !(condition); \
extern void prefix ## suffix(void) __compiletime_error(msg); \
if (__cond) \
prefix ## suffix(); \
#define VMCOREINFO_CONFIG(name) \
vmcoreinfo_append_str("CONFIG_%s=y\n", #name)
+extern unsigned char *vmcoreinfo_data;
+extern size_t vmcoreinfo_size;
extern u32 *vmcoreinfo_note;
Elf_Word *append_elf_note(Elf_Word *buf, char *name, unsigned int type,
--- /dev/null
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * See lib/crc64.c for the related specification and polynomial arithmetic.
+ */
+#ifndef _LINUX_CRC64_H
+#define _LINUX_CRC64_H
+
+#include <linux/types.h>
+
+u64 __pure crc64_be(u64 crc, const void *p, size_t len);
+#endif /* _LINUX_CRC64_H */
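
A usage sketch for the new interface; the seed is caller policy (0 shown here purely for illustration), and chunks can be chained through the running value:

        u64 crc;

        crc = crc64_be(0, buf, len);        /* one-shot */
        crc = crc64_be(crc, more, morelen); /* or incrementally */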
#define VMLINUX_SYMBOL_STR(x) __VMLINUX_SYMBOL_STR(x)
#ifndef __ASSEMBLY__
-struct kernel_symbol
-{
- unsigned long value;
- const char *name;
-};
-
#ifdef MODULE
extern struct module __this_module;
#define THIS_MODULE (&__this_module)
#define __CRC_SYMBOL(sym, sec)
#endif
+#ifdef CONFIG_HAVE_ARCH_PREL32_RELOCATIONS
+#include <linux/compiler.h>
+/*
+ * Emit the ksymtab entry as a pair of relative references: this reduces
+ * the size by half on 64-bit architectures, and eliminates the need for
+ * absolute relocations that require runtime processing on relocatable
+ * kernels.
+ */
+#define __KSYMTAB_ENTRY(sym, sec) \
+ __ADDRESSABLE(sym) \
+ asm(" .section \"___ksymtab" sec "+" #sym "\", \"a\" \n" \
+ " .balign 8 \n" \
+ "__ksymtab_" #sym ": \n" \
+ " .long " #sym "- . \n" \
+ " .long __kstrtab_" #sym "- . \n" \
+ " .previous \n")
+
+struct kernel_symbol {
+ int value_offset;
+ int name_offset;
+};
+#else
+#define __KSYMTAB_ENTRY(sym, sec) \
+ static const struct kernel_symbol __ksymtab_##sym \
+ __attribute__((section("___ksymtab" sec "+" #sym), used)) \
+ = { (unsigned long)&sym, __kstrtab_##sym }
+
+struct kernel_symbol {
+ unsigned long value;
+ const char *name;
+};
+#endif
+
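With the relative layout, each field holds the distance from the field itself to its target, so a consumer recovers the pointer by adding the field's address back, mirroring offset_to_ptr(). A sketch of what a reader of the table would do (the kernel wraps this in a helper; the name below is illustrative):

        static unsigned long ksym_value(const struct kernel_symbol *sym)
        {
        #ifdef CONFIG_HAVE_ARCH_PREL32_RELOCATIONS
                return (unsigned long)&sym->value_offset + sym->value_offset;
        #else
                return sym->value;
        #endif
        }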
/* For every exported symbol, place a struct in the __ksymtab section */
#define ___EXPORT_SYMBOL(sym, sec) \
extern typeof(sym) sym; \
__CRC_SYMBOL(sym, sec) \
static const char __kstrtab_##sym[] \
- __attribute__((section("__ksymtab_strings"), aligned(1))) \
+ __attribute__((section("__ksymtab_strings"), used, aligned(1))) \
= #sym; \
- static const struct kernel_symbol __ksymtab_##sym \
- __used \
- __attribute__((section("___ksymtab" sec "+" #sym), used)) \
- = { (unsigned long)&sym, __kstrtab_##sym }
+ __KSYMTAB_ENTRY(sym, sec)
+
+#if defined(__DISABLE_EXPORTS)
+
+/*
+ * Allow symbol exports to be disabled completely so that C code may
+ * be reused in other execution contexts such as the UEFI stub or the
+ * decompressor.
+ */
+#define __EXPORT_SYMBOL(sym, sec)
-#if defined(__KSYM_DEPS__)
+#elif defined(__KSYM_DEPS__)
/*
* For fine grained build dependencies, we want to tell the build system
typedef int (*initcall_t)(void);
typedef void (*exitcall_t)(void);
-extern initcall_t __con_initcall_start[], __con_initcall_end[];
-extern initcall_t __security_initcall_start[], __security_initcall_end[];
+#ifdef CONFIG_HAVE_ARCH_PREL32_RELOCATIONS
+typedef int initcall_entry_t;
+
+static inline initcall_t initcall_from_entry(initcall_entry_t *entry)
+{
+ return offset_to_ptr(entry);
+}
+#else
+typedef initcall_t initcall_entry_t;
+
+static inline initcall_t initcall_from_entry(initcall_entry_t *entry)
+{
+ return *entry;
+}
+#endif
+
+extern initcall_entry_t __con_initcall_start[], __con_initcall_end[];
+extern initcall_entry_t __security_initcall_start[], __security_initcall_end[];
/* Used for contructor calls. */
typedef void (*ctor_fn_t)(void);
* as KEEP() in the linker script.
*/
-#define __define_initcall(fn, id) \
+#ifdef CONFIG_HAVE_ARCH_PREL32_RELOCATIONS
+#define ___define_initcall(fn, id, __sec) \
+ __ADDRESSABLE(fn) \
+ asm(".section \"" #__sec ".init\", \"a\" \n" \
+ "__initcall_" #fn #id ": \n" \
+ ".long " #fn " - . \n" \
+ ".previous \n");
+#else
+#define ___define_initcall(fn, id, __sec) \
static initcall_t __initcall_##fn##id __used \
- __attribute__((__section__(".initcall" #id ".init"))) = fn;
+ __attribute__((__section__(#__sec ".init"))) = fn;
+#endif
+
+#define __define_initcall(fn, id) ___define_initcall(fn, id, .initcall##id)
/*
* Early initcalls run before initializing SMP.
#define __exitcall(fn) \
static exitcall_t __exitcall_##fn __exit_call = fn
-#define console_initcall(fn) \
- static initcall_t __initcall_##fn \
- __used __section(.con_initcall.init) = fn
-
-#define security_initcall(fn) \
- static initcall_t __initcall_##fn \
- __used __section(.security_initcall.init) = fn
+#define console_initcall(fn) ___define_initcall(fn,, .con_initcall)
+#define security_initcall(fn) ___define_initcall(fn,, .security_initcall)
struct obs_kernel_param {
const char *str;
struct ipc_ids {
int in_use;
unsigned short seq;
- bool tables_initialized;
struct rw_semaphore rwsem;
struct idr ipcs_idr;
- int max_id;
+ int max_idx;
#ifdef CONFIG_CHECKPOINT_RESTORE
int next_id;
#endif
};
#ifdef CONFIG_PROC_KCORE
-extern void kclist_add(struct kcore_list *, void *, size_t, int type);
+void __init kclist_add(struct kcore_list *, void *, size_t, int type);
#else
static inline
void kclist_add(struct kcore_list *new, void *addr, size_t size, int type)
* arguments just once each.
*/
#define __round_mask(x, y) ((__typeof__(x))((y)-1))
+/**
+ * round_up - round up to the next multiple of the specified power of 2
+ * @x: the value to round
+ * @y: multiple to round up to (must be a power of 2)
+ *
+ * Rounds @x up to the next multiple of @y (which must be a power of 2).
+ * To perform arbitrary rounding up, use roundup() below.
+ */
#define round_up(x, y) ((((x)-1) | __round_mask(x, y))+1)
+/**
+ * round_down - round down to the next multiple of the specified power of 2
+ * @x: the value to round
+ * @y: multiple to round down to (must be a power of 2)
+ *
+ * Rounds @x down to the next multiple of @y (which must be a power of 2).
+ * To perform arbitrary rounding down, use rounddown() below.
+ */
#define round_down(x, y) ((x) & ~__round_mask(x, y))
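
Concretely, for the mask-based power-of-2 variants (the divide-based roundup()/rounddown() below handle arbitrary multiples):

        round_up(10, 8)    /* 16 */
        round_up(16, 8)    /* 16, already aligned */
        round_down(10, 8)  /*  8 */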
/**
# define DIV_ROUND_UP_SECTOR_T(ll,d) DIV_ROUND_UP(ll,d)
#endif
-/* The `const' in roundup() prevents gcc-3.3 from calling __divdi3 */
+/**
+ * roundup - round up to the next specified multiple
+ * @x: the value to round up
+ * @y: multiple to round up to
+ *
+ * Rounds @x up to the next multiple of @y. If @y will always be a power
+ * of 2, consider using the faster round_up().
+ *
+ * The `const' here prevents gcc-3.3 from calling __divdi3
+ */
#define roundup(x, y) ( \
{ \
const typeof(y) __y = y; \
(((x) + (__y - 1)) / __y) * __y; \
} \
)
+/**
+ * rounddown - round down to the next specified multiple
+ * @x: the value to round
+ * @y: multiple to round down to
+ *
+ * Rounds @x down to the next multiple of @y. If @y will always be a power
+ * of 2, consider using the faster round_down().
+ */
#define rounddown(x, y) ( \
{ \
typeof(x) __x = (x); \
}
#endif /* CONFIG_HAVE_KVM_VCPU_ASYNC_IOCTL */
-void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm,
- unsigned long start, unsigned long end);
+int kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm,
+ unsigned long start, unsigned long end, bool blockable);
#ifdef CONFIG_HAVE_KVM_VCPU_RUN_PID_CHANGE
int kvm_arch_vcpu_run_pid_change(struct kvm_vcpu *vcpu);
*/
bool use_hierarchy;
+ /*
+ * Should the OOM killer kill all tasks belonging to this cgroup
+ * if it kills one of them?
+ */
+ bool oom_group;
+
/* protected by memcg_oom_lock */
bool oom_lock;
int under_oom;
}
bool mem_cgroup_oom_synchronize(bool wait);
+struct mem_cgroup *mem_cgroup_get_oom_group(struct task_struct *victim,
+ struct mem_cgroup *oom_domain);
+void mem_cgroup_print_oom_group(struct mem_cgroup *memcg);
#ifdef CONFIG_MEMCG_SWAP
extern int do_swap_account;
return false;
}
+static inline struct mem_cgroup *mem_cgroup_get_oom_group(
+ struct task_struct *victim, struct mem_cgroup *oom_domain)
+{
+ return NULL;
+}
+
+static inline void mem_cgroup_print_oom_group(struct mem_cgroup *memcg)
+{
+}
+
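On the OOM-killer side (not part of this excerpt) the intended flow is presumably: after choosing a victim, look up its oom_group and, if one is set, kill every task in that subtree. A sketch under that assumption (helper names such as __oom_kill_process and kill_memcg_member are illustrative):

        struct mem_cgroup *group;

        group = mem_cgroup_get_oom_group(victim, oc->memcg);
        __oom_kill_process(victim);
        if (group) {
                mem_cgroup_print_oom_group(group);
                mem_cgroup_scan_tasks(group, kill_memcg_member, NULL);
                mem_cgroup_put(group);
        }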
static inline unsigned long memcg_page_state(struct mem_cgroup *memcg,
int idx)
{
static inline void remove_memory(int nid, u64 start, u64 size) {}
#endif /* CONFIG_MEMORY_HOTREMOVE */
+extern void __ref free_area_init_core_hotplug(int nid);
extern int walk_memory_range(unsigned long start_pfn, unsigned long end_pfn,
void *arg, int (*func)(struct memory_block *, void *));
extern int add_memory(int nid, u64 start, u64 size);
{
static const struct vm_operations_struct dummy_vm_ops = {};
+ memset(vma, 0, sizeof(*vma));
vma->vm_mm = mm;
vma->vm_ops = &dummy_vm_ops;
INIT_LIST_HEAD(&vma->anon_vma_chain);
return (page->flags >> ZONEID_PGSHIFT) & ZONEID_MASK;
}
-static inline int zone_to_nid(struct zone *zone)
-{
-#ifdef CONFIG_NUMA
- return zone->node;
-#else
- return 0;
-#endif
-}
-
#ifdef NODE_NOT_IN_PAGE_FLAGS
extern int page_to_nid(const struct page *page);
#else
extern void __init pagecache_init(void);
extern void free_area_init(unsigned long * zones_size);
-extern void free_area_init_node(int nid, unsigned long * zones_size,
+extern void __init free_area_init_node(int nid, unsigned long * zones_size,
unsigned long zone_start_pfn, unsigned long *zholes_size);
extern void free_initmem(void);
* address space but may still be referenced by sptes until
* the last refcount is dropped.
*
- * If both of these callbacks cannot block, and invalidate_range
- * cannot block, mmu_notifier_ops.flags should have
- * MMU_INVALIDATE_DOES_NOT_BLOCK set.
+ * If the blockable argument is false then the callback must not
+ * sleep; it has to return -EAGAIN if it would need to block, and
+ * 0 otherwise.
*/
- void (*invalidate_range_start)(struct mmu_notifier *mn,
+ int (*invalidate_range_start)(struct mmu_notifier *mn,
struct mm_struct *mm,
- unsigned long start, unsigned long end);
+ unsigned long start, unsigned long end,
+ bool blockable);
void (*invalidate_range_end)(struct mmu_notifier *mn,
struct mm_struct *mm,
unsigned long start, unsigned long end);
unsigned long address);
extern void __mmu_notifier_change_pte(struct mm_struct *mm,
unsigned long address, pte_t pte);
-extern void __mmu_notifier_invalidate_range_start(struct mm_struct *mm,
- unsigned long start, unsigned long end);
+extern int __mmu_notifier_invalidate_range_start(struct mm_struct *mm,
+ unsigned long start, unsigned long end,
+ bool blockable);
extern void __mmu_notifier_invalidate_range_end(struct mm_struct *mm,
unsigned long start, unsigned long end,
bool only_end);
unsigned long start, unsigned long end)
{
if (mm_has_notifiers(mm))
- __mmu_notifier_invalidate_range_start(mm, start, end);
+ __mmu_notifier_invalidate_range_start(mm, start, end, true);
+}
+
+static inline int mmu_notifier_invalidate_range_start_nonblock(struct mm_struct *mm,
+ unsigned long start, unsigned long end)
+{
+ if (mm_has_notifiers(mm))
+ return __mmu_notifier_invalidate_range_start(mm, start, end, false);
+ return 0;
}
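
The _nonblock variant exists so that a non-sleeping caller, such as the oom reaper, can attempt the invalidation and back off instead of deadlocking against a blocking notifier. Roughly:

        if (mmu_notifier_invalidate_range_start_nonblock(mm, start, end))
                return false;   /* a notifier would have blocked: retry later */
        /* ... unmap the range ... */
        mmu_notifier_invalidate_range_end(mm, start, end);
        return true;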
static inline void mmu_notifier_invalidate_range_end(struct mm_struct *mm,
{
}
+static inline int mmu_notifier_invalidate_range_start_nonblock(struct mm_struct *mm,
+ unsigned long start, unsigned long end)
+{
+ return 0;
+}
+
static inline void mmu_notifier_invalidate_range_end(struct mm_struct *mm,
unsigned long start, unsigned long end)
{
return !pgdat->node_start_pfn && !pgdat->node_spanned_pages;
}
-static inline int zone_id(const struct zone *zone)
-{
- struct pglist_data *pgdat = zone->zone_pgdat;
-
- return zone - pgdat->node_zones;
-}
-
-#ifdef CONFIG_ZONE_DEVICE
-static inline bool is_dev_zone(const struct zone *zone)
-{
- return zone_id(zone) == ZONE_DEVICE;
-}
-#else
-static inline bool is_dev_zone(const struct zone *zone)
-{
- return false;
-}
-#endif
-
#include <linux/memory_hotplug.h>
void build_all_zonelists(pg_data_t *pgdat);
*/
#define zone_idx(zone) ((zone) - (zone)->zone_pgdat->node_zones)
+#ifdef CONFIG_ZONE_DEVICE
+static inline bool is_dev_zone(const struct zone *zone)
+{
+ return zone_idx(zone) == ZONE_DEVICE;
+}
+#else
+static inline bool is_dev_zone(const struct zone *zone)
+{
+ return false;
+}
+#endif
+
/*
* Returns true if a zone has pages managed by the buddy allocator.
* All the reclaim decisions have to use this function rather than
return zone->present_pages;
}
+#ifdef CONFIG_NUMA
+static inline int zone_to_nid(struct zone *zone)
+{
+ return zone->node;
+}
+
+static inline void zone_set_nid(struct zone *zone, int nid)
+{
+ zone->node = nid;
+}
+#else
+static inline int zone_to_nid(struct zone *zone)
+{
+ return 0;
+}
+
+static inline void zone_set_nid(struct zone *zone, int nid) {}
+#endif
+
extern int movable_zone;
#ifdef CONFIG_HIGHMEM
static inline int zonelist_node_idx(struct zoneref *zoneref)
{
-#ifdef CONFIG_NUMA
- /* zone_to_nid not available in this context */
- return zoneref->zone->node;
-#else
- return 0;
-#endif /* CONFIG_NUMA */
+ return zone_to_nid(zoneref->zone);
}
struct zoneref *__next_zones_zonelist(struct zoneref *z,
}
#define NET_DIM_NEVENTS 64
-#define BITS_PER_TYPE(type) (sizeof(type) * BITS_PER_BYTE)
#define BIT_GAP(bits, end, start) ((((end) - (start)) + BIT_ULL(bits)) & (BIT_ULL(bits) - 1))
static inline void net_dim_calc_stats(struct net_dim_sample *start,
* NODEMASK_ALLOC(type, name) allocates an object with a specified type and
* name.
*/
-#if NODES_SHIFT > 8 /* nodemask_t > 256 bytes */
+#if NODES_SHIFT > 8 /* nodemask_t > 32 bytes */
#define NODEMASK_ALLOC(type, name, gfp_flags) \
type *name = kmalloc(sizeof(*name), gfp_flags)
#define NODEMASK_FREE(m) kfree(m)
return 0;
}
-void __oom_reap_task_mm(struct mm_struct *mm);
+bool __oom_reap_task_mm(struct mm_struct *mm);
extern unsigned long oom_badness(struct task_struct *p,
struct mem_cgroup *memcg, const nodemask_t *nodemask,
u16 device; /* Or PCI_ANY_ID */
u32 class; /* Or PCI_ANY_ID */
unsigned int class_shift; /* should be 0, 8, 16 */
+#ifdef CONFIG_HAVE_ARCH_PREL32_RELOCATIONS
+ int hook_offset;
+#else
void (*hook)(struct pci_dev *dev);
+#endif
};
enum pci_fixup_pass {
pci_fixup_suspend_late, /* pci_device_suspend_late() */
};
+#ifdef CONFIG_HAVE_ARCH_PREL32_RELOCATIONS
+#define __DECLARE_PCI_FIXUP_SECTION(sec, name, vendor, device, class, \
+ class_shift, hook) \
+ __ADDRESSABLE(hook) \
+ asm(".section " #sec ", \"a\" \n" \
+ ".balign 16 \n" \
+ ".short " #vendor ", " #device " \n" \
+ ".long " #class ", " #class_shift " \n" \
+ ".long " #hook " - . \n" \
+ ".previous \n");
+#define DECLARE_PCI_FIXUP_SECTION(sec, name, vendor, device, class, \
+ class_shift, hook) \
+ __DECLARE_PCI_FIXUP_SECTION(sec, name, vendor, device, class, \
+ class_shift, hook)
+#else
/* Anonymous variables would be nice... */
#define DECLARE_PCI_FIXUP_SECTION(section, name, vendor, device, class, \
class_shift, hook) \
static const struct pci_fixup __PASTE(__pci_fixup_##name,__LINE__) __used \
__attribute__((__section__(#section), aligned((sizeof(void *))))) \
= { vendor, device, class, class_shift, hook };
+#endif
#define DECLARE_PCI_FIXUP_CLASS_EARLY(vendor, device, class, \
class_shift, hook) \
(typeof(type) __percpu *)__alloc_percpu(sizeof(type), \
__alignof__(type))
+extern unsigned long pcpu_nr_pages(void);
+
#endif /* __LINUX_PERCPU_H */
struct ns_common *(*get_ns)(struct ns_common *ns));
/* get the associated pid namespace for a file in procfs */
-static inline struct pid_namespace *proc_pid_ns(struct inode *inode)
+static inline struct pid_namespace *proc_pid_ns(const struct inode *inode)
{
return inode->i_sb->s_fs_info;
}
#endif
#ifdef CONFIG_DETECT_HUNG_TASK
unsigned long last_switch_count;
+ unsigned long last_switch_time;
#endif
/* Filesystem information: */
struct fs_struct *fs;
int force_sig_ptrace_errno_trap(int errno, void __user *addr);
extern int send_sig_info(int, struct siginfo *, struct task_struct *);
-extern int force_sigsegv(int, struct task_struct *);
+extern void force_sigsegv(int sig, struct task_struct *p);
extern int force_sig_info(int, struct siginfo *, struct task_struct *);
extern int __kill_pgrp_info(int sig, struct siginfo *info, struct pid *pgrp);
extern int kill_pid_info(int sig, struct siginfo *info, struct pid *pid);
extern int sysctl_hung_task_check_count;
extern unsigned int sysctl_hung_task_panic;
extern unsigned long sysctl_hung_task_timeout_secs;
+extern unsigned long sysctl_hung_task_check_interval_secs;
extern int sysctl_hung_task_warnings;
extern int proc_dohung_task_timeout_secs(struct ctl_table *table, int write,
void __user *buffer,
#include <linux/uidgid.h>
#include <linux/atomic.h>
+#include <linux/refcount.h>
#include <linux/ratelimit.h>
struct key;
* Some day this will be a full-fledged user tracking system..
*/
struct user_struct {
- atomic_t __count; /* reference count */
+ refcount_t __count; /* reference count */
atomic_t processes; /* How many processes does this user have? */
atomic_t sigpending; /* How many pending signals does this user have? */
#ifdef CONFIG_FANOTIFY
extern struct user_struct * alloc_uid(kuid_t);
static inline struct user_struct *get_uid(struct user_struct *u)
{
- atomic_inc(&u->__count);
+ refcount_inc(&u->__count);
return u;
}
extern void free_uid(struct user_struct *);
struct shrink_control {
gfp_t gfp_mask;
+ /* current node being shrunk (for NUMA aware shrinkers) */
+ int nid;
+
/*
* How many objects scan_objects should scan and try to reclaim.
* This is reset before every call, so it is safe for callees
*/
unsigned long nr_scanned;
- /* current node being shrunk (for NUMA aware shrinkers) */
- int nid;
-
/* current memcg being shrunk (for memcg aware shrinkers) */
struct mem_cgroup *memcg;
};
unsigned long (*scan_objects)(struct shrinker *,
struct shrink_control *sc);
- int seeks; /* seeks to recreate an obj */
long batch; /* reclaim batch size, 0 = default */
- unsigned long flags;
+ int seeks; /* seeks to recreate an obj */
+ unsigned flags;
/* These are for internal use */
struct list_head list;
extern void __set_current_blocked(const sigset_t *);
extern int show_unhandled_signals;
-extern int get_signal(struct ksignal *ksig);
+extern bool get_signal(struct ksignal *ksig);
extern void signal_setup_done(int failed, struct ksignal *ksig, int stepping);
extern void exit_signals(struct task_struct *tsk);
extern void kernel_sigaction(int, __sighandler_t);
extern struct kmem_cache *sighand_cachep;
-int unhandled_signal(struct task_struct *tsk, int sig);
+extern bool unhandled_signal(struct task_struct *tsk, int sig);
/*
* In POSIX a signal is sent either to a specific thread (Linux task)
extern swp_entry_t get_swap_page(struct page *page);
extern void put_swap_page(struct page *page, swp_entry_t entry);
extern swp_entry_t get_swap_page_of_type(int);
-extern int get_swap_pages(int n, bool cluster, swp_entry_t swp_entries[]);
+extern int get_swap_pages(int n, swp_entry_t swp_entries[], int entry_size);
extern int add_swap_count_continuation(swp_entry_t, gfp_t);
extern void swap_shmem_alloc(swp_entry_t);
extern int swap_duplicate(swp_entry_t);
return static_key_false(&__tracepoint_##name.key); \
}
+#ifdef CONFIG_HAVE_ARCH_PREL32_RELOCATIONS
+#define __TRACEPOINT_ENTRY(name) \
+ asm(" .section \"__tracepoints_ptrs\", \"a\" \n" \
+ " .balign 4 \n" \
+ " .long __tracepoint_" #name " - . \n" \
+ " .previous \n")
+#else
+#define __TRACEPOINT_ENTRY(name) \
+ static struct tracepoint * const __tracepoint_ptr_##name __used \
+ __attribute__((section("__tracepoints_ptrs"))) = \
+ &__tracepoint_##name
+#endif
+
/*
* We have no guarantee that gcc and the linker won't up-align the tracepoint
* structures, so we create an array of pointers that will be used for iteration
static const char __tpstrtab_##name[] \
__attribute__((section("__tracepoints_strings"))) = #name; \
struct tracepoint __tracepoint_##name \
- __attribute__((section("__tracepoints"))) = \
+ __attribute__((section("__tracepoints"), used)) = \
{ __tpstrtab_##name, STATIC_KEY_INIT_FALSE, reg, unreg, NULL };\
- static struct tracepoint * const __tracepoint_ptr_##name __used \
- __attribute__((section("__tracepoints_ptrs"))) = \
- &__tracepoint_##name;
+ __TRACEPOINT_ENTRY(name);
#define DEFINE_TRACE(name) \
DEFINE_TRACE_FN(name, NULL, NULL);
*/
int rbt_ib_umem_for_each_in_range(struct rb_root_cached *root,
u64 start, u64 end,
- umem_call_back cb, void *cookie);
+ umem_call_back cb,
+ bool blockable, void *cookie);
/*
* Find first region intersecting with address range.
#define AUTOFS_MIN_PROTO_VERSION 3
#define AUTOFS_MAX_PROTO_VERSION 5
-#define AUTOFS_PROTO_SUBVERSION 2
+#define AUTOFS_PROTO_SUBVERSION 3
/*
* The wait_queue_token (autofs_wqt_t) is part of a structure which is passed
/* autofs version 4 and later definitions */
/* Mask for expire behaviour */
-#define AUTOFS_EXP_IMMEDIATE 1
-#define AUTOFS_EXP_LEAVES 2
+#define AUTOFS_EXP_NORMAL 0x00
+#define AUTOFS_EXP_IMMEDIATE 0x01
+#define AUTOFS_EXP_LEAVES 0x02
+#define AUTOFS_EXP_FORCED 0x04
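
Making the expire modes proper bit flags means the new AUTOFS_EXP_FORCED can be combined with the existing behaviours rather than being mutually exclusive, e.g. (illustrative):

        int how = AUTOFS_EXP_FORCED | AUTOFS_EXP_LEAVES;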
#define AUTOFS_TYPE_ANY 0U
#define AUTOFS_TYPE_INDIRECT 1U
help
If you say Y here, the process accounting information is written
in a new file format that also logs the process IDs of each
- process and it's parent. Note that this file format is incompatible
+ process and its parent. Note that this file format is incompatible
with previous v0/v1/v2 file formats, so you will need updated tools
for processing it. A preliminary version of these tools is available
at <http://www.gnu.org/software/acct/>.
endif # NAMESPACES
+config CHECKPOINT_RESTORE
+ bool "Checkpoint/restore support"
+ select PROC_CHILDREN
+ default n
+ help
+	  Enables additional kernel features for the sake of checkpoint/restore.
+	  In particular it adds auxiliary prctl codes to set up process text,
+	  data and heap segment sizes, and a few additional /proc filesystem
+	  entries.
+
+ If unsure, say N here.
+
config SCHED_AUTOGROUP
bool "Automatic process group scheduling"
select CGROUPS
If unsure, say Y.
-config CHECKPOINT_RESTORE
- bool "Checkpoint/restore support" if EXPERT
- select PROC_CHILDREN
- default n
- help
- Enables additional kernel features in a sake of checkpoint/restore.
- In particular it adds auxiliary prctl codes to setup process text,
- data and heap segment sizes, and a few additional /proc filesystem
- entries.
-
- If unsure, say N here.
-
config KALLSYMS
bool "Load all symbols for debugging/ksymoops" if EXPERT
default y
default n
help
Normally, and according to the Linux spec, anonymous memory obtained
- from mmap() has it's contents cleared before it is passed to
+ from mmap() has its contents cleared before it is passed to
userspace. Enabling this config option allows you to request that
mmap() skip that if it is given an MAP_UNINITIALIZED flag, thus
providing a huge performance boost. If this option is not enabled,
-/*
- * Many of the syscalls used in this file expect some of the arguments
- * to be __user pointers not __kernel pointers. To limit the sparse
- * noise, turn off sparse checking for this file.
- */
-#ifdef __CHECKER__
-#undef __CHECKER__
-#warning "Sparse checking disabled for this file"
-#endif
-
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/ctype.h>
// SPDX-License-Identifier: GPL-2.0
-/*
- * Many of the syscalls used in this file expect some of the arguments
- * to be __user pointers not __kernel pointers. To limit the sparse
- * noise, turn off sparse checking for this file.
- */
-#ifdef __CHECKER__
-#undef __CHECKER__
-#warning "Sparse checking disabled for this file"
-#endif
-
#include <linux/unistd.h>
#include <linux/kernel.h>
#include <linux/fs.h>
// SPDX-License-Identifier: GPL-2.0
-/*
- * Many of the syscalls used in this file expect some of the arguments
- * to be __user pointers not __kernel pointers. To limit the sparse
- * noise, turn off sparse checking for this file.
- */
-#ifdef __CHECKER__
-#undef __CHECKER__
-#warning "Sparse checking disabled for this file"
-#endif
-
#include <linux/delay.h>
#include <linux/raid/md_u.h>
#include <linux/raid/md_p.h>
// SPDX-License-Identifier: GPL-2.0
-/*
- * Many of the syscalls used in this file expect some of the arguments
- * to be __user pointers not __kernel pointers. To limit the sparse
- * noise, turn off sparse checking for this file.
- */
-#ifdef __CHECKER__
-#undef __CHECKER__
-#warning "Sparse checking disabled for this file"
-#endif
-
#include <linux/kernel.h>
#include <linux/fs.h>
#include <linux/minix_fs.h>
// SPDX-License-Identifier: GPL-2.0
-/*
- * Many of the syscalls used in this file expect some of the arguments
- * to be __user pointers not __kernel pointers. To limit the sparse
- * noise, turn off sparse checking for this file.
- */
-#ifdef __CHECKER__
-#undef __CHECKER__
-#warning "Sparse checking disabled for this file"
-#endif
-
#include <linux/init.h>
#include <linux/fs.h>
#include <linux/slab.h>
}
-extern initcall_t __initcall_start[];
-extern initcall_t __initcall0_start[];
-extern initcall_t __initcall1_start[];
-extern initcall_t __initcall2_start[];
-extern initcall_t __initcall3_start[];
-extern initcall_t __initcall4_start[];
-extern initcall_t __initcall5_start[];
-extern initcall_t __initcall6_start[];
-extern initcall_t __initcall7_start[];
-extern initcall_t __initcall_end[];
-
-static initcall_t *initcall_levels[] __initdata = {
+extern initcall_entry_t __initcall_start[];
+extern initcall_entry_t __initcall0_start[];
+extern initcall_entry_t __initcall1_start[];
+extern initcall_entry_t __initcall2_start[];
+extern initcall_entry_t __initcall3_start[];
+extern initcall_entry_t __initcall4_start[];
+extern initcall_entry_t __initcall5_start[];
+extern initcall_entry_t __initcall6_start[];
+extern initcall_entry_t __initcall7_start[];
+extern initcall_entry_t __initcall_end[];
+
+static initcall_entry_t *initcall_levels[] __initdata = {
__initcall0_start,
__initcall1_start,
__initcall2_start,
static void __init do_initcall_level(int level)
{
- initcall_t *fn;
+ initcall_entry_t *fn;
strcpy(initcall_command_line, saved_command_line);
parse_args(initcall_level_names[level],
trace_initcall_level(initcall_level_names[level]);
for (fn = initcall_levels[level]; fn < initcall_levels[level+1]; fn++)
- do_one_initcall(*fn);
+ do_one_initcall(initcall_from_entry(fn));
}
static void __init do_initcalls(void)
static void __init do_pre_smp_initcalls(void)
{
- initcall_t *fn;
+ initcall_entry_t *fn;
trace_initcall_level("early");
for (fn = __initcall_start; fn < __initcall0_start; fn++)
- do_one_initcall(*fn);
+ do_one_initcall(initcall_from_entry(fn));
}
/*
static int run_init_process(const char *init_filename)
{
argv_init[0] = init_filename;
+ pr_info("Run %s as init process\n", init_filename);
return do_execve(getname_kernel(init_filename),
(const char __user *const __user *)argv_init,
(const char __user *const __user *)envp_init);
/* ipc_addid() locks msq upon success. */
retval = ipc_addid(&msg_ids(ns), &msq->q_perm, ns->msg_ctlmni);
if (retval < 0) {
- call_rcu(&msq->q_perm.rcu, msg_rcu_free);
+ ipc_rcu_putref(&msq->q_perm, msg_rcu_free);
return retval;
}
down_write(&msg_ids(ns).rwsem);
rcu_read_lock();
- ipcp = ipcctl_pre_down_nolock(ns, &msg_ids(ns), msqid, cmd,
+ ipcp = ipcctl_obtain_check(ns, &msg_ids(ns), msqid, cmd,
&msqid64->msg_perm, msqid64->msg_qbytes);
if (IS_ERR(ipcp)) {
err = PTR_ERR(ipcp);
int cmd, struct msginfo *msginfo)
{
int err;
- int max_id;
+ int max_idx;
/*
* We must not return kernel stack data.
msginfo->msgpool = MSGPOOL;
msginfo->msgtql = MSGTQL;
}
- max_id = ipc_get_maxid(&msg_ids(ns));
+ max_idx = ipc_get_maxidx(&msg_ids(ns));
up_read(&msg_ids(ns).rwsem);
- return (max_id < 0) ? 0 : max_id;
+ return (max_idx < 0) ? 0 : max_idx;
}
static int msgctl_stat(struct ipc_namespace *ns, int msqid,
int cmd, struct msqid64_ds *p)
{
struct msg_queue *msq;
- int id = 0;
int err;
memset(p, 0, sizeof(*p));
err = PTR_ERR(msq);
goto out_unlock;
}
- id = msq->q_perm.id;
} else { /* IPC_STAT */
msq = msq_obtain_object_check(ns, msqid);
if (IS_ERR(msq)) {
p->msg_lspid = pid_vnr(msq->q_lspid);
p->msg_lrpid = pid_vnr(msq->q_lrpid);
- ipc_unlock_object(&msq->q_perm);
- rcu_read_unlock();
- return id;
+ if (cmd == IPC_STAT) {
+ /*
+ * As defined in SUS:
+ * Return 0 on success
+ */
+ err = 0;
+ } else {
+ /*
+ * MSG_STAT and MSG_STAT_ANY (both Linux specific)
+ * Return the full id, including the sequence number
+ */
+ err = msq->q_perm.id;
+ }
+ ipc_unlock_object(&msq->q_perm);
out_unlock:
rcu_read_unlock();
return err;
}
#endif
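
The rewritten exit path makes the two return conventions explicit in one place. From userspace the difference looks like this (MSG_STAT is Linux-specific):

        struct msqid_ds ds;
        int ret, full_id;

        ret = msgctl(msqid, IPC_STAT, &ds);     /* by id; 0 on success         */
        full_id = msgctl(index, MSG_STAT, &ds); /* by index; returns the full
                                                   id, SEQ_MULTIPLIER*seq+idx  */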
-int msg_init_ns(struct ipc_namespace *ns)
+void msg_init_ns(struct ipc_namespace *ns)
{
ns->msg_ctlmax = MSGMAX;
ns->msg_ctlmnb = MSGMNB;
atomic_set(&ns->msg_bytes, 0);
atomic_set(&ns->msg_hdrs, 0);
- return ipc_init_ids(&ns->ids[IPC_MSG_IDS]);
+ ipc_init_ids(&ns->ids[IPC_MSG_IDS]);
}
#ifdef CONFIG_IPC_NS
}
#endif
-int __init msg_init(void)
+void __init msg_init(void)
{
- const int err = msg_init_ns(&init_ipc_ns);
+ msg_init_ns(&init_ipc_ns);
ipc_init_proc_interface("sysvipc/msg",
" key msqid perms cbytes qnum lspid lrpid uid gid cuid cgid stime rtime ctime\n",
IPC_MSG_IDS, sysvipc_msg_proc_show);
- return err;
}
ns->user_ns = get_user_ns(user_ns);
ns->ucounts = ucounts;
- err = sem_init_ns(ns);
+ err = mq_init_ns(ns);
if (err)
goto fail_put;
- err = msg_init_ns(ns);
- if (err)
- goto fail_destroy_sem;
- err = shm_init_ns(ns);
- if (err)
- goto fail_destroy_msg;
- err = mq_init_ns(ns);
- if (err)
- goto fail_destroy_shm;
+ sem_init_ns(ns);
+ msg_init_ns(ns);
+ shm_init_ns(ns);
return ns;
-fail_destroy_shm:
- shm_exit_ns(ns);
-fail_destroy_msg:
- msg_exit_ns(ns);
-fail_destroy_sem:
- sem_exit_ns(ns);
fail_put:
put_user_ns(ns->user_ns);
ns_free_inum(&ns->ns);
#define sc_semopm sem_ctls[2]
#define sc_semmni sem_ctls[3]
-int sem_init_ns(struct ipc_namespace *ns)
+void sem_init_ns(struct ipc_namespace *ns)
{
ns->sc_semmsl = SEMMSL;
ns->sc_semmns = SEMMNS;
ns->sc_semopm = SEMOPM;
ns->sc_semmni = SEMMNI;
ns->used_sems = 0;
- return ipc_init_ids(&ns->ids[IPC_SEM_IDS]);
+ ipc_init_ids(&ns->ids[IPC_SEM_IDS]);
}
#ifdef CONFIG_IPC_NS
}
#endif
-int __init sem_init(void)
+void __init sem_init(void)
{
- const int err = sem_init_ns(&init_ipc_ns);
-
+ sem_init_ns(&init_ipc_ns);
ipc_init_proc_interface("sysvipc/sem",
" key semid perms nsems uid gid cuid cgid otime ctime\n",
IPC_SEM_IDS, sysvipc_sem_proc_show);
- return err;
}
/**
/* ipc_addid() locks sma upon success. */
retval = ipc_addid(&sem_ids(ns), &sma->sem_perm, ns->sc_semmni);
if (retval < 0) {
- call_rcu(&sma->sem_perm.rcu, sem_rcu_free);
+ ipc_rcu_putref(&sma->sem_perm, sem_rcu_free);
return retval;
}
ns->used_sems += nsems;
{
struct sem_array *sma;
time64_t semotime;
- int id = 0;
int err;
memset(semid64, 0, sizeof(*semid64));
err = PTR_ERR(sma);
goto out_unlock;
}
- id = sma->sem_perm.id;
} else { /* IPC_STAT */
sma = sem_obtain_object_check(ns, semid);
if (IS_ERR(sma)) {
#endif
semid64->sem_nsems = sma->sem_nsems;
+ if (cmd == IPC_STAT) {
+ /*
+ * As defined in SUS:
+ * Return 0 on success
+ */
+ err = 0;
+ } else {
+ /*
+ * SEM_STAT and SEM_STAT_ANY (both Linux specific)
+ * Return the full id, including the sequence number
+ */
+ err = sma->sem_perm.id;
+ }
ipc_unlock_object(&sma->sem_perm);
- rcu_read_unlock();
- return id;
-
out_unlock:
rcu_read_unlock();
return err;
int cmd, void __user *p)
{
struct seminfo seminfo;
- int max_id;
+ int max_idx;
int err;
err = security_sem_semctl(NULL, cmd);
seminfo.semusz = SEMUSZ;
seminfo.semaem = SEMAEM;
}
- max_id = ipc_get_maxid(&sem_ids(ns));
+ max_idx = ipc_get_maxidx(&sem_ids(ns));
up_read(&sem_ids(ns).rwsem);
if (copy_to_user(p, &seminfo, sizeof(struct seminfo)))
return -EFAULT;
- return (max_id < 0) ? 0 : max_id;
+ return (max_idx < 0) ? 0 : max_idx;
}
static int semctl_setval(struct ipc_namespace *ns, int semid, int semnum,
down_write(&sem_ids(ns).rwsem);
rcu_read_lock();
- ipcp = ipcctl_pre_down_nolock(ns, &sem_ids(ns), semid, cmd,
+ ipcp = ipcctl_obtain_check(ns, &sem_ids(ns), semid, cmd,
&semid64->sem_perm, 0);
if (IS_ERR(ipcp)) {
err = PTR_ERR(ipcp);
static int sysvipc_shm_proc_show(struct seq_file *s, void *it);
#endif
-int shm_init_ns(struct ipc_namespace *ns)
+void shm_init_ns(struct ipc_namespace *ns)
{
ns->shm_ctlmax = SHMMAX;
ns->shm_ctlall = SHMALL;
ns->shm_ctlmni = SHMMNI;
ns->shm_rmid_forced = 0;
ns->shm_tot = 0;
- return ipc_init_ids(&shm_ids(ns));
+ ipc_init_ids(&shm_ids(ns));
}
/*
static int __init ipc_ns_init(void)
{
- const int err = shm_init_ns(&init_ipc_ns);
- WARN(err, "ipc: sysv shm_init_ns failed: %d\n", err);
- return err;
+ shm_init_ns(&init_ipc_ns);
+ return 0;
}
pure_initcall(ipc_ns_init);
*/
static inline struct shmid_kernel *shm_lock(struct ipc_namespace *ns, int id)
{
- struct kern_ipc_perm *ipcp = ipc_lock(&shm_ids(ns), id);
+ struct kern_ipc_perm *ipcp;
+
+ rcu_read_lock();
+ ipcp = ipc_obtain_object_idr(&shm_ids(ns), id);
+ if (IS_ERR(ipcp))
+ goto err;
+ ipc_lock_object(ipcp);
+ /*
+ * ipc_rmid() may have already freed the ID while ipc_lock_object()
+ * was spinning: here verify that the structure is still valid.
+ * Upon races with RMID, return -EIDRM, thus indicating that
+ * the ID points to a removed identifier.
+ */
+ if (ipc_valid_object(ipcp)) {
+ /* return a locked ipc object upon success */
+ return container_of(ipcp, struct shmid_kernel, shm_perm);
+ }
+
+ ipc_unlock_object(ipcp);
+err:
+ rcu_read_unlock();
/*
* Callers of shm_lock() must validate the status of the returned ipc
- * object pointer (as returned by ipc_lock()), and error out as
- * appropriate.
+ * object pointer and error out as appropriate.
*/
- if (IS_ERR(ipcp))
- return (void *)ipcp;
- return container_of(ipcp, struct shmid_kernel, shm_perm);
+ return (void *)ipcp;
}
static inline void shm_lock_by_ptr(struct shmid_kernel *ipcp)
if (is_file_hugepages(file) && shp->mlock_user)
user_shm_unlock(size, shp->mlock_user);
fput(file);
+ ipc_rcu_putref(&shp->shm_perm, shm_rcu_free);
+ return error;
no_file:
call_rcu(&shp->shm_perm.rcu, shm_rcu_free);
return error;
down_write(&shm_ids(ns).rwsem);
rcu_read_lock();
- ipcp = ipcctl_pre_down_nolock(ns, &shm_ids(ns), shmid, cmd,
+ ipcp = ipcctl_obtain_check(ns, &shm_ids(ns), shmid, cmd,
&shmid64->shm_perm, 0);
if (IS_ERR(ipcp)) {
err = PTR_ERR(ipcp);
shminfo->shmall = ns->shm_ctlall;
shminfo->shmmin = SHMMIN;
down_read(&shm_ids(ns).rwsem);
- err = ipc_get_maxid(&shm_ids(ns));
+ err = ipc_get_maxidx(&shm_ids(ns));
up_read(&shm_ids(ns).rwsem);
if (err < 0)
err = 0;
shm_info->shm_tot = ns->shm_tot;
shm_info->swap_attempts = 0;
shm_info->swap_successes = 0;
- err = ipc_get_maxid(&shm_ids(ns));
+ err = ipc_get_maxidx(&shm_ids(ns));
up_read(&shm_ids(ns).rwsem);
if (err < 0)
err = 0;
int cmd, struct shmid64_ds *tbuf)
{
struct shmid_kernel *shp;
- int id = 0;
int err;
memset(tbuf, 0, sizeof(*tbuf));
err = PTR_ERR(shp);
goto out_unlock;
}
- id = shp->shm_perm.id;
} else { /* IPC_STAT */
shp = shm_obtain_object_check(ns, shmid);
if (IS_ERR(shp)) {
tbuf->shm_lpid = pid_vnr(shp->shm_lprid);
tbuf->shm_nattch = shp->shm_nattch;
- ipc_unlock_object(&shp->shm_perm);
- rcu_read_unlock();
- return id;
+ if (cmd == IPC_STAT) {
+ /*
+ * As defined in SUS:
+ * Return 0 on success
+ */
+ err = 0;
+ } else {
+ /*
+ * SHM_STAT and SHM_STAT_ANY (both Linux specific)
+ * Return the full id, including the sequence number
+ */
+ err = shp->shm_perm.id;
+ }
+ ipc_unlock_object(&shp->shm_perm);
out_unlock:
rcu_read_unlock();
return err;
*/
static int __init ipc_init(void)
{
- int err_sem, err_msg;
-
proc_mkdir("sysvipc", NULL);
- err_sem = sem_init();
- WARN(err_sem, "ipc: sysv sem_init failed: %d\n", err_sem);
- err_msg = msg_init();
- WARN(err_msg, "ipc: sysv msg_init failed: %d\n", err_msg);
+ sem_init();
+ msg_init();
shm_init();
- return err_msg ? err_msg : err_sem;
+ return 0;
}
device_initcall(ipc_init);
* Set up the sequence range to use for the ipc identifier range (limited
* below IPCMNI) then initialise the keys hashtable and ids idr.
*/
-int ipc_init_ids(struct ipc_ids *ids)
+void ipc_init_ids(struct ipc_ids *ids)
{
- int err;
ids->in_use = 0;
ids->seq = 0;
init_rwsem(&ids->rwsem);
- err = rhashtable_init(&ids->key_ht, &ipc_kht_params);
- if (err)
- return err;
+ rhashtable_init(&ids->key_ht, &ipc_kht_params);
idr_init(&ids->ipcs_idr);
- ids->tables_initialized = true;
- ids->max_id = -1;
+ ids->max_idx = -1;
#ifdef CONFIG_CHECKPOINT_RESTORE
ids->next_id = -1;
#endif
- return 0;
}
#ifdef CONFIG_PROC_FS
*/
static struct kern_ipc_perm *ipc_findkey(struct ipc_ids *ids, key_t key)
{
- struct kern_ipc_perm *ipcp = NULL;
+ struct kern_ipc_perm *ipcp;
- if (likely(ids->tables_initialized))
- ipcp = rhashtable_lookup_fast(&ids->key_ht, &key,
+ ipcp = rhashtable_lookup_fast(&ids->key_ht, &key,
ipc_kht_params);
+ if (!ipcp)
+ return NULL;
- if (ipcp) {
- rcu_read_lock();
- ipc_lock_object(ipcp);
- return ipcp;
- }
-
- return NULL;
+ rcu_read_lock();
+ ipc_lock_object(ipcp);
+ return ipcp;
}
-#ifdef CONFIG_CHECKPOINT_RESTORE
/*
- * Specify desired id for next allocated IPC object.
+ * Insert new IPC object into idr tree, and set sequence number and id
+ * in the correct order.
+ * Especially:
+ * - the sequence number must be set before inserting the object into the idr,
+ * because the sequence number is accessed without a lock.
+ * - the id can/must be set after inserting the object into the idr.
+ * All accesses must be done after getting kern_ipc_perm.lock.
+ *
+ * The caller must own kern_ipc_perm.lock of the new object.
+ * On error, the function returns a (negative) error code.
*/
-#define ipc_idr_alloc(ids, new) \
- idr_alloc(&(ids)->ipcs_idr, (new), \
- (ids)->next_id < 0 ? 0 : ipcid_to_idx((ids)->next_id),\
- 0, GFP_NOWAIT)
-
-static inline int ipc_buildid(int id, struct ipc_ids *ids,
- struct kern_ipc_perm *new)
+static inline int ipc_idr_alloc(struct ipc_ids *ids, struct kern_ipc_perm *new)
{
- if (ids->next_id < 0) { /* default, behave as !CHECKPOINT_RESTORE */
+ int idx, next_id = -1;
+
+#ifdef CONFIG_CHECKPOINT_RESTORE
+ next_id = ids->next_id;
+ ids->next_id = -1;
+#endif
+
+ /*
+ * As soon as a new object is inserted into the idr,
+ * ipc_obtain_object_idr() or ipc_obtain_object_check() can find it,
+ * and the lockless preparations for ipc operations can start.
+ * This means especially: permission checks, audit calls, allocation
+ * of undo structures, ...
+ *
+ * Thus the object must be fully initialized, and if something fails,
+ * then the full tear-down sequence must be followed.
+ * (i.e.: set new->deleted, reduce refcount, call_rcu())
+ */
+
+ if (next_id < 0) { /* !CHECKPOINT_RESTORE or next_id is unset */
new->seq = ids->seq++;
if (ids->seq > IPCID_SEQ_MAX)
ids->seq = 0;
+ idx = idr_alloc(&ids->ipcs_idr, new, 0, 0, GFP_NOWAIT);
} else {
- new->seq = ipcid_to_seqx(ids->next_id);
- ids->next_id = -1;
+ new->seq = ipcid_to_seqx(next_id);
+ idx = idr_alloc(&ids->ipcs_idr, new, ipcid_to_idx(next_id),
+ 0, GFP_NOWAIT);
}
-
- return SEQ_MULTIPLIER * new->seq + id;
+ if (idx >= 0)
+ new->id = SEQ_MULTIPLIER * new->seq + idx;
+ return idx;
}
-#else
-#define ipc_idr_alloc(ids, new) \
- idr_alloc(&(ids)->ipcs_idr, (new), 0, 0, GFP_NOWAIT)
-
-static inline int ipc_buildid(int id, struct ipc_ids *ids,
- struct kern_ipc_perm *new)
-{
- new->seq = ids->seq++;
- if (ids->seq > IPCID_SEQ_MAX)
- ids->seq = 0;
-
- return SEQ_MULTIPLIER * new->seq + id;
-}
-
-#endif /* CONFIG_CHECKPOINT_RESTORE */
-
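Since the id now encodes both pieces as SEQ_MULTIPLIER * seq + idx, the split used throughout the ipc code is plain arithmetic; with SEQ_MULTIPLIER == 32768, id 98306 decomposes into seq 3 and idx 2:

        idx = ipcid_to_idx(id);   /* id % SEQ_MULTIPLIER -> 98306 % 32768 == 2 */
        seq = ipcid_to_seqx(id);  /* id / SEQ_MULTIPLIER -> 98306 / 32768 == 3 */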
/**
* ipc_addid - add an ipc identifier
* @ids: ipc identifier set
* @limit: limit for the number of used ids
*
* Add an entry 'new' to the ipc ids idr. The permissions object is
- * initialised and the first free entry is set up and the id assigned
+ * initialised and the first free entry is set up and the index assigned
* is returned. The 'new' entry is returned in a locked state on success.
+ *
* On failure the entry is not locked and a negative err-code is returned.
+ * The caller must use ipc_rcu_putref() to free the identifier.
*
* Called with writer ipc_ids.rwsem held.
*/
{
kuid_t euid;
kgid_t egid;
- int id, err;
+ int idx, err;
+
+ /* 1) Initialize the refcount so that ipc_rcu_putref works */
+ refcount_set(&new->refcount, 1);
if (limit > IPCMNI)
limit = IPCMNI;
- if (!ids->tables_initialized || ids->in_use >= limit)
+ if (ids->in_use >= limit)
return -ENOSPC;
idr_preload(GFP_KERNEL);
- refcount_set(&new->refcount, 1);
spin_lock_init(&new->lock);
- new->deleted = false;
rcu_read_lock();
spin_lock(&new->lock);
new->cuid = new->uid = euid;
new->gid = new->cgid = egid;
- id = ipc_idr_alloc(ids, new);
+ new->deleted = false;
+
+ idx = ipc_idr_alloc(ids, new);
idr_preload_end();
- if (id >= 0 && new->key != IPC_PRIVATE) {
+ if (idx >= 0 && new->key != IPC_PRIVATE) {
err = rhashtable_insert_fast(&ids->key_ht, &new->khtnode,
ipc_kht_params);
if (err < 0) {
- idr_remove(&ids->ipcs_idr, id);
- id = err;
+ idr_remove(&ids->ipcs_idr, idx);
+ idx = err;
}
}
- if (id < 0) {
+ if (idx < 0) {
+ new->deleted = true;
spin_unlock(&new->lock);
rcu_read_unlock();
- return id;
+ return idx;
}
ids->in_use++;
- if (id > ids->max_id)
- ids->max_id = id;
-
- new->id = ipc_buildid(id, ids, new);
-
- return id;
+ if (idx > ids->max_idx)
+ ids->max_idx = idx;
+ return idx;
}
/**
*/
void ipc_rmid(struct ipc_ids *ids, struct kern_ipc_perm *ipcp)
{
- int lid = ipcid_to_idx(ipcp->id);
+ int idx = ipcid_to_idx(ipcp->id);
- idr_remove(&ids->ipcs_idr, lid);
+ idr_remove(&ids->ipcs_idr, idx);
ipc_kht_remove(ids, ipcp);
ids->in_use--;
ipcp->deleted = true;
- if (unlikely(lid == ids->max_id)) {
+ if (unlikely(idx == ids->max_idx)) {
do {
- lid--;
- if (lid == -1)
+ idx--;
+ if (idx == -1)
break;
- } while (!idr_find(&ids->ipcs_idr, lid));
- ids->max_id = lid;
+ } while (!idr_find(&ids->ipcs_idr, idx));
+ ids->max_idx = idx;
}
}
ipcp->key = IPC_PRIVATE;
}
-int ipc_rcu_getref(struct kern_ipc_perm *ptr)
+bool ipc_rcu_getref(struct kern_ipc_perm *ptr)
{
return refcount_inc_not_zero(&ptr->refcount);
}
struct kern_ipc_perm *ipc_obtain_object_idr(struct ipc_ids *ids, int id)
{
struct kern_ipc_perm *out;
- int lid = ipcid_to_idx(id);
-
- if (unlikely(!ids->tables_initialized))
- return ERR_PTR(-EINVAL);
+ int idx = ipcid_to_idx(id);
- out = idr_find(&ids->ipcs_idr, lid);
+ out = idr_find(&ids->ipcs_idr, idx);
if (!out)
return ERR_PTR(-EINVAL);
return out;
}
-/**
- * ipc_lock - lock an ipc structure without rwsem held
- * @ids: ipc identifier set
- * @id: ipc id to look for
- *
- * Look for an id in the ipc ids idr and lock the associated ipc object.
- *
- * The ipc object is locked on successful exit.
- */
-struct kern_ipc_perm *ipc_lock(struct ipc_ids *ids, int id)
-{
- struct kern_ipc_perm *out;
-
- rcu_read_lock();
- out = ipc_obtain_object_idr(ids, id);
- if (IS_ERR(out))
- goto err;
-
- spin_lock(&out->lock);
-
- /*
- * ipc_rmid() may have already freed the ID while ipc_lock()
- * was spinning: here verify that the structure is still valid.
- * Upon races with RMID, return -EIDRM, thus indicating that
- * the ID points to a removed identifier.
- */
- if (ipc_valid_object(out))
- return out;
-
- spin_unlock(&out->lock);
- out = ERR_PTR(-EIDRM);
-err:
- rcu_read_unlock();
- return out;
-}
-
/**
* ipc_obtain_object_check
* @ids: ipc identifier set
* @id: ipc id to look for
*
- * Similar to ipc_obtain_object_idr() but also checks
- * the ipc object reference counter.
+ * Similar to ipc_obtain_object_idr() but also checks the ipc object
+ * sequence number.
*
* Call inside the RCU critical section.
* The ipc object is *not* locked on exit.
}
/**
- * ipcctl_pre_down_nolock - retrieve an ipc and check permissions for some IPC_XXX cmd
+ * ipcctl_obtain_check - retrieve an ipc object and check permissions
* @ns: ipc namespace
* @ids: the table of ids where to look for the ipc
* @id: the id of the ipc to retrieve
*
 * This function performs some common audit and permission checks for an IPC_XXX
 * cmd and is called from semctl_down, shmctl_down and msgctl_down.
- * It must be called without any lock held and:
*
- * - retrieves the ipc with the given id in the given table.
+ * It:
+ * - retrieves the ipc object with the given id in the given table.
* - performs some audit and permission check, depending on the given cmd
* - returns a pointer to the ipc object or otherwise, the corresponding
* error.
*
 * Call holding both the rwsem and the rcu read lock.
*/
-struct kern_ipc_perm *ipcctl_pre_down_nolock(struct ipc_namespace *ns,
+struct kern_ipc_perm *ipcctl_obtain_check(struct ipc_namespace *ns,
struct ipc_ids *ids, int id, int cmd,
struct ipc64_perm *perm, int extra_perm)
{
#define IPCMNI 32768 /* <= MAX_INT limit for ipc arrays (including sysctl changes) */
#define SEQ_MULTIPLIER (IPCMNI)
-int sem_init(void);
-int msg_init(void);
+void sem_init(void);
+void msg_init(void);
void shm_init(void);
struct ipc_namespace;
#endif
#ifdef CONFIG_SYSVIPC
-int sem_init_ns(struct ipc_namespace *ns);
-int msg_init_ns(struct ipc_namespace *ns);
-int shm_init_ns(struct ipc_namespace *ns);
+void sem_init_ns(struct ipc_namespace *ns);
+void msg_init_ns(struct ipc_namespace *ns);
+void shm_init_ns(struct ipc_namespace *ns);
void sem_exit_ns(struct ipc_namespace *ns);
void msg_exit_ns(struct ipc_namespace *ns);
void shm_exit_ns(struct ipc_namespace *ns);
#else
-static inline int sem_init_ns(struct ipc_namespace *ns) { return 0; }
-static inline int msg_init_ns(struct ipc_namespace *ns) { return 0; }
-static inline int shm_init_ns(struct ipc_namespace *ns) { return 0; }
+static inline void sem_init_ns(struct ipc_namespace *ns) { }
+static inline void msg_init_ns(struct ipc_namespace *ns) { }
+static inline void shm_init_ns(struct ipc_namespace *ns) { }
static inline void sem_exit_ns(struct ipc_namespace *ns) { }
static inline void msg_exit_ns(struct ipc_namespace *ns) { }
struct seq_file;
struct ipc_ids;
-int ipc_init_ids(struct ipc_ids *);
+void ipc_init_ids(struct ipc_ids *ids);
#ifdef CONFIG_PROC_FS
void __init ipc_init_proc_interface(const char *path, const char *header,
int ids, int (*show)(struct seq_file *, void *));
int ipcperms(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp, short flg);
/**
- * ipc_get_maxid - get the last assigned id
+ * ipc_get_maxidx - get the highest assigned index
* @ids: ipc identifier set
*
* Called with ipc_ids.rwsem held for reading.
*/
-static inline int ipc_get_maxid(struct ipc_ids *ids)
+static inline int ipc_get_maxidx(struct ipc_ids *ids)
{
if (ids->in_use == 0)
return -1;
if (ids->in_use == IPCMNI)
return IPCMNI - 1;
- return ids->max_id;
+ return ids->max_idx;
}
/*
* refcount is initialized by ipc_addid(), before that point call_rcu()
* must be used.
*/
-int ipc_rcu_getref(struct kern_ipc_perm *ptr);
+bool ipc_rcu_getref(struct kern_ipc_perm *ptr);
void ipc_rcu_putref(struct kern_ipc_perm *ptr,
void (*func)(struct rcu_head *head));
-struct kern_ipc_perm *ipc_lock(struct ipc_ids *, int);
struct kern_ipc_perm *ipc_obtain_object_idr(struct ipc_ids *ids, int id);
void kernel_to_ipc64_perm(struct kern_ipc_perm *in, struct ipc64_perm *out);
void ipc64_perm_to_ipc_perm(struct ipc64_perm *in, struct ipc_perm *out);
int ipc_update_perm(struct ipc64_perm *in, struct kern_ipc_perm *out);
-struct kern_ipc_perm *ipcctl_pre_down_nolock(struct ipc_namespace *ns,
+struct kern_ipc_perm *ipcctl_obtain_check(struct ipc_namespace *ns,
struct ipc_ids *ids, int id, int cmd,
struct ipc64_perm *perm, int extra_perm);
extern struct msg_msg *copy_msg(struct msg_msg *src, struct msg_msg *dst);
extern int store_msg(void __user *dest, struct msg_msg *msg, size_t len);
-static inline int ipc_checkid(struct kern_ipc_perm *ipcp, int uid)
+static inline int ipc_checkid(struct kern_ipc_perm *ipcp, int id)
{
- return uid / SEQ_MULTIPLIER != ipcp->seq;
+ return ipcid_to_seqx(id) != ipcp->seq;
}
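
For reference, a user-visible IPC id packs the sequence counter and the idr
index together. A minimal sketch of the layout the new ipc_checkid() decodes,
using the existing ipc/util.h helpers (SEQ_MULTIPLIER is IPCMNI, 32768):

	/* id = seq * SEQ_MULTIPLIER + idx */
	#define ipcid_to_idx(id)  ((id) % SEQ_MULTIPLIER)
	#define ipcid_to_seqx(id) ((id) / SEQ_MULTIPLIER)

	/* e.g. id 98307 == 3 * 32768 + 3 -> idr index 3, sequence 3 */

The old "uid / SEQ_MULTIPLIER" open-coded the same division; naming the
helper makes the index/sequence split explicit.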
static inline void ipc_lock_object(struct kern_ipc_perm *perm)
#include <asm/sections.h>
/* vmcoreinfo stuff */
-static unsigned char *vmcoreinfo_data;
-static size_t vmcoreinfo_size;
+unsigned char *vmcoreinfo_data;
+size_t vmcoreinfo_size;
u32 *vmcoreinfo_note;
/* trusted vmcoreinfo, e.g. we can make a copy in the crash memory */
if (vmcoreinfo_data_safecopy)
vmcoreinfo_data = vmcoreinfo_data_safecopy;
- vmcoreinfo_append_str("CRASHTIME=%ld\n", get_seconds());
+ vmcoreinfo_append_str("CRASHTIME=%lld\n", ktime_get_real_seconds());
update_vmcoreinfo_note();
}
VMCOREINFO_SYMBOL(init_uts_ns);
VMCOREINFO_SYMBOL(node_online_map);
#ifdef CONFIG_MMU
- VMCOREINFO_SYMBOL(swapper_pg_dir);
+ VMCOREINFO_SYMBOL_ARRAY(swapper_pg_dir);
#endif
VMCOREINFO_SYMBOL(_stext);
VMCOREINFO_SYMBOL(vmap_area_list);
struct vm_area_struct *vm_area_alloc(struct mm_struct *mm)
{
- struct vm_area_struct *vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
+ struct vm_area_struct *vma;
+ vma = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
if (vma)
vma_init(vma, mm);
return vma;
tsk->nvcsw = tsk->nivcsw = 0;
#ifdef CONFIG_DETECT_HUNG_TASK
tsk->last_switch_count = tsk->nvcsw + tsk->nivcsw;
+ tsk->last_switch_time = 0;
#endif
tsk->mm = NULL;
return -ENOMEM;
atomic_set(&sig->count, 1);
+ spin_lock_irq(&current->sighand->siglock);
memcpy(sig->action, current->sighand->action, sizeof(sig->action));
+ spin_unlock_irq(&current->sighand->siglock);
return 0;
}
*/
unsigned long __read_mostly sysctl_hung_task_timeout_secs = CONFIG_DEFAULT_HUNG_TASK_TIMEOUT;
+/*
+ * Zero (default value) means use sysctl_hung_task_timeout_secs:
+ */
+unsigned long __read_mostly sysctl_hung_task_check_interval_secs;
+
int __read_mostly sysctl_hung_task_warnings = 10;
static int __read_mostly did_panic;
if (switch_count != t->last_switch_count) {
t->last_switch_count = switch_count;
+ t->last_switch_time = jiffies;
return;
}
+ if (time_is_after_jiffies(t->last_switch_time + timeout * HZ))
+ return;
trace_sched_process_hang(t);
for ( ; ; ) {
unsigned long timeout = sysctl_hung_task_timeout_secs;
- long t = hung_timeout_jiffies(hung_last_checked, timeout);
+ unsigned long interval = sysctl_hung_task_check_interval_secs;
+ long t;
+ if (interval == 0)
+ interval = timeout;
+ interval = min_t(unsigned long, interval, timeout);
+ t = hung_timeout_jiffies(hung_last_checked, interval);
if (t <= 0) {
if (!atomic_xchg(&reset_hung_task, 0))
check_hung_uninterruptible_tasks(timeout);
return true;
}
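
A short worked example of how the two knobs interact: with
hung_task_timeout_secs=120 and hung_task_check_interval_secs=10, khungtaskd
wakes every 10 seconds, but the time_is_after_jiffies() test above still
suppresses per-task warnings until a task has gone ~120 seconds without a
context switch. With the interval left at its default of 0, the check runs
every 120 seconds as before, and the min_t() clamp ensures a larger interval
can never delay detection beyond the timeout itself.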
+static unsigned long kernel_symbol_value(const struct kernel_symbol *sym)
+{
+#ifdef CONFIG_HAVE_ARCH_PREL32_RELOCATIONS
+ return (unsigned long)offset_to_ptr(&sym->value_offset);
+#else
+ return sym->value;
+#endif
+}
+
+static const char *kernel_symbol_name(const struct kernel_symbol *sym)
+{
+#ifdef CONFIG_HAVE_ARCH_PREL32_RELOCATIONS
+ return offset_to_ptr(&sym->name_offset);
+#else
+ return sym->name;
+#endif
+}
+
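Both accessors lean on offset_to_ptr(), which turns a 32-bit self-relative
offset back into a pointer. A minimal sketch of its expected shape (the
helper is assumed to live in include/linux/compiler.h in this series):

	static inline void *offset_to_ptr(const int *off)
	{
		/* the stored value is an offset relative to its own address */
		return (void *)((unsigned long)off + *off);
	}

Storing 32-bit offsets instead of 64-bit pointers halves the table size and
avoids load-time relocations on architectures that select
CONFIG_HAVE_ARCH_PREL32_RELOCATIONS.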
static int cmp_name(const void *va, const void *vb)
{
const char *a;
const struct kernel_symbol *b;
a = va; b = vb;
- return strcmp(a, b->name);
+ return strcmp(a, kernel_symbol_name(b));
}
static bool find_symbol_in_section(const struct symsearch *syms,
sym = NULL;
preempt_enable();
- return sym ? (void *)sym->value : NULL;
+ return sym ? (void *)kernel_symbol_value(sym) : NULL;
}
EXPORT_SYMBOL_GPL(__symbol_get);
for (i = 0; i < ARRAY_SIZE(arr); i++) {
for (s = arr[i].sym; s < arr[i].sym + arr[i].num; s++) {
- if (find_symbol(s->name, &owner, NULL, true, false)) {
+ if (find_symbol(kernel_symbol_name(s), &owner, NULL,
+ true, false)) {
pr_err("%s: exports duplicate symbol %s"
" (owned by %s)\n",
- mod->name, s->name, module_name(owner));
+ mod->name, kernel_symbol_name(s),
+ module_name(owner));
return -ENOEXEC;
}
}
ksym = resolve_symbol_wait(mod, info, name);
/* Ok if resolved. */
if (ksym && !IS_ERR(ksym)) {
- sym[i].st_value = ksym->value;
+ sym[i].st_value = kernel_symbol_value(ksym);
break;
}
ks = lookup_symbol(name, __start___ksymtab, __stop___ksymtab);
else
ks = lookup_symbol(name, mod->syms, mod->syms + mod->num_syms);
- return ks != NULL && ks->value == value;
+ return ks != NULL && kernel_symbol_value(ks) == value;
}
/* As per nm */
void __init console_init(void)
{
int ret;
- initcall_t *call;
+ initcall_t call;
+ initcall_entry_t *ce;
/* Setup the default TTY line discipline. */
n_tty_init();
* set up the console device so that later boot sequences can
* inform about problems etc..
*/
- call = __con_initcall_start;
+ ce = __con_initcall_start;
trace_initcall_level("console");
- while (call < __con_initcall_end) {
- trace_initcall_start((*call));
- ret = (*call)();
- trace_initcall_finish((*call), ret);
- call++;
+ while (ce < __con_initcall_end) {
+ call = initcall_from_entry(ce);
+ trace_initcall_start(call);
+ ret = call();
+ trace_initcall_finish(call, ret);
+ ce++;
}
}
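
The loop above now walks entries rather than raw function pointers. A sketch
of the companion definitions this assumes (following include/linux/init.h in
this series): under CONFIG_HAVE_ARCH_PREL32_RELOCATIONS an entry is a 32-bit
self-relative offset, otherwise it is the function pointer itself.

	#ifdef CONFIG_HAVE_ARCH_PREL32_RELOCATIONS
	typedef int initcall_entry_t;

	static inline initcall_t initcall_from_entry(initcall_entry_t *entry)
	{
		return offset_to_ptr(entry);	/* decode the offset */
	}
	#else
	typedef initcall_t initcall_entry_t;

	static inline initcall_t initcall_from_entry(initcall_entry_t *entry)
	{
		return *entry;			/* plain function pointer */
	}
	#endif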
wait_queue_entry_t *curr, *next;
int cnt = 0;
+ lockdep_assert_held(&wq_head->lock);
+
if (bookmark && (bookmark->flags & WQ_FLAG_BOOKMARK)) {
curr = list_next_entry(bookmark, entry);
return t->sighand->action[sig - 1].sa.sa_handler;
}
-static int sig_handler_ignored(void __user *handler, int sig)
+static inline bool sig_handler_ignored(void __user *handler, int sig)
{
/* Is it explicitly or implicitly ignored? */
return handler == SIG_IGN ||
- (handler == SIG_DFL && sig_kernel_ignore(sig));
+ (handler == SIG_DFL && sig_kernel_ignore(sig));
}
-static int sig_task_ignored(struct task_struct *t, int sig, bool force)
+static bool sig_task_ignored(struct task_struct *t, int sig, bool force)
{
void __user *handler;
if (unlikely(t->signal->flags & SIGNAL_UNKILLABLE) &&
handler == SIG_DFL && !(force && sig_kernel_only(sig)))
- return 1;
+ return true;
return sig_handler_ignored(handler, sig);
}
-static int sig_ignored(struct task_struct *t, int sig, bool force)
+static bool sig_ignored(struct task_struct *t, int sig, bool force)
{
/*
* Blocked signals are never ignored, since the
* unblocked.
*/
if (sigismember(&t->blocked, sig) || sigismember(&t->real_blocked, sig))
- return 0;
+ return false;
/*
* Tracers may want to know about even ignored signal unless it
* by SIGNAL_UNKILLABLE task.
*/
if (t->ptrace && sig != SIGKILL)
- return 0;
+ return false;
return sig_task_ignored(t, sig, force);
}
* Re-calculate pending state from the set of locally pending
* signals, globally pending signals, and blocked signals.
*/
-static inline int has_pending_signals(sigset_t *signal, sigset_t *blocked)
+static inline bool has_pending_signals(sigset_t *signal, sigset_t *blocked)
{
unsigned long ready;
long i;
#define PENDING(p,b) has_pending_signals(&(p)->signal, (b))
-static int recalc_sigpending_tsk(struct task_struct *t)
+static bool recalc_sigpending_tsk(struct task_struct *t)
{
if ((t->jobctl & JOBCTL_PENDING_MASK) ||
PENDING(&t->pending, &t->blocked) ||
PENDING(&t->signal->shared_pending, &t->blocked)) {
set_tsk_thread_flag(t, TIF_SIGPENDING);
- return 1;
+ return true;
}
+
/*
* We must never clear the flag in another thread, or in current
* when it's possible the current syscall is returning -ERESTART*.
* So we don't clear it here, and only callers who know they should do.
*/
- return 0;
+ return false;
}
/*
}
}
-int unhandled_signal(struct task_struct *tsk, int sig)
+bool unhandled_signal(struct task_struct *tsk, int sig)
{
void __user *handler = tsk->sighand->action[sig-1].sa.sa_handler;
if (is_global_init(tsk))
- return 1;
+ return true;
+
if (handler != SIG_IGN && handler != SIG_DFL)
- return 0;
+ return false;
+
/* if ptraced, let the tracer determine */
return !tsk->ptrace;
}
*
* All callers must be holding the siglock.
*/
-static int flush_sigqueue_mask(sigset_t *mask, struct sigpending *s)
+static void flush_sigqueue_mask(sigset_t *mask, struct sigpending *s)
{
struct sigqueue *q, *n;
sigset_t m;
sigandsets(&m, mask, &s->signal);
if (sigisemptyset(&m))
- return 0;
+ return;
sigandnsets(&s->signal, &s->signal, mask);
list_for_each_entry_safe(q, n, &s->list, list) {
__sigqueue_free(q);
}
}
- return 1;
}
static inline int is_si_special(const struct siginfo *info)
/*
* called with RCU read lock from check_kill_permission()
*/
-static int kill_ok_by_cred(struct task_struct *t)
+static bool kill_ok_by_cred(struct task_struct *t)
{
const struct cred *cred = current_cred();
const struct cred *tcred = __task_cred(t);
- if (uid_eq(cred->euid, tcred->suid) ||
- uid_eq(cred->euid, tcred->uid) ||
- uid_eq(cred->uid, tcred->suid) ||
- uid_eq(cred->uid, tcred->uid))
- return 1;
-
- if (ns_capable(tcred->user_ns, CAP_KILL))
- return 1;
-
- return 0;
+ return uid_eq(cred->euid, tcred->suid) ||
+ uid_eq(cred->euid, tcred->uid) ||
+ uid_eq(cred->uid, tcred->suid) ||
+ uid_eq(cred->uid, tcred->uid) ||
+ ns_capable(tcred->user_ns, CAP_KILL);
}
/*
* as soon as they're available, so putting the signal on the shared queue
* will be equivalent to sending it to one such thread.
*/
-static inline int wants_signal(int sig, struct task_struct *p)
+static inline bool wants_signal(int sig, struct task_struct *p)
{
if (sigismember(&p->blocked, sig))
- return 0;
+ return false;
+
if (p->flags & PF_EXITING)
- return 0;
+ return false;
+
if (sig == SIGKILL)
- return 1;
+ return true;
+
if (task_is_stopped_or_traced(p))
- return 0;
+ return false;
+
return task_curr(p) || !signal_pending(p);
}
return;
}
-static inline int legacy_queue(struct sigpending *signals, int sig)
+static inline bool legacy_queue(struct sigpending *signals, int sig)
{
return (sig < SIGRTMIN) && sigismember(&signals->signal, sig);
}
return error;
}
-static int kill_as_cred_perm(const struct cred *cred,
- struct task_struct *target)
+static inline bool kill_as_cred_perm(const struct cred *cred,
+ struct task_struct *target)
{
const struct cred *pcred = __task_cred(target);
- if (!uid_eq(cred->euid, pcred->suid) && !uid_eq(cred->euid, pcred->uid) &&
- !uid_eq(cred->uid, pcred->suid) && !uid_eq(cred->uid, pcred->uid))
- return 0;
- return 1;
+
+ return uid_eq(cred->euid, pcred->suid) ||
+ uid_eq(cred->euid, pcred->uid) ||
+ uid_eq(cred->uid, pcred->suid) ||
+ uid_eq(cred->uid, pcred->uid);
}
/* like kill_pid_info(), but doesn't use uid/euid of "current" */
return send_sig_info(sig, __si_special(priv), p);
}
-void
-force_sig(int sig, struct task_struct *p)
+void force_sig(int sig, struct task_struct *p)
{
force_sig_info(sig, SEND_SIG_PRIV, p);
}
* the problem was already a SIGSEGV, we'll want to
* make sure we don't even try to deliver the signal..
*/
-int
-force_sigsegv(int sig, struct task_struct *p)
+void force_sigsegv(int sig, struct task_struct *p)
{
if (sig == SIGSEGV) {
unsigned long flags;
spin_unlock_irqrestore(&p->sighand->siglock, flags);
}
force_sig(SIGSEGV, p);
- return 0;
}
int force_sig_fault(int sig, int code, void __user *addr
spin_unlock_irqrestore(&sighand->siglock, flags);
}
-static inline int may_ptrace_stop(void)
+static inline bool may_ptrace_stop(void)
{
if (!likely(current->ptrace))
- return 0;
+ return false;
/*
* Are we in the middle of do_coredump?
* If so and our tracer is also part of the coredump stopping
*/
if (unlikely(current->mm->core_state) &&
unlikely(current->mm == current->parent->mm))
- return 0;
+ return false;
- return 1;
+ return true;
}
/*
* Return true if there is a SIGKILL that should be waking us up.
* Called with the siglock held.
*/
-static int sigkill_pending(struct task_struct *tsk)
+static bool sigkill_pending(struct task_struct *tsk)
{
- return sigismember(&tsk->pending.signal, SIGKILL) ||
- sigismember(&tsk->signal->shared_pending.signal, SIGKILL);
+ return sigismember(&tsk->pending.signal, SIGKILL) ||
+ sigismember(&tsk->signal->shared_pending.signal, SIGKILL);
}
/*
return signr;
}
-int get_signal(struct ksignal *ksig)
+bool get_signal(struct ksignal *ksig)
{
struct sighand_struct *sighand = current->sighand;
struct signal_struct *signal = current->signal;
task_work_run();
if (unlikely(uprobe_deny_signal()))
- return 0;
+ return false;
/*
* Do this once, we can't return to user-mode if freezing() == T.
}
#endif
-static int do_sigpending(sigset_t *set)
+static void do_sigpending(sigset_t *set)
{
spin_lock_irq(&current->sighand->siglock);
sigorsets(set, &current->pending.signal,
/* Outside the lock because only this thread touches it. */
sigandsets(set, &current->blocked, set);
- return 0;
}
/**
SYSCALL_DEFINE2(rt_sigpending, sigset_t __user *, uset, size_t, sigsetsize)
{
sigset_t set;
- int err;
if (sigsetsize > sizeof(*uset))
return -EINVAL;
- err = do_sigpending(&set);
- if (!err && copy_to_user(uset, &set, sigsetsize))
- err = -EFAULT;
- return err;
+ do_sigpending(&set);
+
+ if (copy_to_user(uset, &set, sigsetsize))
+ return -EFAULT;
+
+ return 0;
}
#ifdef CONFIG_COMPAT
compat_size_t, sigsetsize)
{
sigset_t set;
- int err;
if (sigsetsize > sizeof(*uset))
return -EINVAL;
- err = do_sigpending(&set);
- if (!err)
- err = put_compat_sigset(uset, &set, sigsetsize);
- return err;
+ do_sigpending(&set);
+
+ return put_compat_sigset(uset, &set, sigsetsize);
}
#endif
SYSCALL_DEFINE1(sigpending, old_sigset_t __user *, uset)
{
sigset_t set;
- int err;
if (sizeof(old_sigset_t) > sizeof(*uset))
return -EINVAL;
- err = do_sigpending(&set);
- if (!err && copy_to_user(uset, &set, sizeof(old_sigset_t)))
- err = -EFAULT;
- return err;
+ do_sigpending(&set);
+
+ if (copy_to_user(uset, &set, sizeof(old_sigset_t)))
+ return -EFAULT;
+
+ return 0;
}
#ifdef CONFIG_COMPAT
COMPAT_SYSCALL_DEFINE1(sigpending, compat_old_sigset_t __user *, set32)
{
sigset_t set;
- int err = do_sigpending(&set);
- if (!err)
- err = put_user(set.sig[0], set32);
- return err;
+
+ do_sigpending(&set);
+
+ return put_user(set.sig[0], set32);
}
#endif
size_t, sigsetsize)
{
struct k_sigaction new_sa, old_sa;
- int ret = -EINVAL;
+ int ret;
/* XXX: Don't preclude handling different sized sigset_t's. */
if (sigsetsize != sizeof(sigset_t))
- goto out;
+ return -EINVAL;
- if (act) {
- if (copy_from_user(&new_sa.sa, act, sizeof(new_sa.sa)))
- return -EFAULT;
- }
+ if (act && copy_from_user(&new_sa.sa, act, sizeof(new_sa.sa)))
+ return -EFAULT;
ret = do_sigaction(sig, act ? &new_sa : NULL, oact ? &old_sa : NULL);
+ if (ret)
+ return ret;
- if (!ret && oact) {
- if (copy_to_user(oact, &old_sa.sa, sizeof(old_sa.sa)))
- return -EFAULT;
- }
-out:
- return ret;
+ if (oact && copy_to_user(oact, &old_sa.sa, sizeof(old_sa.sa)))
+ return -EFAULT;
+
+ return 0;
}
#ifdef CONFIG_COMPAT
COMPAT_SYSCALL_DEFINE4(rt_sigaction, int, sig,
static int ngroups_max = NGROUPS_MAX;
static const int cap_last_cap = CAP_LAST_CAP;
-/*this is needed for proc_doulongvec_minmax of sysctl_hung_task_timeout_secs */
+/*
+ * This is needed for proc_doulongvec_minmax of sysctl_hung_task_timeout_secs
+ * and hung_task_check_interval_secs
+ */
#ifdef CONFIG_DETECT_HUNG_TASK
static unsigned long hung_task_timeout_max = (LONG_MAX/HZ);
#endif
void __user *buffer, size_t *lenp, loff_t *ppos);
#ifdef CONFIG_MAGIC_SYSRQ
-/* Note: sysrq code uses it's own private copy */
+/* Note: sysrq code uses its own private copy */
static int __sysrq_enabled = CONFIG_MAGIC_SYSRQ_DEFAULT_ENABLE;
static int sysrq_sysctl_handler(struct ctl_table *table, int write,
.proc_handler = proc_dohung_task_timeout_secs,
.extra2 = &hung_task_timeout_max,
},
+ {
+ .procname = "hung_task_check_interval_secs",
+ .data = &sysctl_hung_task_check_interval_secs,
+ .maxlen = sizeof(unsigned long),
+ .mode = 0644,
+ .proc_handler = proc_dohung_task_timeout_secs,
+ .extra2 = &hung_task_timeout_max,
+ },
{
.procname = "hung_task_warnings",
.data = &sysctl_hung_task_warnings,
}
/**
- * proc_first_pos_non_zero_ignore - check if firs position is allowed
+ * proc_first_pos_non_zero_ignore - check if first position is allowed
* @ppos: file position
* @table: the sysctl table
*
* Returns true if the first position is non-zero and the sysctl_writes_strict
* mode indicates this is not allowed for numeric input types. String proc
- * hadlers can ignore the return value.
+ * handlers can ignore the return value.
*/
static bool proc_first_pos_non_zero_ignore(loff_t *ppos,
struct ctl_table *table)
}
EXPORT_SYMBOL_GPL(tracepoint_probe_unregister);
+static void for_each_tracepoint_range(struct tracepoint * const *begin,
+ struct tracepoint * const *end,
+ void (*fct)(struct tracepoint *tp, void *priv),
+ void *priv)
+{
+ if (!begin)
+ return;
+
+ if (IS_ENABLED(CONFIG_HAVE_ARCH_PREL32_RELOCATIONS)) {
+ const int *iter;
+
+ for (iter = (const int *)begin; iter < (const int *)end; iter++)
+ fct(offset_to_ptr(iter), priv);
+ } else {
+ struct tracepoint * const *iter;
+
+ for (iter = begin; iter < end; iter++)
+ fct(*iter, priv);
+ }
+}
+
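For the caller's side, for_each_kernel_tracepoint() funnels into this
iterator for the core kernel's tracepoint section. A minimal usage sketch;
the tp_print callback is a made-up example:

	static void tp_print(struct tracepoint *tp, void *priv)
	{
		pr_info("tracepoint: %s\n", tp->name);
	}

	/* visits every tracepoint, whatever the in-memory representation */
	for_each_kernel_tracepoint(tp_print, NULL);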
#ifdef CONFIG_MODULES
bool trace_module_has_bad_taint(struct module *mod)
{
* Ensure the tracer unregistered the module's probes before the module
* teardown is performed. Prevents leaks of probe and data pointers.
*/
-static void tp_module_going_check_quiescent(struct tracepoint * const *begin,
- struct tracepoint * const *end)
+static void tp_module_going_check_quiescent(struct tracepoint *tp, void *priv)
{
- struct tracepoint * const *iter;
-
- if (!begin)
- return;
- for (iter = begin; iter < end; iter++)
- WARN_ON_ONCE((*iter)->funcs);
+ WARN_ON_ONCE(tp->funcs);
}
static int tracepoint_module_coming(struct module *mod)
* Called the going notifier before checking for
* quiescence.
*/
- tp_module_going_check_quiescent(mod->tracepoints_ptrs,
- mod->tracepoints_ptrs + mod->num_tracepoints);
+ for_each_tracepoint_range(mod->tracepoints_ptrs,
+ mod->tracepoints_ptrs + mod->num_tracepoints,
+ tp_module_going_check_quiescent, NULL);
break;
}
}
__initcall(init_tracepoints);
#endif /* CONFIG_MODULES */
-static void for_each_tracepoint_range(struct tracepoint * const *begin,
- struct tracepoint * const *end,
- void (*fct)(struct tracepoint *tp, void *priv),
- void *priv)
-{
- struct tracepoint * const *iter;
-
- if (!begin)
- return;
- for (iter = begin; iter < end; iter++)
- fct(*iter, priv);
-}
-
/**
* for_each_kernel_tracepoint - iteration on all kernel tracepoints
* @fct: callback
/* root_user.__count is 1, for init task cred */
struct user_struct root_user = {
- .__count = ATOMIC_INIT(1),
+ .__count = REFCOUNT_INIT(1),
.processes = ATOMIC_INIT(1),
.sigpending = ATOMIC_INIT(0),
.locked_shm = 0,
hlist_for_each_entry(user, hashent, uidhash_node) {
if (uid_eq(user->uid, uid)) {
- atomic_inc(&user->__count);
+ refcount_inc(&user->__count);
return user;
}
}
if (!up)
return;
- local_irq_save(flags);
- if (atomic_dec_and_lock(&up->__count, &uidhash_lock))
+ if (refcount_dec_and_lock_irqsave(&up->__count, &uidhash_lock, &flags))
free_user(up, flags);
- else
- local_irq_restore(flags);
}
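
refcount_dec_and_lock_irqsave() folds the old local_irq_save() +
atomic_dec_and_lock() + local_irq_restore() dance into one call. A hedged
usage sketch (obj, obj_lock and remove_obj() are hypothetical):

	unsigned long flags;

	/*
	 * Returns true only when the count drops to zero; the lock is then
	 * held with interrupts disabled and *flags saved, so the caller
	 * cleans up and unlocks. On any other decrement it returns false
	 * without taking the lock.
	 */
	if (refcount_dec_and_lock_irqsave(&obj->refcnt, &obj_lock, &flags)) {
		remove_obj(obj);
		spin_unlock_irqrestore(&obj_lock, flags);
	}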
struct user_struct *alloc_uid(kuid_t uid)
goto out_unlock;
new->uid = uid;
- atomic_set(&new->__count, 1);
+ refcount_set(&new->__count, 1);
ratelimit_state_init(&new->ratelimit, HZ, 100);
ratelimit_set_flags(&new->ratelimit, RATELIMIT_MSG_ON_RELEASE);
# Generated files
#
gen_crc32table
+gen_crc64table
crc32table.h
+crc64table.h
oid_registry_data.c
endchoice
+config CRC64
+ tristate "CRC64 functions"
+ help
+ This option is provided for the case where no in-kernel-tree
+ modules require CRC64 functions, but a module built outside
+ the kernel tree does. Such modules that use library CRC64
+ functions require M here.
+
config CRC4
tristate "CRC4 functions"
help
config RANDOM32_SELFTEST
bool "PRNG perform self test on init"
- default n
help
This option enables the 32 bit PRNG library functions to perform a
self test on initialization.
tristate "torture tests for locking"
depends on DEBUG_KERNEL
select TORTURE_TEST
- default n
help
This option provides a kernel module that runs torture tests
on kernel locking primitives. The kernel module may be built
tristate "Linux Kernel Dump Test Tool Module"
depends on DEBUG_FS
depends on BLOCK
- default n
help
This module enables testing of the different dumping mechanisms by
inducing system failures at predefined crash points.
bool "Kprobes sanity tests"
depends on DEBUG_KERNEL
depends on KPROBES
- default n
help
This option provides for testing basic kprobes functionality on
boot. Samples of kprobe and kretprobe are inserted and
config BACKTRACE_SELF_TEST
tristate "Self test for the backtrace code"
depends on DEBUG_KERNEL
- default n
help
This option provides a kernel module that can be used to test
the kernel stack backtrace code. This option is not useful
config TEST_BITMAP
tristate "Test bitmap_*() family of functions at runtime"
- default n
help
Enable this option to test the bitmap functions at boot.
config TEST_RHASHTABLE
tristate "Perform selftest on resizable hash table"
- default n
help
Enable this option to test the rhashtable functions at boot.
config TEST_HASH
tristate "Perform selftest on hash functions"
- default n
help
Enable this option to test the kernel's integer (<linux/hash.h>),
string (<linux/stringhash.h>), and siphash (<linux/siphash.h>)
config TEST_PARMAN
tristate "Perform selftest on priority array manager"
- default n
depends on PARMAN
help
Enable this option to test priority array manager on boot
config TEST_LKM
tristate "Test module loading with 'hello world' module"
- default n
depends on m
help
This builds the "test_module" module that emits "Hello, world"
config TEST_USER_COPY
tristate "Test user/kernel boundary protections"
- default n
depends on m
help
This builds the "test_user_copy" module that runs sanity checks
config TEST_BPF
tristate "Test BPF filter functionality"
- default n
depends on m && NET
help
This builds the "test_bpf" module that runs various test vectors
config FIND_BIT_BENCHMARK
tristate "Test find_bit functions"
- default n
help
This builds the "test_find_bit" module that measure find_*_bit()
functions performance.
config TEST_FIRMWARE
tristate "Test firmware loading via userspace interface"
- default n
depends on FW_LOADER
help
This builds the "test_firmware" module that creates a userspace
config TEST_SYSCTL
tristate "sysctl test driver"
- default n
depends on PROC_SYSCTL
help
This builds the "test_sysctl" module. This driver enables to test the
config TEST_UDELAY
tristate "udelay test driver"
- default n
help
This builds the "udelay_test" module that helps to make sure
that udelay() is working properly.
config TEST_STATIC_KEYS
tristate "Test static keys"
- default n
depends on m
help
Test the static key interfaces.
config TEST_KMOD
tristate "kmod stress tester"
- default n
depends on m
depends on BLOCK && (64BIT || LBDAF) # for XFS, BTRFS
depends on NETDEVICES && NET_CORE && INET # for TUN
obj-$(CONFIG_CRC_T10DIF)+= crc-t10dif.o
obj-$(CONFIG_CRC_ITU_T) += crc-itu-t.o
obj-$(CONFIG_CRC32) += crc32.o
+obj-$(CONFIG_CRC64) += crc64.o
obj-$(CONFIG_CRC32_SELFTEST) += crc32test.o
obj-$(CONFIG_CRC4) += crc4.o
obj-$(CONFIG_CRC7) += crc7.o
obj-$(CONFIG_PRIME_NUMBERS) += prime_numbers.o
hostprogs-y := gen_crc32table
+hostprogs-y += gen_crc64table
clean-files := crc32table.h
+clean-files += crc64table.h
$(obj)/crc32.o: $(obj)/crc32table.h
$(obj)/crc32table.h: $(obj)/gen_crc32table
$(call cmd,crc32)
+$(obj)/crc64.o: $(obj)/crc64table.h
+
+quiet_cmd_crc64 = GEN $@
+ cmd_crc64 = $< > $@
+
+$(obj)/crc64table.h: $(obj)/gen_crc64table
+ $(call cmd,crc64)
+
#
# Build a fast OID lookup registry from include/linux/oid_registry.h
#
* @buf: array of u32 (in host byte order), the source bitmap
* @nbits: number of bits in @bitmap
*/
-void bitmap_from_arr32(unsigned long *bitmap, const u32 *buf,
- unsigned int nbits)
+void bitmap_from_arr32(unsigned long *bitmap, const u32 *buf, unsigned int nbits)
{
unsigned int i, halfwords;
- if (!nbits)
- return;
-
halfwords = DIV_ROUND_UP(nbits, 32);
for (i = 0; i < halfwords; i++) {
bitmap[i/2] = (unsigned long) buf[i];
{
unsigned int i, halfwords;
- if (!nbits)
- return;
-
halfwords = DIV_ROUND_UP(nbits, 32);
for (i = 0; i < halfwords; i++) {
buf[i] = (u32) (bitmap[i/2] & UINT_MAX);
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Normal 64-bit CRC calculation.
+ *
+ * This is a basic crc64 implementation following the ECMA-182 specification,
+ * which can be found at,
+ * http://www.ecma-international.org/publications/standards/Ecma-182.htm
+ *
+ * Dr. Ross N. Williams has a great document introducing the idea of the
+ * CRC algorithm; the CRC64 code here is likewise inspired by the
+ * table-driven algorithm and the detailed example from that paper,
+ * which can be found at,
+ * http://www.ross.net/crc/download/crc_v3.txt
+ *
+ * crc64table[256] is the lookup table for the table-driven 64-bit CRC
+ * calculation, generated by gen_crc64table.c at kernel build time. The
+ * polynomial of the crc64 arithmetic is from the ECMA-182 specification
+ * as well, and is defined as,
+ *
+ * x^64 + x^62 + x^57 + x^55 + x^54 + x^53 + x^52 + x^47 + x^46 + x^45 +
+ * x^40 + x^39 + x^38 + x^37 + x^35 + x^33 + x^32 + x^31 + x^29 + x^27 +
+ * x^24 + x^23 + x^22 + x^21 + x^19 + x^17 + x^13 + x^12 + x^10 + x^9 +
+ * x^7 + x^4 + x + 1
+ *
+ * Copyright 2018 SUSE Linux.
+ * Author: Coly Li <colyli@suse.de>
+ */
+
+#include <linux/module.h>
+#include <linux/types.h>
+#include "crc64table.h"
+
+MODULE_DESCRIPTION("CRC64 calculations");
+MODULE_LICENSE("GPL v2");
+
+/**
+ * crc64_be - Calculate bitwise big-endian ECMA-182 CRC64
+ * @crc: seed value for computation. 0 or (u64)~0 for a new CRC calculation,
+ *      or the previous crc64 value if computing incrementally.
+ * @p: pointer to buffer over which CRC64 is run
+ * @len: length of buffer @p
+ */
+u64 __pure crc64_be(u64 crc, const void *p, size_t len)
+{
+ size_t i, t;
+
+ const unsigned char *_p = p;
+
+ for (i = 0; i < len; i++) {
+ t = ((crc >> 56) ^ (*_p++)) & 0xFF;
+ crc = crc64table[t] ^ (crc << 8);
+ }
+
+ return crc;
+}
+EXPORT_SYMBOL_GPL(crc64_be);
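
A short usage sketch: because the running crc is threaded through the @crc
argument, feeding a buffer in chunks is expected to yield the same result as
a single pass.

	u64 crc;

	crc = crc64_be(0, buf, len);			/* one-shot */

	crc = crc64_be(0, buf, len / 2);		/* ...or incremental */
	crc = crc64_be(crc, buf + len / 2, len - len / 2);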
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Generate lookup table for the table-driven CRC64 calculation.
+ *
+ * gen_crc64table is executed at kernel build time and generates
+ * lib/crc64table.h. This header is included by lib/crc64.c for
+ * the table-driven CRC64 calculation.
+ *
+ * See lib/crc64.c for more information about the specification and
+ * polynomial arithmetic that gen_crc64table.c follows to generate
+ * the lookup table.
+ *
+ * Copyright 2018 SUSE Linux.
+ * Author: Coly Li <colyli@suse.de>
+ */
+#include <inttypes.h>
+#include <stdio.h>
+
+#include <linux/swab.h>
+
+#define CRC64_ECMA182_POLY 0x42F0E1EBA9EA3693ULL
+
+static uint64_t crc64_table[256] = {0};
+
+static void generate_crc64_table(void)
+{
+ uint64_t i, j, c, crc;
+
+ for (i = 0; i < 256; i++) {
+ crc = 0;
+ c = i << 56;
+
+ for (j = 0; j < 8; j++) {
+ if ((crc ^ c) & 0x8000000000000000ULL)
+ crc = (crc << 1) ^ CRC64_ECMA182_POLY;
+ else
+ crc <<= 1;
+ c <<= 1;
+ }
+
+ crc64_table[i] = crc;
+ }
+}
+
+static void print_crc64_table(void)
+{
+ int i;
+
+ printf("/* this file is generated - do not edit */\n\n");
+ printf("#include <linux/types.h>\n");
+ printf("#include <linux/cache.h>\n\n");
+ printf("static const u64 ____cacheline_aligned crc64table[256] = {\n");
+ for (i = 0; i < 256; i++) {
+ printf("\t0x%016" PRIx64 "ULL", crc64_table[i]);
+ if (i & 0x1)
+ printf(",\n");
+ else
+ printf(", ");
+ }
+ printf("};\n");
+}
+
+int main(int argc, char *argv[])
+{
+ generate_crc64_table();
+ print_crc64_table();
+ return 0;
+}
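
As a quick sanity check on the generator: for i == 1 the single set bit only
reaches bit 63 of "c" on the eighth and final shift, so the reduction fires
exactly once and crc64_table[1] comes out as the polynomial itself,
0x42F0E1EBA9EA3693ULL.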
int i;
size = sizeof(*tbl) + nbuckets * sizeof(tbl->buckets[0]);
- if (gfp != GFP_KERNEL)
- tbl = kzalloc(size, gfp | __GFP_NOWARN | __GFP_NORETRY);
- else
- tbl = kvzalloc(size, gfp);
+ tbl = kvzalloc(size, gfp);
size = nbuckets;
- if (tbl == NULL && gfp != GFP_KERNEL) {
+ if (tbl == NULL && (gfp & ~__GFP_NOFAIL) != GFP_KERNEL) {
tbl = nested_bucket_table_alloc(ht, nbuckets, gfp);
nbuckets = 0;
}
+
if (tbl == NULL)
return NULL;
err = -ENOMEM;
- new_tbl = bucket_table_alloc(ht, size, GFP_ATOMIC);
+ new_tbl = bucket_table_alloc(ht, size, GFP_ATOMIC | __GFP_NOWARN);
if (new_tbl == NULL)
goto fail;
}
}
+ /*
+ * This is API initialization and thus we need to guarantee the
+ * initial rhashtable allocation. Upon failure, retry with the
+ * smallest possible size with __GFP_NOFAIL semantics.
+ */
tbl = bucket_table_alloc(ht, size, GFP_KERNEL);
- if (tbl == NULL)
- return -ENOMEM;
+ if (unlikely(tbl == NULL)) {
+ size = max_t(u16, ht->p.min_size, HASH_MIN_SIZE);
+ tbl = bucket_table_alloc(ht, size, GFP_KERNEL | __GFP_NOFAIL);
+ }
atomic_set(&ht->nelems, 0);
unsigned int bar;
};
-struct foo *foo;
+static struct foo *foo;
static int __init test_debug_virtual_init(void)
{
static const unsigned char data_a[] = ".2.{....p..$}.4...1.....L...C...";
-static const char * const test_data_1_le[] __initconst = {
+static const char * const test_data_1[] __initconst = {
"be", "32", "db", "7b", "0a", "18", "93", "b2",
"70", "ba", "c4", "24", "7d", "83", "34", "9b",
"a6", "9c", "31", "ad", "9c", "0f", "ac", "e9",
"d14c", "9919", "b143", "0caf",
};
+static const char * const test_data_2_be[] __initconst = {
+ "be32", "db7b", "0a18", "93b2",
+ "70ba", "c424", "7d83", "349b",
+ "a69c", "31ad", "9c0f", "ace9",
+ "4cd1", "1999", "43b1", "af0c",
+};
+
static const char * const test_data_4_le[] __initconst = {
"7bdb32be", "b293180a", "24c4ba70", "9b34837d",
"ad319ca6", "e9ac0f9c", "9919d14c", "0cafb143",
};
+static const char * const test_data_4_be[] __initconst = {
+ "be32db7b", "0a1893b2", "70bac424", "7d83349b",
+ "a69c31ad", "9c0face9", "4cd11999", "43b1af0c",
+};
+
static const char * const test_data_8_le[] __initconst = {
"b293180a7bdb32be", "9b34837d24c4ba70",
"e9ac0f9cad319ca6", "0cafb1439919d14c",
};
+static const char * const test_data_8_be[] __initconst = {
+ "be32db7b0a1893b2", "70bac4247d83349b",
+ "a69c31ad9c0face9", "4cd1199943b1af0c",
+};
+
#define FILL_CHAR '#'
static unsigned total_tests __initdata;
size_t l = len;
int gs = groupsize, rs = rowsize;
unsigned int i;
+ const bool is_be = IS_ENABLED(CONFIG_CPU_BIG_ENDIAN);
if (rs != 16 && rs != 32)
rs = 16;
gs = 1;
if (gs == 8)
- result = test_data_8_le;
+ result = is_be ? test_data_8_be : test_data_8_le;
else if (gs == 4)
- result = test_data_4_le;
+ result = is_be ? test_data_4_be : test_data_4_le;
else if (gs == 2)
- result = test_data_2_le;
+ result = is_be ? test_data_2_be : test_data_2_le;
else
- result = test_data_1_le;
+ result = test_data_1;
/* hex dump */
p = test;
Fill the pages with poison patterns after free_pages() and verify
the patterns before alloc_pages. The filling of the memory helps
reduce the risk of information leaks from freed data. This does
- have a potential performance impact.
+ have a potential performance impact if enabled with the
+ "page_poison=1" kernel boot option.
Note that "poison" here is not the same thing as the "HWPoison"
for CONFIG_MEMORY_FAILURE. This is software poisoning only.
say N.
config PAGE_POISONING_ZERO
- bool "Use zero for poisoning instead of random data"
+ bool "Use zero for poisoning instead of debugging value"
depends on PAGE_POISONING
---help---
Instead of using the existing poison value, fill the pages with
allocation.
If unsure, say N
- bool
config DEBUG_PAGE_REF
bool "Enable tracepoint to track down page reference manipulation"
if (new_congested) {
/* !found and storage for new one already allocated, insert */
congested = new_congested;
- new_congested = NULL;
rb_link_node(&congested->rb_node, parent, node);
rb_insert_color(&congested->rb_node, &bdi->cgwb_congested_tree);
- goto found;
+ spin_unlock_irqrestore(&cgwb_lock, flags);
+ return congested;
}
spin_unlock_irqrestore(&cgwb_lock, flags);
if (!new_congested)
return NULL;
- atomic_set(&new_congested->refcnt, 0);
+ refcount_set(&new_congested->refcnt, 1);
new_congested->__bdi = bdi;
new_congested->blkcg_id = blkcg_id;
goto retry;
found:
- atomic_inc(&congested->refcnt);
+ refcount_inc(&congested->refcnt);
spin_unlock_irqrestore(&cgwb_lock, flags);
kfree(new_congested);
return congested;
{
unsigned long flags;
- local_irq_save(flags);
- if (!atomic_dec_and_lock(&congested->refcnt, &cgwb_lock)) {
- local_irq_restore(flags);
+ if (!refcount_dec_and_lock_irqsave(&congested->refcnt, &cgwb_lock, &flags))
return;
- }
/* bdi might already have been destroyed leaving @congested unlinked */
if (congested->__bdi) {
if (!bdi->wb_congested)
return -ENOMEM;
- atomic_set(&bdi->wb_congested->refcnt, 1);
+ refcount_set(&bdi->wb_congested->refcnt, 1);
err = wb_init(&bdi->wb, bdi, 1, GFP_KERNEL);
if (err) {
up_write(&hmm->mirrors_sem);
}
-static void hmm_invalidate_range_start(struct mmu_notifier *mn,
+static int hmm_invalidate_range_start(struct mmu_notifier *mn,
struct mm_struct *mm,
unsigned long start,
- unsigned long end)
+ unsigned long end,
+ bool blockable)
{
struct hmm *hmm = mm->hmm;
VM_BUG_ON(!hmm);
atomic_inc(&hmm->sequence);
+
+ return 0;
}
static void hmm_invalidate_range_end(struct mmu_notifier *mn,
return iter + 1;
}
-/*
- * FLATMEM and DISCONTIGMEM configurations use alloc_bootmem_node,
- * so all functions starting at paging_init should be marked __init
- * in those cases. SPARSEMEM, however, allows for memory hotplug,
- * and alloc_bootmem_node is not used.
- */
-#ifdef CONFIG_SPARSEMEM
-#define __paginginit __meminit
-#else
-#define __paginginit __init
-#endif
-
/* Memory initialisation debug and verification */
enum mminit_level {
MMINIT_WARNING,
* We cannot do anything with the page while its refcount is 0.
* Usually 0 means free, or tail of a higher-order page: in which
* case this node is no longer referenced, and should be freed;
- * however, it might mean that the page is under page_freeze_refs().
+ * however, it might mean that the page is under page_ref_freeze().
* The __remove_mapping() case is easy, again the node is now stale;
* but if page is swapcache in migrate_page_move_mapping(), it might
* still be our page, in which case it's essential to keep the node.
* work here too. We have chosen the !PageSwapCache test to
* optimize the common case, when the page is or is about to
* be freed: PageSwapCache is cleared (under spin_lock_irq)
- * in the freeze_refs section of __remove_mapping(); but Anon
+ * in the ref_freeze section of __remove_mapping(); but Anon
* page->mapping reset to NULL later, in free_pages_prepare().
*/
if (!PageSwapCache(page))
return true;
}
+/**
+ * mem_cgroup_get_oom_group - get a memory cgroup to clean up after OOM
+ * @victim: task to be killed by the OOM killer
+ * @oom_domain: memcg in case of memcg OOM, NULL in case of system-wide OOM
+ *
+ * Returns a pointer to a memory cgroup, which has to be cleaned up
+ * by killing all OOM-killable tasks belonging to it.
+ *
+ * Caller has to call mem_cgroup_put() on the returned non-NULL memcg.
+ */
+struct mem_cgroup *mem_cgroup_get_oom_group(struct task_struct *victim,
+ struct mem_cgroup *oom_domain)
+{
+ struct mem_cgroup *oom_group = NULL;
+ struct mem_cgroup *memcg;
+
+ if (!cgroup_subsys_on_dfl(memory_cgrp_subsys))
+ return NULL;
+
+ if (!oom_domain)
+ oom_domain = root_mem_cgroup;
+
+ rcu_read_lock();
+
+ memcg = mem_cgroup_from_task(victim);
+ if (memcg == root_mem_cgroup)
+ goto out;
+
+ /*
+ * Traverse the memory cgroup hierarchy from the victim task's
+ * cgroup up to the OOMing cgroup (or root) to find the
+ * highest-level memory cgroup with oom.group set.
+ */
+ for (; memcg; memcg = parent_mem_cgroup(memcg)) {
+ if (memcg->oom_group)
+ oom_group = memcg;
+
+ if (memcg == oom_domain)
+ break;
+ }
+
+ if (oom_group)
+ css_get(&oom_group->css);
+out:
+ rcu_read_unlock();
+
+ return oom_group;
+}
+
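A worked example of the traversal: in a hierarchy root -> A -> B -> C with
the victim in C and the OOM scoped to A (oom_domain == A), setting oom.group
on both B and C makes the loop record C first, then overwrite it with B, and
stop once it reaches A. The result is B, the highest-level cgroup on the path
with oom.group set (the domain itself would win had it set the flag), so the
kill is as coarse as the configuration asks for but never wider than the
OOMing subtree.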
+void mem_cgroup_print_oom_group(struct mem_cgroup *memcg)
+{
+ pr_info("Tasks in ");
+ pr_cont_cgroup_path(memcg->css.cgroup);
+ pr_cont(" are going to be killed due to memory.oom.group set\n");
+}
+
/**
* lock_page_memcg - lock a page->mem_cgroup binding
* @page: the page
return retval;
}
-static void tree_stat(struct mem_cgroup *memcg, unsigned long *stat)
-{
- struct mem_cgroup *iter;
- int i;
-
- memset(stat, 0, sizeof(*stat) * MEMCG_NR_STAT);
-
- for_each_mem_cgroup_tree(iter, memcg) {
- for (i = 0; i < MEMCG_NR_STAT; i++)
- stat[i] += memcg_page_state(iter, i);
- }
-}
+struct accumulated_stats {
+ unsigned long stat[MEMCG_NR_STAT];
+ unsigned long events[NR_VM_EVENT_ITEMS];
+ unsigned long lru_pages[NR_LRU_LISTS];
+ const unsigned int *stats_array;
+ const unsigned int *events_array;
+ int stats_size;
+ int events_size;
+};
-static void tree_events(struct mem_cgroup *memcg, unsigned long *events)
+static void accumulate_memcg_tree(struct mem_cgroup *memcg,
+ struct accumulated_stats *acc)
{
- struct mem_cgroup *iter;
+ struct mem_cgroup *mi;
int i;
- memset(events, 0, sizeof(*events) * NR_VM_EVENT_ITEMS);
+ for_each_mem_cgroup_tree(mi, memcg) {
+ for (i = 0; i < acc->stats_size; i++)
+ acc->stat[i] += memcg_page_state(mi,
+ acc->stats_array ? acc->stats_array[i] : i);
- for_each_mem_cgroup_tree(iter, memcg) {
- for (i = 0; i < NR_VM_EVENT_ITEMS; i++)
- events[i] += memcg_sum_events(iter, i);
+ for (i = 0; i < acc->events_size; i++)
+ acc->events[i] += memcg_sum_events(mi,
+ acc->events_array ? acc->events_array[i] : i);
+
+ for (i = 0; i < NR_LRU_LISTS; i++)
+ acc->lru_pages[i] +=
+ mem_cgroup_nr_lru_pages(mi, BIT(i));
}
}
unsigned long memory, memsw;
struct mem_cgroup *mi;
unsigned int i;
+ struct accumulated_stats acc;
BUILD_BUG_ON(ARRAY_SIZE(memcg1_stat_names) != ARRAY_SIZE(memcg1_stats));
BUILD_BUG_ON(ARRAY_SIZE(mem_cgroup_lru_names) != NR_LRU_LISTS);
seq_printf(m, "hierarchical_memsw_limit %llu\n",
(u64)memsw * PAGE_SIZE);
- for (i = 0; i < ARRAY_SIZE(memcg1_stats); i++) {
- unsigned long long val = 0;
+ memset(&acc, 0, sizeof(acc));
+ acc.stats_size = ARRAY_SIZE(memcg1_stats);
+ acc.stats_array = memcg1_stats;
+ acc.events_size = ARRAY_SIZE(memcg1_events);
+ acc.events_array = memcg1_events;
+ accumulate_memcg_tree(memcg, &acc);
+ for (i = 0; i < ARRAY_SIZE(memcg1_stats); i++) {
if (memcg1_stats[i] == MEMCG_SWAP && !do_memsw_account())
continue;
- for_each_mem_cgroup_tree(mi, memcg)
- val += memcg_page_state(mi, memcg1_stats[i]) *
- PAGE_SIZE;
- seq_printf(m, "total_%s %llu\n", memcg1_stat_names[i], val);
+ seq_printf(m, "total_%s %llu\n", memcg1_stat_names[i],
+ (u64)acc.stat[i] * PAGE_SIZE);
}
- for (i = 0; i < ARRAY_SIZE(memcg1_events); i++) {
- unsigned long long val = 0;
-
- for_each_mem_cgroup_tree(mi, memcg)
- val += memcg_sum_events(mi, memcg1_events[i]);
- seq_printf(m, "total_%s %llu\n", memcg1_event_names[i], val);
- }
-
- for (i = 0; i < NR_LRU_LISTS; i++) {
- unsigned long long val = 0;
+ for (i = 0; i < ARRAY_SIZE(memcg1_events); i++)
+ seq_printf(m, "total_%s %llu\n", memcg1_event_names[i],
+ (u64)acc.events[i]);
- for_each_mem_cgroup_tree(mi, memcg)
- val += mem_cgroup_nr_lru_pages(mi, BIT(i)) * PAGE_SIZE;
- seq_printf(m, "total_%s %llu\n", mem_cgroup_lru_names[i], val);
- }
+ for (i = 0; i < NR_LRU_LISTS; i++)
+ seq_printf(m, "total_%s %llu\n", mem_cgroup_lru_names[i],
+ (u64)acc.lru_pages[i] * PAGE_SIZE);
#ifdef CONFIG_DEBUG_VM
{
static int memory_stat_show(struct seq_file *m, void *v)
{
struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(m));
- unsigned long stat[MEMCG_NR_STAT];
- unsigned long events[NR_VM_EVENT_ITEMS];
+ struct accumulated_stats acc;
int i;
/*
* Current memory state:
*/
- tree_stat(memcg, stat);
- tree_events(memcg, events);
+ memset(&acc, 0, sizeof(acc));
+ acc.stats_size = MEMCG_NR_STAT;
+ acc.events_size = NR_VM_EVENT_ITEMS;
+ accumulate_memcg_tree(memcg, &acc);
seq_printf(m, "anon %llu\n",
- (u64)stat[MEMCG_RSS] * PAGE_SIZE);
+ (u64)acc.stat[MEMCG_RSS] * PAGE_SIZE);
seq_printf(m, "file %llu\n",
- (u64)stat[MEMCG_CACHE] * PAGE_SIZE);
+ (u64)acc.stat[MEMCG_CACHE] * PAGE_SIZE);
seq_printf(m, "kernel_stack %llu\n",
- (u64)stat[MEMCG_KERNEL_STACK_KB] * 1024);
+ (u64)acc.stat[MEMCG_KERNEL_STACK_KB] * 1024);
seq_printf(m, "slab %llu\n",
- (u64)(stat[NR_SLAB_RECLAIMABLE] +
- stat[NR_SLAB_UNRECLAIMABLE]) * PAGE_SIZE);
+ (u64)(acc.stat[NR_SLAB_RECLAIMABLE] +
+ acc.stat[NR_SLAB_UNRECLAIMABLE]) * PAGE_SIZE);
seq_printf(m, "sock %llu\n",
- (u64)stat[MEMCG_SOCK] * PAGE_SIZE);
+ (u64)acc.stat[MEMCG_SOCK] * PAGE_SIZE);
seq_printf(m, "shmem %llu\n",
- (u64)stat[NR_SHMEM] * PAGE_SIZE);
+ (u64)acc.stat[NR_SHMEM] * PAGE_SIZE);
seq_printf(m, "file_mapped %llu\n",
- (u64)stat[NR_FILE_MAPPED] * PAGE_SIZE);
+ (u64)acc.stat[NR_FILE_MAPPED] * PAGE_SIZE);
seq_printf(m, "file_dirty %llu\n",
- (u64)stat[NR_FILE_DIRTY] * PAGE_SIZE);
+ (u64)acc.stat[NR_FILE_DIRTY] * PAGE_SIZE);
seq_printf(m, "file_writeback %llu\n",
- (u64)stat[NR_WRITEBACK] * PAGE_SIZE);
-
- for (i = 0; i < NR_LRU_LISTS; i++) {
- struct mem_cgroup *mi;
- unsigned long val = 0;
+ (u64)acc.stat[NR_WRITEBACK] * PAGE_SIZE);
- for_each_mem_cgroup_tree(mi, memcg)
- val += mem_cgroup_nr_lru_pages(mi, BIT(i));
- seq_printf(m, "%s %llu\n",
- mem_cgroup_lru_names[i], (u64)val * PAGE_SIZE);
- }
+ for (i = 0; i < NR_LRU_LISTS; i++)
+ seq_printf(m, "%s %llu\n", mem_cgroup_lru_names[i],
+ (u64)acc.lru_pages[i] * PAGE_SIZE);
seq_printf(m, "slab_reclaimable %llu\n",
- (u64)stat[NR_SLAB_RECLAIMABLE] * PAGE_SIZE);
+ (u64)acc.stat[NR_SLAB_RECLAIMABLE] * PAGE_SIZE);
seq_printf(m, "slab_unreclaimable %llu\n",
- (u64)stat[NR_SLAB_UNRECLAIMABLE] * PAGE_SIZE);
+ (u64)acc.stat[NR_SLAB_UNRECLAIMABLE] * PAGE_SIZE);
/* Accumulated memory events */
- seq_printf(m, "pgfault %lu\n", events[PGFAULT]);
- seq_printf(m, "pgmajfault %lu\n", events[PGMAJFAULT]);
+ seq_printf(m, "pgfault %lu\n", acc.events[PGFAULT]);
+ seq_printf(m, "pgmajfault %lu\n", acc.events[PGMAJFAULT]);
- seq_printf(m, "pgrefill %lu\n", events[PGREFILL]);
- seq_printf(m, "pgscan %lu\n", events[PGSCAN_KSWAPD] +
- events[PGSCAN_DIRECT]);
- seq_printf(m, "pgsteal %lu\n", events[PGSTEAL_KSWAPD] +
- events[PGSTEAL_DIRECT]);
- seq_printf(m, "pgactivate %lu\n", events[PGACTIVATE]);
- seq_printf(m, "pgdeactivate %lu\n", events[PGDEACTIVATE]);
- seq_printf(m, "pglazyfree %lu\n", events[PGLAZYFREE]);
- seq_printf(m, "pglazyfreed %lu\n", events[PGLAZYFREED]);
+ seq_printf(m, "pgrefill %lu\n", acc.events[PGREFILL]);
+ seq_printf(m, "pgscan %lu\n", acc.events[PGSCAN_KSWAPD] +
+ acc.events[PGSCAN_DIRECT]);
+ seq_printf(m, "pgsteal %lu\n", acc.events[PGSTEAL_KSWAPD] +
+ acc.events[PGSTEAL_DIRECT]);
+ seq_printf(m, "pgactivate %lu\n", acc.events[PGACTIVATE]);
+ seq_printf(m, "pgdeactivate %lu\n", acc.events[PGDEACTIVATE]);
+ seq_printf(m, "pglazyfree %lu\n", acc.events[PGLAZYFREE]);
+ seq_printf(m, "pglazyfreed %lu\n", acc.events[PGLAZYFREED]);
seq_printf(m, "workingset_refault %lu\n",
- stat[WORKINGSET_REFAULT]);
+ acc.stat[WORKINGSET_REFAULT]);
seq_printf(m, "workingset_activate %lu\n",
- stat[WORKINGSET_ACTIVATE]);
+ acc.stat[WORKINGSET_ACTIVATE]);
seq_printf(m, "workingset_nodereclaim %lu\n",
- stat[WORKINGSET_NODERECLAIM]);
+ acc.stat[WORKINGSET_NODERECLAIM]);
+
+ return 0;
+}
+
+static int memory_oom_group_show(struct seq_file *m, void *v)
+{
+ struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(m));
+
+ seq_printf(m, "%d\n", memcg->oom_group);
return 0;
}
+static ssize_t memory_oom_group_write(struct kernfs_open_file *of,
+ char *buf, size_t nbytes, loff_t off)
+{
+ struct mem_cgroup *memcg = mem_cgroup_from_css(of_css(of));
+ int ret, oom_group;
+
+ buf = strstrip(buf);
+ if (!buf)
+ return -EINVAL;
+
+ ret = kstrtoint(buf, 0, &oom_group);
+ if (ret)
+ return ret;
+
+ if (oom_group != 0 && oom_group != 1)
+ return -EINVAL;
+
+ memcg->oom_group = oom_group;
+
+ return nbytes;
+}
+
static struct cftype memory_files[] = {
{
.name = "current",
.flags = CFTYPE_NOT_ON_ROOT,
.seq_show = memory_stat_show,
},
+ {
+ .name = "oom.group",
+ .flags = CFTYPE_NOT_ON_ROOT | CFTYPE_NS_DELEGATABLE,
+ .seq_show = memory_oom_group_show,
+ .write = memory_oom_group_write,
+ },
{ } /* terminate */
};
* R/W the page; let's pray that the page has been
* used and will be freed some time later.
* In fact it's dangerous to directly bump up page count from 0,
- * that may make page_freeze_refs()/page_unfreeze_refs() mismatch.
+ * that may make page_ref_freeze()/page_ref_unfreeze() mismatch.
*/
if (!(flags & MF_COUNT_INCREASED) && !get_hwpoison_page(p)) {
if (is_free_buddy_page(p)) {
static pg_data_t __ref *hotadd_new_pgdat(int nid, u64 start)
{
struct pglist_data *pgdat;
- unsigned long zones_size[MAX_NR_ZONES] = {0};
- unsigned long zholes_size[MAX_NR_ZONES] = {0};
unsigned long start_pfn = PFN_DOWN(start);
pgdat = NODE_DATA(nid);
/* we can use NODE_DATA(nid) from here */
+ pgdat->node_id = nid;
+ pgdat->node_start_pfn = start_pfn;
+
/* init node's zones as empty zones, we don't have any present pages.*/
- free_area_init_node(nid, zones_size, start_pfn, zholes_size);
+ free_area_init_core_hotplug(nid);
pgdat->per_cpu_nodestats = alloc_percpu(struct per_cpu_nodestat);
/*
*/
build_all_zonelists(pgdat);
- /*
- * zone->managed_pages is set to an approximate value in
- * free_area_init_core(), which will cause
- * /sys/device/system/node/nodeX/meminfo has wrong data.
- * So reset it to 0 before any memory is onlined.
- */
- reset_node_managed_pages(pgdat);
-
/*
* When memory is hot-added, all the memory is in offline state. So
* clear all zones' present_pages because they will be updated in
* online_pages() and offline_pages().
*/
+ reset_node_managed_pages(pgdat);
reset_node_present_pages(pgdat);
return pgdat;
zonelist = &NODE_DATA(node)->node_zonelists[ZONELIST_FALLBACK];
z = first_zones_zonelist(zonelist, highest_zoneidx,
&policy->v.nodes);
- return z->zone ? z->zone->node : node;
+ return z->zone ? zone_to_nid(z->zone) : node;
}
default:
node_zonelist(numa_node_id(), GFP_HIGHUSER),
gfp_zone(GFP_HIGHUSER),
&pol->v.nodes);
- polnid = z->zone->node;
+ polnid = zone_to_nid(z->zone);
break;
default:
goto put_new;
/* Create pseudo-vma that contains just the policy */
- memset(&pvma, 0, sizeof(struct vm_area_struct));
vma_init(&pvma, NULL);
pvma.vm_end = TASK_SIZE; /* policy covers entire file */
mpol_set_shared_policy(sp, &pvma, new); /* adds ref */
/**
* mempool_init - initialize a memory pool
+ * @pool: pointer to the memory pool that should be initialized
* @min_nr: the minimum number of elements guaranteed to be
* allocated for this pool.
* @alloc_fn: user-defined element-allocation function.
zone->name);
/* Iterate the zonelist */
- for_each_zone_zonelist(zone, z, zonelist, zoneid) {
-#ifdef CONFIG_NUMA
- pr_cont("%d:%s ", zone->node, zone->name);
-#else
- pr_cont("0:%s ", zone->name);
-#endif /* CONFIG_NUMA */
- }
+ for_each_zone_zonelist(zone, z, zonelist, zoneid)
+ pr_cont("%d:%s ", zone_to_nid(zone), zone->name);
pr_cont("\n");
}
}
* which clears VM_LOCKED, otherwise the oom reaper cannot
* reliably test it.
*/
- mutex_lock(&oom_lock);
- __oom_reap_task_mm(mm);
- mutex_unlock(&oom_lock);
+ (void)__oom_reap_task_mm(mm);
set_bit(MMF_OOM_SKIP, &mm->flags);
down_write(&mm->mmap_sem);
srcu_read_unlock(&srcu, id);
}
-void __mmu_notifier_invalidate_range_start(struct mm_struct *mm,
- unsigned long start, unsigned long end)
+int __mmu_notifier_invalidate_range_start(struct mm_struct *mm,
+ unsigned long start, unsigned long end,
+ bool blockable)
{
struct mmu_notifier *mn;
+ int ret = 0;
int id;
id = srcu_read_lock(&srcu);
hlist_for_each_entry_rcu(mn, &mm->mmu_notifier_mm->list, hlist) {
- if (mn->ops->invalidate_range_start)
- mn->ops->invalidate_range_start(mn, mm, start, end);
+ if (mn->ops->invalidate_range_start) {
+ int _ret = mn->ops->invalidate_range_start(mn, mm, start, end, blockable);
+ if (_ret) {
+ pr_info("%pS callback failed with %d in %sblockable context.\n",
+ mn->ops->invalidate_range_start, _ret,
+ !blockable ? "non-" : "");
+ ret = _ret;
+ }
+ }
}
srcu_read_unlock(&srcu, id);
+
+ return ret;
}
EXPORT_SYMBOL_GPL(__mmu_notifier_invalidate_range_start);
struct task_struct *p;
struct task_struct *task;
- pr_info("[ pid ] uid tgid total_vm rss pgtables_bytes swapents oom_score_adj name\n");
+ pr_info("Tasks state (memory values in pages):\n");
+ pr_info("[ pid ] uid tgid total_vm rss pgtables_bytes swapents oom_score_adj name\n");
rcu_read_lock();
for_each_process(p) {
if (oom_unkillable_task(p, memcg, nodemask))
continue;
}
- pr_info("[%5d] %5d %5d %8lu %8lu %8ld %8lu %5hd %s\n",
+ pr_info("[%7d] %5d %5d %8lu %8lu %8ld %8lu %5hd %s\n",
task->pid, from_kuid(&init_user_ns, task_uid(task)),
task->tgid, task->mm->total_vm, get_mm_rss(task->mm),
mm_pgtables_bytes(task->mm),
static struct task_struct *oom_reaper_list;
static DEFINE_SPINLOCK(oom_reaper_lock);
-void __oom_reap_task_mm(struct mm_struct *mm)
+bool __oom_reap_task_mm(struct mm_struct *mm)
{
struct vm_area_struct *vma;
+ bool ret = true;
/*
* Tell all users of get_user/copy_from_user etc... that the content
struct mmu_gather tlb;
tlb_gather_mmu(&tlb, mm, start, end);
- mmu_notifier_invalidate_range_start(mm, start, end);
+ if (mmu_notifier_invalidate_range_start_nonblock(mm, start, end)) {
+ ret = false;
+ continue;
+ }
unmap_page_range(&tlb, vma, start, end, NULL);
mmu_notifier_invalidate_range_end(mm, start, end);
tlb_finish_mmu(&tlb, start, end);
}
}
+
+ return ret;
}
+/*
+ * Reaps the address space of the given task.
+ *
+ * Returns true on success and false if none or only part of the address
+ * space was reclaimed, in which case the caller should retry later.
+ */
static bool oom_reap_task_mm(struct task_struct *tsk, struct mm_struct *mm)
{
bool ret = true;
- /*
- * We have to make sure to not race with the victim exit path
- * and cause premature new oom victim selection:
- * oom_reap_task_mm exit_mm
- * mmget_not_zero
- * mmput
- * atomic_dec_and_test
- * exit_oom_victim
- * [...]
- * out_of_memory
- * select_bad_process
- * # no TIF_MEMDIE task selects new victim
- * unmap_page_range # frees some memory
- */
- mutex_lock(&oom_lock);
-
if (!down_read_trylock(&mm->mmap_sem)) {
- ret = false;
trace_skip_task_reaping(tsk->pid);
- goto unlock_oom;
- }
-
- /*
- * If the mm has invalidate_{start,end}() notifiers that could block,
- * sleep to give the oom victim some more time.
- * TODO: we really want to get rid of this ugly hack and make sure that
- * notifiers cannot block for unbounded amount of time
- */
- if (mm_has_blockable_invalidate_notifiers(mm)) {
- up_read(&mm->mmap_sem);
- schedule_timeout_idle(HZ);
- goto unlock_oom;
+ return false;
}
/*
* down_write();up_write() cycle in exit_mmap().
*/
if (test_bit(MMF_OOM_SKIP, &mm->flags)) {
- up_read(&mm->mmap_sem);
trace_skip_task_reaping(tsk->pid);
- goto unlock_oom;
+ goto out_unlock;
}
trace_start_task_reaping(tsk->pid);
- __oom_reap_task_mm(mm);
+ /* failed to reap part of the address space. Try again later */
+ ret = __oom_reap_task_mm(mm);
+ if (!ret)
+ goto out_finish;
pr_info("oom_reaper: reaped process %d (%s), now anon-rss:%lukB, file-rss:%lukB, shmem-rss:%lukB\n",
task_pid_nr(tsk), tsk->comm,
K(get_mm_counter(mm, MM_ANONPAGES)),
K(get_mm_counter(mm, MM_FILEPAGES)),
K(get_mm_counter(mm, MM_SHMEMPAGES)));
+out_finish:
+ trace_finish_task_reaping(tsk->pid);
+out_unlock:
up_read(&mm->mmap_sem);
- trace_finish_task_reaping(tsk->pid);
-unlock_oom:
- mutex_unlock(&oom_lock);
return ret;
}
return ret;
}
-static void oom_kill_process(struct oom_control *oc, const char *message)
+static void __oom_kill_process(struct task_struct *victim)
{
- struct task_struct *p = oc->chosen;
- unsigned int points = oc->chosen_points;
- struct task_struct *victim = p;
- struct task_struct *child;
- struct task_struct *t;
+ struct task_struct *p;
struct mm_struct *mm;
- unsigned int victim_points = 0;
- static DEFINE_RATELIMIT_STATE(oom_rs, DEFAULT_RATELIMIT_INTERVAL,
- DEFAULT_RATELIMIT_BURST);
bool can_oom_reap = true;
- /*
- * If the task is already exiting, don't alarm the sysadmin or kill
- * its children or threads, just give it access to memory reserves
- * so it can die quickly
- */
- task_lock(p);
- if (task_will_free_mem(p)) {
- mark_oom_victim(p);
- wake_oom_reaper(p);
- task_unlock(p);
- put_task_struct(p);
- return;
- }
- task_unlock(p);
-
- if (__ratelimit(&oom_rs))
- dump_header(oc, p);
-
- pr_err("%s: Kill process %d (%s) score %u or sacrifice child\n",
- message, task_pid_nr(p), p->comm, points);
-
- /*
- * If any of p's children has a different mm and is eligible for kill,
- * the one with the highest oom_badness() score is sacrificed for its
- * parent. This attempts to lose the minimal amount of work done while
- * still freeing memory.
- */
- read_lock(&tasklist_lock);
- for_each_thread(p, t) {
- list_for_each_entry(child, &t->children, sibling) {
- unsigned int child_points;
-
- if (process_shares_mm(child, p->mm))
- continue;
- /*
- * oom_badness() returns 0 if the thread is unkillable
- */
- child_points = oom_badness(child,
- oc->memcg, oc->nodemask, oc->totalpages);
- if (child_points > victim_points) {
- put_task_struct(victim);
- victim = child;
- victim_points = child_points;
- get_task_struct(victim);
- }
- }
- }
- read_unlock(&tasklist_lock);
-
p = find_lock_task_mm(victim);
if (!p) {
put_task_struct(victim);
}
#undef K
+/*
+ * Kill the provided task unless it is protected by an
+ * oom_score_adj of OOM_SCORE_ADJ_MIN.
+ */
+static int oom_kill_memcg_member(struct task_struct *task, void *unused)
+{
+ if (task->signal->oom_score_adj != OOM_SCORE_ADJ_MIN) {
+ get_task_struct(task);
+ __oom_kill_process(task);
+ }
+ return 0;
+}
+
+static void oom_kill_process(struct oom_control *oc, const char *message)
+{
+ struct task_struct *p = oc->chosen;
+ unsigned int points = oc->chosen_points;
+ struct task_struct *victim = p;
+ struct task_struct *child;
+ struct task_struct *t;
+ struct mem_cgroup *oom_group;
+ unsigned int victim_points = 0;
+ static DEFINE_RATELIMIT_STATE(oom_rs, DEFAULT_RATELIMIT_INTERVAL,
+ DEFAULT_RATELIMIT_BURST);
+
+ /*
+ * If the task is already exiting, don't alarm the sysadmin or kill
+ * its children or threads, just give it access to memory reserves
+ * so it can die quickly
+ */
+ task_lock(p);
+ if (task_will_free_mem(p)) {
+ mark_oom_victim(p);
+ wake_oom_reaper(p);
+ task_unlock(p);
+ put_task_struct(p);
+ return;
+ }
+ task_unlock(p);
+
+ if (__ratelimit(&oom_rs))
+ dump_header(oc, p);
+
+ pr_err("%s: Kill process %d (%s) score %u or sacrifice child\n",
+ message, task_pid_nr(p), p->comm, points);
+
+ /*
+ * If any of p's children has a different mm and is eligible for kill,
+ * the one with the highest oom_badness() score is sacrificed for its
+ * parent. This attempts to lose the minimal amount of work done while
+ * still freeing memory.
+ */
+ read_lock(&tasklist_lock);
+ for_each_thread(p, t) {
+ list_for_each_entry(child, &t->children, sibling) {
+ unsigned int child_points;
+
+ if (process_shares_mm(child, p->mm))
+ continue;
+ /*
+ * oom_badness() returns 0 if the thread is unkillable
+ */
+ child_points = oom_badness(child,
+ oc->memcg, oc->nodemask, oc->totalpages);
+ if (child_points > victim_points) {
+ put_task_struct(victim);
+ victim = child;
+ victim_points = child_points;
+ get_task_struct(victim);
+ }
+ }
+ }
+ read_unlock(&tasklist_lock);
+
+ /*
+ * Do we need to kill the entire memory cgroup?
+ * Or even one of the ancestor memory cgroups?
+ * Check this out before killing the victim task.
+ */
+ oom_group = mem_cgroup_get_oom_group(victim, oc->memcg);
+
+ __oom_kill_process(victim);
+
+ /*
+ * If necessary, kill all tasks in the selected memory cgroup.
+ */
+ if (oom_group) {
+ mem_cgroup_print_oom_group(oom_group);
+ mem_cgroup_scan_tasks(oom_group, oom_kill_memcg_member, NULL);
+ mem_cgroup_put(oom_group);
+ }
+}
+
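The memcg side of this, mem_cgroup_get_oom_group(), is not part of this
hunk. A plausible sketch of the lookup it performs, consistent with the
comment above (the field and helper names here are assumptions, not taken
from the patch):

	/*
	 * Hypothetical sketch: walk from the victim's memcg up to the
	 * OOMing domain, remember the highest-level cgroup with
	 * memory.oom.group set, and return it with a reference held
	 * (dropped later via mem_cgroup_put()).
	 */
	struct mem_cgroup *mem_cgroup_get_oom_group(struct task_struct *victim,
						    struct mem_cgroup *oom_domain)
	{
		struct mem_cgroup *oom_group = NULL, *memcg;

		rcu_read_lock();
		for (memcg = mem_cgroup_from_task(victim); memcg;
		     memcg = parent_mem_cgroup(memcg)) {
			if (READ_ONCE(memcg->oom_group))
				oom_group = memcg;
			if (memcg == oom_domain)
				break;
		}
		if (oom_group)
			css_get(&oom_group->css);
		rcu_read_unlock();
		return oom_group;
	}
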
/*
* Determines whether the kernel must panic because of the panic_on_oom sysctl.
*/
if (!static_branch_likely(&vm_numa_stat_key))
return;
- if (z->node != numa_node_id())
+ if (zone_to_nid(z) != numa_node_id())
local_stat = NUMA_OTHER;
- if (z->node == preferred_zone->node)
+ if (zone_to_nid(z) == zone_to_nid(preferred_zone))
__inc_numa_state(z, NUMA_HIT);
else {
__inc_numa_state(z, NUMA_MISS);
z = first_zones_zonelist(node_zonelist(node, GFP_KERNEL),
gfp_zone(GFP_KERNEL),
NULL);
- return z->zone->node;
+ return zone_to_nid(z->zone);
}
#endif
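
The zone_to_nid()/zone_set_nid() accessors these hunks switch to are, by
all appearances, thin wrappers over the formerly open-coded field, along
these lines (a sketch; the exact definitions are not part of this hunk):

	static inline int zone_to_nid(struct zone *zone)
	{
	#ifdef CONFIG_NUMA
		return zone->node;
	#else
		return 0;
	#endif
	}

	static inline void zone_set_nid(struct zone *zone, int nid)
	{
	#ifdef CONFIG_NUMA
		zone->node = nid;
	#endif
	}

Wrapping the access lets !CONFIG_NUMA configurations drop the node field
entirely while keeping every call site free of #ifdefs.
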
return usemapsize / 8;
}
-static void __init setup_usemap(struct pglist_data *pgdat,
+static void __ref setup_usemap(struct pglist_data *pgdat,
struct zone *zone,
unsigned long zone_start_pfn,
unsigned long zonesize)
#ifdef CONFIG_HUGETLB_PAGE_SIZE_VARIABLE
/* Initialise the number of pages represented by NR_PAGEBLOCK_BITS */
-void __paginginit set_pageblock_order(void)
+void __init set_pageblock_order(void)
{
unsigned int order;
* include/linux/pageblock-flags.h for the values of pageblock_order based on
* the kernel config
*/
-void __paginginit set_pageblock_order(void)
+void __init set_pageblock_order(void)
{
}
#endif /* CONFIG_HUGETLB_PAGE_SIZE_VARIABLE */
-static unsigned long __paginginit calc_memmap_size(unsigned long spanned_pages,
- unsigned long present_pages)
+static unsigned long __init calc_memmap_size(unsigned long spanned_pages,
+ unsigned long present_pages)
{
unsigned long pages = spanned_pages;
return PAGE_ALIGN(pages * sizeof(struct page)) >> PAGE_SHIFT;
}
-/*
- * Set up the zone data structures:
- * - mark all pages reserved
- * - mark all memory queues empty
- * - clear the memory bitmaps
- *
- * NOTE: pgdat should get zeroed by caller.
- */
-static void __paginginit free_area_init_core(struct pglist_data *pgdat)
-{
- enum zone_type j;
- int nid = pgdat->node_id;
-
- pgdat_resize_init(pgdat);
#ifdef CONFIG_NUMA_BALANCING
+static void pgdat_init_numabalancing(struct pglist_data *pgdat)
+{
spin_lock_init(&pgdat->numabalancing_migrate_lock);
pgdat->numabalancing_migrate_nr_pages = 0;
pgdat->numabalancing_migrate_next_window = jiffies;
+}
+#else
+static void pgdat_init_numabalancing(struct pglist_data *pgdat) {}
#endif
+
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+static void pgdat_init_split_queue(struct pglist_data *pgdat)
+{
spin_lock_init(&pgdat->split_queue_lock);
INIT_LIST_HEAD(&pgdat->split_queue);
pgdat->split_queue_len = 0;
+}
+#else
+static void pgdat_init_split_queue(struct pglist_data *pgdat) {}
#endif
- init_waitqueue_head(&pgdat->kswapd_wait);
- init_waitqueue_head(&pgdat->pfmemalloc_wait);
+
#ifdef CONFIG_COMPACTION
+static void pgdat_init_kcompactd(struct pglist_data *pgdat)
+{
init_waitqueue_head(&pgdat->kcompactd_wait);
+}
+#else
+static void pgdat_init_kcompactd(struct pglist_data *pgdat) {}
#endif
+
+static void __meminit pgdat_init_internals(struct pglist_data *pgdat)
+{
+ pgdat_resize_init(pgdat);
+
+ pgdat_init_numabalancing(pgdat);
+ pgdat_init_split_queue(pgdat);
+ pgdat_init_kcompactd(pgdat);
+
+ init_waitqueue_head(&pgdat->kswapd_wait);
+ init_waitqueue_head(&pgdat->pfmemalloc_wait);
+
pgdat_page_ext_init(pgdat);
spin_lock_init(&pgdat->lru_lock);
lruvec_init(node_lruvec(pgdat));
+}
+
+static void __meminit zone_init_internals(struct zone *zone, enum zone_type idx, int nid,
+ unsigned long remaining_pages)
+{
+ zone->managed_pages = remaining_pages;
+ zone_set_nid(zone, nid);
+ zone->name = zone_names[idx];
+ zone->zone_pgdat = NODE_DATA(nid);
+ spin_lock_init(&zone->lock);
+ zone_seqlock_init(zone);
+ zone_pcp_init(zone);
+}
+
+/*
+ * Set up the zone data structures
+ * - init pgdat internals
+ * - init all zones belonging to this node
+ *
+ * NOTE: this function is only called during memory hotplug
+ */
+#ifdef CONFIG_MEMORY_HOTPLUG
+void __ref free_area_init_core_hotplug(int nid)
+{
+ enum zone_type z;
+ pg_data_t *pgdat = NODE_DATA(nid);
+ pgdat_init_internals(pgdat);
+ for (z = 0; z < MAX_NR_ZONES; z++)
+ zone_init_internals(&pgdat->node_zones[z], z, nid, 0);
+}
+#endif
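
A memory-hotplug caller is expected to use this lighter variant instead of
the early-boot path. A hypothetical call site, purely for illustration
(the function name and surrounding steps are assumptions, not part of the
hunk):

	static void hotplug_init_empty_node(int nid)
	{
		pg_data_t *pgdat = NODE_DATA(nid);

		pgdat->node_id = nid;
		pgdat->node_start_pfn = 0;
		/* Zones start with zero managed pages; onlining adds them. */
		free_area_init_core_hotplug(nid);
	}
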
+
+/*
+ * Set up the zone data structures:
+ * - mark all pages reserved
+ * - mark all memory queues empty
+ * - clear the memory bitmaps
+ *
+ * NOTE: pgdat should get zeroed by caller.
+ * NOTE: this function is only called during early init.
+ */
+static void __init free_area_init_core(struct pglist_data *pgdat)
+{
+ enum zone_type j;
+ int nid = pgdat->node_id;
+
+ pgdat_init_internals(pgdat);
pgdat->per_cpu_nodestats = &boot_nodestats;
for (j = 0; j < MAX_NR_ZONES; j++) {
* when the bootmem allocator frees pages into the buddy system.
* And all highmem pages will be managed by the buddy system.
*/
- zone->managed_pages = freesize;
-#ifdef CONFIG_NUMA
- zone->node = nid;
-#endif
- zone->name = zone_names[j];
- zone->zone_pgdat = pgdat;
- spin_lock_init(&zone->lock);
- zone_seqlock_init(zone);
- zone_pcp_init(zone);
+ zone_init_internals(zone, j, nid, freesize);
if (!size)
continue;
static void __ref alloc_node_mem_map(struct pglist_data *pgdat) { }
#endif /* CONFIG_FLAT_NODE_MEM_MAP */
-void __paginginit free_area_init_node(int nid, unsigned long *zones_size,
- unsigned long node_start_pfn, unsigned long *zholes_size)
+#ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
+static inline void pgdat_set_deferred_range(pg_data_t *pgdat)
+{
+ /*
+	 * We start with only one section of pages; more pages are added as
+ * needed until the rest of deferred pages are initialized.
+ */
+ pgdat->static_init_pgcnt = min_t(unsigned long, PAGES_PER_SECTION,
+ pgdat->node_spanned_pages);
+ pgdat->first_deferred_pfn = ULONG_MAX;
+}
+#else
+static inline void pgdat_set_deferred_range(pg_data_t *pgdat) {}
+#endif
+
+void __init free_area_init_node(int nid, unsigned long *zones_size,
+ unsigned long node_start_pfn,
+ unsigned long *zholes_size)
{
pg_data_t *pgdat = NODE_DATA(nid);
unsigned long start_pfn = 0;
zones_size, zholes_size);
alloc_node_mem_map(pgdat);
+ pgdat_set_deferred_range(pgdat);
-#ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
- /*
- * We start only with one section of pages, more pages are added as
- * needed until the rest of deferred pages are initialized.
- */
- pgdat->static_init_pgcnt = min_t(unsigned long, PAGES_PER_SECTION,
- pgdat->node_spanned_pages);
- pgdat->first_deferred_pfn = ULONG_MAX;
-#endif
free_area_init_core(pgdat);
}
* may be accessed (for example page_to_pfn() on some configuration accesses
* flags). We must explicitly zero those struct pages.
*/
-void __paginginit zero_resv_unavail(void)
+void __init zero_resv_unavail(void)
{
phys_addr_t start, end;
unsigned long pfn;
*/
int pcpu_nr_empty_pop_pages;
+/*
+ * The number of populated pages in use by the allocator, protected by
+ * pcpu_lock. This number is kept per unit per chunk (i.e. when a page gets
+ * allocated/deallocated, it is allocated/deallocated in all units of a chunk
+ * and increments/decrements this count by 1).
+ */
+static unsigned long pcpu_nr_populated;
+
/*
* Balance work is used to populate or destroy chunks asynchronously. We
* try to keep the number of populated free pages between
bitmap_set(chunk->populated, page_start, nr);
chunk->nr_populated += nr;
+ pcpu_nr_populated += nr;
if (!for_alloc) {
chunk->nr_empty_pop_pages += nr;
chunk->nr_populated -= nr;
chunk->nr_empty_pop_pages -= nr;
pcpu_nr_empty_pop_pages -= nr;
+ pcpu_nr_populated -= nr;
}
/*
pcpu_nr_empty_pop_pages = pcpu_first_chunk->nr_empty_pop_pages;
pcpu_chunk_relocate(pcpu_first_chunk, -1);
+ /* include all regions of the first chunk */
+ pcpu_nr_populated += PFN_DOWN(size_sum);
+
pcpu_stats_chunk_alloc();
trace_percpu_create_chunk(base_addr);
#endif /* CONFIG_SMP */
+/*
+ * pcpu_nr_pages - calculate total number of populated backing pages
+ *
+ * This reflects the number of pages populated to back chunks. Metadata is
+ * excluded in the number exposed in meminfo as the number of backing pages
+ * scales with the number of cpus and can quickly outweigh the memory used for
+ * metadata. It also keeps this calculation nice and simple.
+ *
+ * RETURNS:
+ * Total number of populated backing pages in use by the allocator.
+ */
+unsigned long pcpu_nr_pages(void)
+{
+ return pcpu_nr_populated * pcpu_nr_units;
+}
+
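A consumer in fs/proc/meminfo.c can then surface this figure in kilobytes.
A minimal sketch, assuming a seq_file *m inside the meminfo show routine
(the exact call site and formatting are assumptions):

	/* pages -> kB is a left shift by PAGE_SHIFT - 10 */
	seq_printf(m, "Percpu:         %8lu kB\n",
		   pcpu_nr_pages() << (PAGE_SHIFT - 10));
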
/*
* Percpu allocator is initialized early during boot when neither slab or
* workqueue is available. Plug async management until everything is up
struct shmem_inode_info *info, pgoff_t index)
{
/* Create a pseudo vma that just contains the policy */
- memset(vma, 0, sizeof(*vma));
vma_init(vma, NULL);
/* Bias interleave by inode number to distribute better across nodes */
vma->vm_pgoff = index + info->vfs_inode.i_ino;
cache->cur = 0;
if (swap_slot_cache_active)
- cache->nr = get_swap_pages(SWAP_SLOTS_CACHE_SIZE, false,
- cache->slots);
+ cache->nr = get_swap_pages(SWAP_SLOTS_CACHE_SIZE,
+ cache->slots, 1);
return cache->nr;
}
if (PageTransHuge(page)) {
if (IS_ENABLED(CONFIG_THP_SWAP))
- get_swap_pages(1, true, &entry);
+ get_swap_pages(1, &entry, HPAGE_PMD_NR);
goto out;
}
goto out;
}
- get_swap_pages(1, false, &entry);
+ get_swap_pages(1, &entry, 1);
out:
if (mem_cgroup_try_charge_swap(page, entry)) {
put_swap_page(page, entry);
#ifdef CONFIG_THP_SWAP
#define SWAPFILE_CLUSTER HPAGE_PMD_NR
+
+#define swap_entry_size(size) (size)
#else
#define SWAPFILE_CLUSTER 256
+
+/*
+ * Define swap_entry_size() as a constant to let the compiler optimize
+ * out some code if !CONFIG_THP_SWAP
+ */
+#define swap_entry_size(size) 1
#endif
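
Because swap_entry_size() degenerates to the constant 1 when THP swap is
compiled out, comparisons such as size == SWAPFILE_CLUSTER in the hunks
below fold to compile-time false, so the cluster-only paths are eliminated
as dead code. That is what allows the later removal of the #ifdef
CONFIG_THP_SWAP blocks. A minimal illustration (the two helpers named here
are hypothetical):

	int size = swap_entry_size(hpage_nr_pages(page));

	if (size == SWAPFILE_CLUSTER)		/* folds to 1 == 256: false */
		take_cluster_path();		/* dropped when !CONFIG_THP_SWAP */
	else
		take_single_entry_path();
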
#define LATENCY_LIMIT 256
static inline bool cluster_is_huge(struct swap_cluster_info *info)
{
- return info->flags & CLUSTER_FLAG_HUGE;
+ if (IS_ENABLED(CONFIG_THP_SWAP))
+ return info->flags & CLUSTER_FLAG_HUGE;
+ return false;
}
static inline void cluster_clear_huge(struct swap_cluster_info *info)
spin_unlock(&ci->lock);
}
+/*
+ * Determine the locking method in use for this device. Return
+ * swap_cluster_info if SSD-style cluster-based locking is in place.
+ */
static inline struct swap_cluster_info *lock_cluster_or_swap_info(
- struct swap_info_struct *si,
- unsigned long offset)
+ struct swap_info_struct *si, unsigned long offset)
{
struct swap_cluster_info *ci;
+ /* Try to use fine-grained SSD-style locking if available: */
ci = lock_cluster(si, offset);
+ /* Otherwise, fall back to traditional, coarse locking: */
if (!ci)
spin_lock(&si->lock);
return n_ret;
}
-#ifdef CONFIG_THP_SWAP
static int swap_alloc_cluster(struct swap_info_struct *si, swp_entry_t *slot)
{
unsigned long idx;
unsigned long offset, i;
unsigned char *map;
+ /*
+ * Should not even be attempting cluster allocations when huge
+ * page swap is disabled. Warn and fail the allocation.
+ */
+ if (!IS_ENABLED(CONFIG_THP_SWAP)) {
+ VM_WARN_ON_ONCE(1);
+ return 0;
+ }
+
if (cluster_list_empty(&si->free_clusters))
return 0;
unlock_cluster(ci);
swap_range_free(si, offset, SWAPFILE_CLUSTER);
}
-#else
-static int swap_alloc_cluster(struct swap_info_struct *si, swp_entry_t *slot)
-{
- VM_WARN_ON_ONCE(1);
- return 0;
-}
-#endif /* CONFIG_THP_SWAP */
static unsigned long scan_swap_map(struct swap_info_struct *si,
unsigned char usage)
}
-int get_swap_pages(int n_goal, bool cluster, swp_entry_t swp_entries[])
+int get_swap_pages(int n_goal, swp_entry_t swp_entries[], int entry_size)
{
- unsigned long nr_pages = cluster ? SWAPFILE_CLUSTER : 1;
+ unsigned long size = swap_entry_size(entry_size);
struct swap_info_struct *si, *next;
long avail_pgs;
int n_ret = 0;
int node;
/* Only single cluster request supported */
- WARN_ON_ONCE(n_goal > 1 && cluster);
+ WARN_ON_ONCE(n_goal > 1 && size == SWAPFILE_CLUSTER);
- avail_pgs = atomic_long_read(&nr_swap_pages) / nr_pages;
+ avail_pgs = atomic_long_read(&nr_swap_pages) / size;
if (avail_pgs <= 0)
goto noswap;
if (n_goal > avail_pgs)
n_goal = avail_pgs;
- atomic_long_sub(n_goal * nr_pages, &nr_swap_pages);
+ atomic_long_sub(n_goal * size, &nr_swap_pages);
spin_lock(&swap_avail_lock);
spin_unlock(&si->lock);
goto nextsi;
}
- if (cluster) {
+ if (size == SWAPFILE_CLUSTER) {
if (!(si->flags & SWP_FILE))
n_ret = swap_alloc_cluster(si, swp_entries);
} else
n_ret = scan_swap_map_slots(si, SWAP_HAS_CACHE,
n_goal, swp_entries);
spin_unlock(&si->lock);
- if (n_ret || cluster)
+ if (n_ret || size == SWAPFILE_CLUSTER)
goto check_out;
pr_debug("scan_swap_map of si %d failed to find offset\n",
si->type);
check_out:
if (n_ret < n_goal)
- atomic_long_add((long)(n_goal - n_ret) * nr_pages,
+ atomic_long_add((long)(n_goal - n_ret) * size,
&nr_swap_pages);
noswap:
return n_ret;
return p;
}
-static unsigned char __swap_entry_free(struct swap_info_struct *p,
- swp_entry_t entry, unsigned char usage)
+static unsigned char __swap_entry_free_locked(struct swap_info_struct *p,
+ unsigned long offset,
+ unsigned char usage)
{
- struct swap_cluster_info *ci;
- unsigned long offset = swp_offset(entry);
unsigned char count;
unsigned char has_cache;
- ci = lock_cluster_or_swap_info(p, offset);
-
count = p->swap_map[offset];
has_cache = count & SWAP_HAS_CACHE;
usage = count | has_cache;
p->swap_map[offset] = usage ? : SWAP_HAS_CACHE;
+ return usage;
+}
+
+static unsigned char __swap_entry_free(struct swap_info_struct *p,
+ swp_entry_t entry, unsigned char usage)
+{
+ struct swap_cluster_info *ci;
+ unsigned long offset = swp_offset(entry);
+
+ ci = lock_cluster_or_swap_info(p, offset);
+ usage = __swap_entry_free_locked(p, offset, usage);
unlock_cluster_or_swap_info(p, ci);
return usage;
/*
* Called after dropping swapcache to decrease refcnt to swap entries.
*/
-static void swapcache_free(swp_entry_t entry)
-{
- struct swap_info_struct *p;
-
- p = _swap_info_get(entry);
- if (p) {
- if (!__swap_entry_free(p, entry, SWAP_HAS_CACHE))
- free_swap_slot(entry);
- }
-}
-
-#ifdef CONFIG_THP_SWAP
-static void swapcache_free_cluster(swp_entry_t entry)
+void put_swap_page(struct page *page, swp_entry_t entry)
{
unsigned long offset = swp_offset(entry);
unsigned long idx = offset / SWAPFILE_CLUSTER;
unsigned char *map;
unsigned int i, free_entries = 0;
unsigned char val;
+ int size = swap_entry_size(hpage_nr_pages(page));
si = _swap_info_get(entry);
if (!si)
return;
- ci = lock_cluster(si, offset);
- VM_BUG_ON(!cluster_is_huge(ci));
- map = si->swap_map + offset;
- for (i = 0; i < SWAPFILE_CLUSTER; i++) {
- val = map[i];
- VM_BUG_ON(!(val & SWAP_HAS_CACHE));
- if (val == SWAP_HAS_CACHE)
- free_entries++;
- }
- if (!free_entries) {
- for (i = 0; i < SWAPFILE_CLUSTER; i++)
- map[i] &= ~SWAP_HAS_CACHE;
+ ci = lock_cluster_or_swap_info(si, offset);
+ if (size == SWAPFILE_CLUSTER) {
+ VM_BUG_ON(!cluster_is_huge(ci));
+ map = si->swap_map + offset;
+ for (i = 0; i < SWAPFILE_CLUSTER; i++) {
+ val = map[i];
+ VM_BUG_ON(!(val & SWAP_HAS_CACHE));
+ if (val == SWAP_HAS_CACHE)
+ free_entries++;
+ }
+ cluster_clear_huge(ci);
+ if (free_entries == SWAPFILE_CLUSTER) {
+ unlock_cluster_or_swap_info(si, ci);
+ spin_lock(&si->lock);
+ ci = lock_cluster(si, offset);
+ memset(map, 0, SWAPFILE_CLUSTER);
+ unlock_cluster(ci);
+ mem_cgroup_uncharge_swap(entry, SWAPFILE_CLUSTER);
+ swap_free_cluster(si, idx);
+ spin_unlock(&si->lock);
+ return;
+ }
}
- cluster_clear_huge(ci);
- unlock_cluster(ci);
- if (free_entries == SWAPFILE_CLUSTER) {
- spin_lock(&si->lock);
- ci = lock_cluster(si, offset);
- memset(map, 0, SWAPFILE_CLUSTER);
- unlock_cluster(ci);
- mem_cgroup_uncharge_swap(entry, SWAPFILE_CLUSTER);
- swap_free_cluster(si, idx);
- spin_unlock(&si->lock);
- } else if (free_entries) {
- for (i = 0; i < SWAPFILE_CLUSTER; i++, entry.val++) {
- if (!__swap_entry_free(si, entry, SWAP_HAS_CACHE))
- free_swap_slot(entry);
+ for (i = 0; i < size; i++, entry.val++) {
+ if (!__swap_entry_free_locked(si, offset + i, SWAP_HAS_CACHE)) {
+ unlock_cluster_or_swap_info(si, ci);
+ free_swap_slot(entry);
+ if (i == size - 1)
+ return;
+ lock_cluster_or_swap_info(si, offset);
}
}
+ unlock_cluster_or_swap_info(si, ci);
}
+#ifdef CONFIG_THP_SWAP
int split_swap_cluster(swp_entry_t entry)
{
struct swap_info_struct *si;
unlock_cluster(ci);
return 0;
}
-#else
-static inline void swapcache_free_cluster(swp_entry_t entry)
-{
-}
-#endif /* CONFIG_THP_SWAP */
-
-void put_swap_page(struct page *page, swp_entry_t entry)
-{
- if (!PageTransHuge(page))
- swapcache_free(entry);
- else
- swapcache_free_cluster(entry);
-}
+#endif
static int swp_entry_cmp(const void *ent1, const void *ent2)
{
return count;
}
-#ifdef CONFIG_THP_SWAP
static bool swap_page_trans_huge_swapped(struct swap_info_struct *si,
swp_entry_t entry)
{
ci = lock_cluster_or_swap_info(si, offset);
if (!ci || !cluster_is_huge(ci)) {
- if (map[roffset] != SWAP_HAS_CACHE)
+ if (swap_count(map[roffset]))
ret = true;
goto unlock_out;
}
for (i = 0; i < SWAPFILE_CLUSTER; i++) {
- if (map[offset + i] != SWAP_HAS_CACHE) {
+ if (swap_count(map[offset + i])) {
ret = true;
break;
}
swp_entry_t entry;
struct swap_info_struct *si;
- if (likely(!PageTransCompound(page)))
+ if (!IS_ENABLED(CONFIG_THP_SWAP) || likely(!PageTransCompound(page)))
return page_swapcount(page) != 0;
page = compound_head(page);
/* hugetlbfs shouldn't call it */
VM_BUG_ON_PAGE(PageHuge(page), page);
- if (likely(!PageTransCompound(page))) {
- mapcount = atomic_read(&page->_mapcount) + 1;
- if (total_mapcount)
- *total_mapcount = mapcount;
+ if (!IS_ENABLED(CONFIG_THP_SWAP) || likely(!PageTransCompound(page))) {
+ mapcount = page_trans_huge_mapcount(page, total_mapcount);
if (PageSwapCache(page))
swapcount = page_swapcount(page);
if (total_swapcount)
return map_swapcount;
}
-#else
-#define swap_page_trans_huge_swapped(si, entry) swap_swapcount(si, entry)
-#define page_swapped(page) (page_swapcount(page) != 0)
-
-static int page_trans_huge_map_swapcount(struct page *page, int *total_mapcount,
- int *total_swapcount)
-{
- int mapcount, swapcount = 0;
-
- /* hugetlbfs shouldn't call it */
- VM_BUG_ON_PAGE(PageHuge(page), page);
-
- mapcount = page_trans_huge_mapcount(page, total_mapcount);
- if (PageSwapCache(page))
- swapcount = page_swapcount(page);
- if (total_swapcount)
- *total_swapcount = swapcount;
- return mapcount + swapcount;
-}
-#endif
/*
* We can write to an anon page without COW if there are no other references
down_write(&shrinker_rwsem);
list_add_tail(&shrinker->list, &shrinker_list);
#ifdef CONFIG_MEMCG_KMEM
- idr_replace(&shrinker_idr, shrinker, shrinker->id);
+ if (shrinker->flags & SHRINKER_MEMCG_AWARE)
+ idr_replace(&shrinker_idr, shrinker, shrinker->id);
#endif
up_write(&shrinker_rwsem);
}
refcount = 2;
if (!page_ref_freeze(page, refcount))
goto cannot_free;
- /* note: atomic_cmpxchg in page_freeze_refs provides the smp_rmb */
+ /* note: atomic_cmpxchg in page_ref_freeze provides the smp_rmb */
if (unlikely(PageDirty(page))) {
page_ref_unfreeze(page, refcount);
goto cannot_free;
use File::Basename;
use Cwd 'abs_path';
use Term::ANSIColor qw(:constants);
+use Encode qw(decode encode);
my $P = $0;
my $D = dirname(abs_path($P));
my $exit = 0;
+my $perl_version_ok = 1;
if ($^V && $^V lt $minimum_perl_version) {
+ $perl_version_ok = 0;
printf "$P: requires at least perl version %vd\n", $minimum_perl_version;
- if (!$ignore_perl_version) {
- exit(1);
- }
+ exit(1) if (!$ignore_perl_version);
}
#if no filenames are given, push '-' to read patch from stdin
__force|
__iomem|
__must_check|
- __init_refok|
__kprobes|
__ref|
+ __refconst|
+ __refdata|
__rcu|
__private
}x;
return $status =~ /obsolete/i;
}
+sub is_SPDX_License_valid {
+ my ($license) = @_;
+
+ return 1 if (!$tree || which("python") eq "" || !(-e "$root/scripts/spdxcheck.py") || !(-e "$root/.git"));
+
+ my $root_path = abs_path($root);
+ my $status = `cd "$root_path"; echo "$license" | python scripts/spdxcheck.py -`;
+ return 0 if ($status ne "");
+ return 1;
+}
+
my $camelcase_seeded = 0;
sub seed_camelcase_includes {
return if ($camelcase_seeded);
hash_show_words(\%use_type, "Used");
hash_show_words(\%ignore_type, "Ignored");
- if ($^V lt 5.10.0) {
+ if (!$perl_version_ok) {
print << "EOM"
NOTE: perl $^V is not modern enough to detect all possible issues.
- An upgrade to at least perl v5.10.0 is suggested.
+ An upgrade to at least perl $minimum_perl_version is suggested.
EOM
}
if ($exit) {
our $clean = 1;
my $signoff = 0;
+ my $author = '';
+ my $authorsignoff = 0;
my $is_patch = 0;
+ my $is_binding_patch = -1;
my $in_header_lines = $file ? 0 : 1;
my $in_commit_log = 0; #Scanning lines before patch
my $has_commit_log = 0; #Encountered lines before patch
+ my $commit_log_lines = 0; #Number of commit log lines
my $commit_log_possible_stack_dump = 0;
my $commit_log_long_line = 0;
my $commit_log_has_diff = 0;
$check = $check_orig;
}
$checklicenseline = 1;
+
+ if ($realfile !~ /^MAINTAINERS/) {
+ my $last_binding_patch = $is_binding_patch;
+
+ $is_binding_patch = () = $realfile =~ m@^(?:Documentation/devicetree/|include/dt-bindings/)@;
+
+ if (($last_binding_patch != -1) &&
+ ($last_binding_patch ^ $is_binding_patch)) {
+ WARN("DT_SPLIT_BINDING_PATCH",
+ "DT binding docs and includes should be a separate patch. See: Documentation/devicetree/bindings/submitting-patches.txt\n");
+ }
+ }
+
next;
}
$cnt_lines++ if ($realcnt != 0);
+# Verify the existence of a commit log if appropriate
+# 2 is used because a $signature is counted in $commit_log_lines
+ if ($in_commit_log) {
+ if ($line !~ /^\s*$/) {
+ $commit_log_lines++; #could be a $signature
+ }
+ } elsif ($has_commit_log && $commit_log_lines < 2) {
+ WARN("COMMIT_MESSAGE",
+ "Missing commit description - Add an appropriate one\n");
+ $commit_log_lines = 2; #warn only once
+ }
+
# Check if the commit log has what seems like a diff which can confuse patch
if ($in_commit_log && !$commit_log_has_diff &&
(($line =~ m@^\s+diff\b.*a/[\w/]+@ &&
}
}
+# Check the patch for a From:
+ if (decode("MIME-Header", $line) =~ /^From:\s*(.*)/) {
+ $author = $1;
+ $author = encode("utf8", $author) if ($line =~ /=\?utf-8\?/i);
+ $author =~ s/"//g;
+ }
+
# Check the patch for a signoff:
if ($line =~ /^\s*signed-off-by:/i) {
$signoff++;
$in_commit_log = 0;
+ if ($author ne '') {
+ my $l = $line;
+ $l =~ s/"//g;
+ if ($l =~ /^\s*signed-off-by:\s*\Q$author\E/i) {
+ $authorsignoff = 1;
+ }
+ }
}
# Check if MAINTAINERS is being updated. If so, there's probably no need to
if ($comment !~ /^$/ &&
$rawline !~ /^\+\Q$comment\E SPDX-License-Identifier: /) {
- WARN("SPDX_LICENSE_TAG",
- "Missing or malformed SPDX-License-Identifier tag in line $checklicenseline\n" . $herecurr);
+ WARN("SPDX_LICENSE_TAG",
+ "Missing or malformed SPDX-License-Identifier tag in line $checklicenseline\n" . $herecurr);
+ } elsif ($rawline =~ /(SPDX-License-Identifier: .*)/) {
+ my $spdx_license = $1;
+ if (!is_SPDX_License_valid($spdx_license)) {
+ WARN("SPDX_LICENSE_TAG",
+ "'$spdx_license' is not supported in LICENSES/...\n" . $herecurr);
+ }
}
}
}
}
# check indentation starts on a tab stop
- if ($^V && $^V ge 5.10.0 &&
+ if ($perl_version_ok &&
$sline =~ /^\+\t+( +)(?:$c90_Keywords\b|\{\s*$|\}\s*(?:else\b|while\b|\s*$)|$Declare\s*$Ident\s*[;=])/) {
my $indent = length($1);
if ($indent % 8) {
}
# check multi-line statement indentation matches previous line
- if ($^V && $^V ge 5.10.0 &&
+ if ($perl_version_ok &&
$prevline =~ /^\+([ \t]*)((?:$c90_Keywords(?:\s+if)\s*)|(?:$Declare\s*)?(?:$Ident|\(\s*\*\s*$Ident\s*\))\s*|(?:\*\s*)*$Lval\s*=\s*$Ident\s*)\(.*(\&\&|\|\||,)\s*$/) {
$prevline =~ /^\+(\t*)(.*)$/;
my $oldindent = $1;
"type '$tmp' should be specified in [[un]signed] [short|int|long|long long] order\n" . $herecurr);
}
+# check for unnecessary <signed> int declarations of short/long/long long
+ while ($sline =~ m{\b($TypeMisordered(\s*\*)*|$C90_int_types)\b}g) {
+ my $type = trim($1);
+ next if ($type !~ /\bint\b/);
+ next if ($type !~ /\b(?:short|long\s+long|long)\b/);
+ my $new_type = $type;
+ $new_type =~ s/\b\s*int\s*\b/ /;
+ $new_type =~ s/\b\s*(?:un)?signed\b\s*/ /;
+ $new_type =~ s/^const\s+//;
+ $new_type = "unsigned $new_type" if ($type =~ /\bunsigned\b/);
+ $new_type = "const $new_type" if ($type =~ /^const\b/);
+ $new_type =~ s/\s+/ /g;
+ $new_type = trim($new_type);
+ if (WARN("UNNECESSARY_INT",
+ "Prefer '$new_type' over '$type' as the int is unnecessary\n" . $herecurr) &&
+ $fix) {
+ $fixed[$fixlinenr] =~ s/\b\Q$type\E\b/$new_type/;
+ }
+ }
+
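Illustrative declarations the new check flags, with the forms --fix
rewrites them to (example code, not from the patch):

	unsigned short int nr;		/* WARN: prefer 'unsigned short' */
	long long int off;		/* WARN: prefer 'long long'      */
	const signed long int id = 0;	/* WARN: prefer 'const long'    */
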
# check for static const char * arrays.
if ($line =~ /\bstatic\s+const\s+char\s*\*\s*(\w+)\s*\[\s*\]\s*=\s*/) {
WARN("STATIC_CONST_CHAR_ARRAY",
# function brace can't be on same line, except for #defines of do while,
# or if closed on same line
- if ($^V && $^V ge 5.10.0 &&
+ if ($perl_version_ok &&
$sline =~ /$Type\s*$Ident\s*$balanced_parens\s*\{/ &&
$sline !~ /\#\s*define\b.*do\s*\{/ &&
$sline !~ /}/) {
#need space before brace following if, while, etc
if (($line =~ /\(.*\)\{/ && $line !~ /\($Type\)\{/) ||
- $line =~ /do\{/) {
+ $line =~ /\b(?:else|do)\{/) {
if (ERROR("SPACING",
"space required before the open brace '{'\n" . $herecurr) &&
$fix) {
- $fixed[$fixlinenr] =~ s/^(\+.*(?:do|\)))\{/$1 {/;
+ $fixed[$fixlinenr] =~ s/^(\+.*(?:do|else|\)))\{/$1 {/;
}
}
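
The widened pattern now also catches a cuddled else. For example
(illustrative code):

	if (err)
		cleanup();
	else{			/* ERROR: space required before '{' */
		commit();	/* --fix turns "else{" into "else {" */
	}
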
# check for unnecessary parentheses around comparisons in if uses
# when !drivers/staging or command-line uses --strict
if (($realfile !~ m@^(?:drivers/staging/)@ || $check_orig) &&
- $^V && $^V ge 5.10.0 && defined($stat) &&
+ $perl_version_ok && defined($stat) &&
$stat =~ /(^.\s*if\s*($balanced_parens))/) {
my $if_stat = $1;
my $test = substr($2, 1, -1);
# return is not a function
if (defined($stat) && $stat =~ /^.\s*return(\s*)\(/s) {
my $spacing = $1;
- if ($^V && $^V ge 5.10.0 &&
+ if ($perl_version_ok &&
$stat =~ /^.\s*return\s*($balanced_parens)\s*;\s*$/) {
my $value = $1;
$value = deparenthesize($value);
}
# if statements using unnecessary parentheses - ie: if ((foo == bar))
- if ($^V && $^V ge 5.10.0 &&
+ if ($perl_version_ok &&
$line =~ /\bif\s*((?:\(\s*){2,})/) {
my $openparens = $1;
my $count = $openparens =~ tr@\(@\(@;
# avoid cases like "foo + BAR < baz"
# only fix matches surrounded by parentheses to avoid incorrect
# conversions like "FOO < baz() + 5" being "misfixed" to "baz() > FOO + 5"
- if ($^V && $^V ge 5.10.0 &&
+ if ($perl_version_ok &&
$line =~ /^\+(.*)\b($Constant|[A-Z_][A-Z0-9_]*)\s*($Compare)\s*($LvalOrFunc)/) {
my $lead = $1;
my $const = $2;
if (defined $define_args && $define_args ne "") {
$define_args = substr($define_args, 1, length($define_args) - 2);
$define_args =~ s/\s*//g;
+ $define_args =~ s/\\\+?//g;
@def_args = split(",", $define_args);
}
# do {} while (0) macro tests:
# single-statement macros do not need to be enclosed in do while (0) loop,
# macro should not end with a semicolon
- if ($^V && $^V ge 5.10.0 &&
+ if ($perl_version_ok &&
$realfile !~ m@/vmlinux.lds.h$@ &&
$line =~ /^.\s*\#\s*define\s+$Ident(\()?/) {
my $ln = $linenr;
}
# concatenated string without spaces between elements
- if ($line =~ /$String[A-Z_]/ || $line =~ /[A-Za-z0-9_]$String/) {
- CHK("CONCATENATED_STRING",
- "Concatenated strings should use spaces between elements\n" . $herecurr);
+ if ($line =~ /$String[A-Za-z0-9_]/ || $line =~ /[A-Za-z0-9_]$String/) {
+ if (CHK("CONCATENATED_STRING",
+ "Concatenated strings should use spaces between elements\n" . $herecurr) &&
+ $fix) {
+ while ($line =~ /($String)/g) {
+ my $extracted_string = substr($rawline, $-[0], $+[0] - $-[0]);
+ $fixed[$fixlinenr] =~ s/\Q$extracted_string\E([A-Za-z0-9_])/$extracted_string $1/;
+ $fixed[$fixlinenr] =~ s/([A-Za-z0-9_])\Q$extracted_string\E/$1 $extracted_string/;
+ }
+ }
}
# uncoalesced string fragments
if ($line =~ /$String\s*"/) {
- WARN("STRING_FRAGMENTS",
- "Consecutive strings are generally better as a single string\n" . $herecurr);
+ if (WARN("STRING_FRAGMENTS",
+ "Consecutive strings are generally better as a single string\n" . $herecurr) &&
+ $fix) {
+ while ($line =~ /($String)(?=\s*")/g) {
+ my $extracted_string = substr($rawline, $-[0], $+[0] - $-[0]);
+ $fixed[$fixlinenr] =~ s/\Q$extracted_string\E\s*"/substr($extracted_string, 0, -1)/e;
+ }
+ }
}
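
Both string checks can now repair themselves under --fix. Illustrative
examples (IRQ_FMT is a made-up macro):

	pr_info("irq="IRQ_FMT"\n");	/* CHK:  CONCATENATED_STRING */
	pr_info("hello " "world\n");	/* WARN: STRING_FRAGMENTS    */

	/* --fix produces: */
	pr_info("irq=" IRQ_FMT "\n");
	pr_info("hello world\n");
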
# check for non-standard and hex prefixed decimal printf formats
# warn about #if 0
if ($line =~ /^.\s*\#\s*if\s+0\b/) {
- CHK("REDUNDANT_CODE",
- "if this code is redundant consider removing it\n" .
- $herecurr);
+ WARN("IF_0",
+ "Consider removing the code enclosed by this #if 0 and its #endif\n" . $herecurr);
+ }
+
+# warn about #if 1
+ if ($line =~ /^.\s*\#\s*if\s+1\b/) {
+ WARN("IF_1",
+ "Consider removing the #if 1 and its #endif\n" . $herecurr);
}
# check for needless "if (<foo>) fn(<foo>)" uses
}
# check for mask then right shift without a parentheses
- if ($^V && $^V ge 5.10.0 &&
+ if ($perl_version_ok &&
$line =~ /$LvalOrFunc\s*\&\s*($LvalOrFunc)\s*>>/ &&
$4 !~ /^\&/) { # $LvalOrFunc may be &foo, ignore if so
WARN("MASK_THEN_SHIFT",
}
# check for pointer comparisons to NULL
- if ($^V && $^V ge 5.10.0) {
+ if ($perl_version_ok) {
while ($line =~ /\b$LvalOrFunc\s*(==|\!=)\s*NULL\b/g) {
my $val = $1;
my $equal = "!";
}
# Check for __attribute__ weak, or __weak declarations (may have link issues)
- if ($^V && $^V ge 5.10.0 &&
+ if ($perl_version_ok &&
$line =~ /(?:$Declare|$DeclareMisordered)\s*$Ident\s*$balanced_parens\s*(?:$Attribute)?\s*;/ &&
($line =~ /\b__attribute__\s*\(\s*\(.*\bweak\b/ ||
$line =~ /\b__weak\b/)) {
}
# check for vsprintf extension %p<foo> misuses
- if ($^V && $^V ge 5.10.0 &&
+ if ($perl_version_ok &&
defined $stat &&
$stat =~ /^\+(?![^\{]*\{\s*).*\b(\w+)\s*\(.*$String\s*,/s &&
$1 !~ /^_*volatile_*$/) {
}
# Check for misused memsets
- if ($^V && $^V ge 5.10.0 &&
+ if ($perl_version_ok &&
defined $stat &&
$stat =~ /^\+(?:.*?)\bmemset\s*\(\s*$FuncArg\s*,\s*$FuncArg\s*\,\s*$FuncArg\s*\)/) {
}
# Check for memcpy(foo, bar, ETH_ALEN) that could be ether_addr_copy(foo, bar)
-# if ($^V && $^V ge 5.10.0 &&
+# if ($perl_version_ok &&
# defined $stat &&
# $stat =~ /^\+(?:.*?)\bmemcpy\s*\(\s*$FuncArg\s*,\s*$FuncArg\s*\,\s*ETH_ALEN\s*\)/) {
# if (WARN("PREFER_ETHER_ADDR_COPY",
# }
# Check for memcmp(foo, bar, ETH_ALEN) that could be ether_addr_equal*(foo, bar)
-# if ($^V && $^V ge 5.10.0 &&
+# if ($perl_version_ok &&
# defined $stat &&
# $stat =~ /^\+(?:.*?)\bmemcmp\s*\(\s*$FuncArg\s*,\s*$FuncArg\s*\,\s*ETH_ALEN\s*\)/) {
# WARN("PREFER_ETHER_ADDR_EQUAL",
# check for memset(foo, 0x0, ETH_ALEN) that could be eth_zero_addr
# check for memset(foo, 0xFF, ETH_ALEN) that could be eth_broadcast_addr
-# if ($^V && $^V ge 5.10.0 &&
+# if ($perl_version_ok &&
# defined $stat &&
# $stat =~ /^\+(?:.*?)\bmemset\s*\(\s*$FuncArg\s*,\s*$FuncArg\s*\,\s*ETH_ALEN\s*\)/) {
#
# }
# typecasts on min/max could be min_t/max_t
- if ($^V && $^V ge 5.10.0 &&
+ if ($perl_version_ok &&
defined $stat &&
$stat =~ /^\+(?:.*?)\b(min|max)\s*\(\s*$FuncArg\s*,\s*$FuncArg\s*\)/) {
if (defined $2 || defined $7) {
}
# check usleep_range arguments
- if ($^V && $^V ge 5.10.0 &&
+ if ($perl_version_ok &&
defined $stat &&
$stat =~ /^\+(?:.*?)\busleep_range\s*\(\s*($FuncArg)\s*,\s*($FuncArg)\s*\)/) {
my $min = $1;
}
# check for naked sscanf
- if ($^V && $^V ge 5.10.0 &&
+ if ($perl_version_ok &&
defined $stat &&
$line =~ /\bsscanf\b/ &&
($stat !~ /$Ident\s*=\s*sscanf\s*$balanced_parens/ &&
}
# check for simple sscanf that should be kstrto<foo>
- if ($^V && $^V ge 5.10.0 &&
+ if ($perl_version_ok &&
defined $stat &&
$line =~ /\bsscanf\b/) {
my $lc = $stat =~ tr@\n@@;
}
# check for function definitions
- if ($^V && $^V ge 5.10.0 &&
+ if ($perl_version_ok &&
defined $stat &&
$stat =~ /^.\s*(?:$Storage\s+)?$Type\s*($Ident)\s*$balanced_parens\s*{/s) {
$context_function = $1;
# alloc style
# p = alloc(sizeof(struct foo), ...) should be p = alloc(sizeof(*p), ...)
- if ($^V && $^V ge 5.10.0 &&
+ if ($perl_version_ok &&
$line =~ /\b($Lval)\s*\=\s*(?:$balanced_parens)?\s*([kv][mz]alloc(?:_node)?)\s*\(\s*(sizeof\s*\(\s*struct\s+$Lval\s*\))/) {
CHK("ALLOC_SIZEOF_STRUCT",
"Prefer $3(sizeof(*$1)...) over $3($4...)\n" . $herecurr);
}
# check for k[mz]alloc with multiplies that could be kmalloc_array/kcalloc
- if ($^V && $^V ge 5.10.0 &&
+ if ($perl_version_ok &&
defined $stat &&
$stat =~ /^\+\s*($Lval)\s*\=\s*(?:$balanced_parens)?\s*(k[mz]alloc)\s*\(\s*($FuncArg)\s*\*\s*($FuncArg)\s*,/) {
my $oldfunc = $3;
}
# check for krealloc arg reuse
- if ($^V && $^V ge 5.10.0 &&
- $line =~ /\b($Lval)\s*\=\s*(?:$balanced_parens)?\s*krealloc\s*\(\s*\1\s*,/) {
+ if ($perl_version_ok &&
+ $line =~ /\b($Lval)\s*\=\s*(?:$balanced_parens)?\s*krealloc\s*\(\s*($Lval)\s*,/ &&
+ $1 eq $3) {
WARN("KREALLOC_ARG_REUSE",
"Reusing the krealloc arg is almost always a bug\n" . $herecurr);
}
}
# check for switch/default statements without a break;
- if ($^V && $^V ge 5.10.0 &&
+ if ($perl_version_ok &&
defined $stat &&
$stat =~ /^\+[$;\s]*(?:case[$;\s]+\w+[$;\s]*:[$;\s]*|)*[$;\s]*\bdefault[$;\s]*:[$;\s]*;/g) {
my $cnt = statement_rawlines($stat);
"Avoid using bool as bitfield. Prefer bool bitfields as unsigned int or u<8|16|32>\n" . $herecurr);
}
+# check for bool use in .h files
+ if ($realfile =~ /\.h$/ &&
+	    $sline =~ /^.\s+bool\s*$Ident\s*(?::\s*\d+\s*)?;/) {
+ CHK("BOOL_MEMBER",
+ "Avoid using bool structure members because of possible alignment issues - see: https://lkml.org/lkml/2017/11/21/384\n" . $herecurr);
+ }
+
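An illustrative header fragment that the new --strict test complains
about, together with the suggested alternative (hypothetical struct):

	/* include/linux/foo.h */
	struct foo_state {
		bool enabled;		/* CHK BOOL_MEMBER: alignment concerns */
		unsigned int ready:1;	/* preferred bitfield form             */
	};
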
# check for semaphores initialized locked
if ($line =~ /^.\s*sema_init.+,\W?0\W?\)/) {
WARN("CONSIDER_COMPLETION",
}
# likely/unlikely comparisons similar to "(likely(foo) > 0)"
- if ($^V && $^V ge 5.10.0 &&
+ if ($perl_version_ok &&
$line =~ /\b((?:un)?likely)\s*\(\s*$FuncArg\s*\)\s*$Compare/) {
WARN("LIKELY_MISUSE",
"Using $1 should generally have parentheses around the comparison\n" . $herecurr);
# check for DEVICE_ATTR uses that could be DEVICE_ATTR_<FOO>
# and whether or not function naming is typical and if
# DEVICE_ATTR permissions uses are unusual too
- if ($^V && $^V ge 5.10.0 &&
+ if ($perl_version_ok &&
defined $stat &&
$stat =~ /\bDEVICE_ATTR\s*\(\s*(\w+)\s*,\s*\(?\s*(\s*(?:${multi_mode_perms_string_search}|0[0-7]{3,3})\s*)\s*\)?\s*,\s*(\w+)\s*,\s*(\w+)\s*\)/) {
my $var = $1;
# specific definition of not visible in sysfs.
# o Ignore proc_create*(...) uses with a decimal 0 permission as that means
# use the default permissions
- if ($^V && $^V ge 5.10.0 &&
+ if ($perl_version_ok &&
defined $stat &&
$line =~ /$mode_perms_search/) {
foreach my $entry (@mode_permission_funcs) {
ERROR("NOT_UNIFIED_DIFF",
"Does not appear to be a unified-diff format patch\n");
}
- if ($is_patch && $has_commit_log && $chk_signoff && $signoff == 0) {
- ERROR("MISSING_SIGN_OFF",
- "Missing Signed-off-by: line(s)\n");
+ if ($is_patch && $has_commit_log && $chk_signoff) {
+ if ($signoff == 0) {
+ ERROR("MISSING_SIGN_OFF",
+ "Missing Signed-off-by: line(s)\n");
+ } elsif (!$authorsignoff) {
+ WARN("NO_AUTHOR_SIGN_OFF",
+ "Missing Signed-off-by: line by nominal patch author '$author'\n");
+ }
}
print report_dump();
my $output_rolestats = 1;
my $output_section_maxlen = 50;
my $scm = 0;
+my $tree = 1;
my $web = 0;
my $subsystem = 0;
my $status = 0;
my $version = 0;
my $help = 0;
my $find_maintainer_files = 0;
-
+my $maintainer_path;
my $vcs_used = 0;
my $exit = 0;
'subsystem!' => \$subsystem,
'status!' => \$status,
'scm!' => \$scm,
+ 'tree!' => \$tree,
'web!' => \$web,
'letters=s' => \$letters,
'pattern-depth=i' => \$pattern_depth,
'fe|file-emails!' => \$file_emails,
'f|file' => \$from_filename,
'find-maintainer-files' => \$find_maintainer_files,
+ 'mpath|maintainer-path=s' => \$maintainer_path,
'self-test:s' => \$self_test,
'v|version' => \$version,
'h|help|usage' => \$help,
die "$P: Please select at least 1 email option\n";
}
-if (!top_of_kernel_tree($lk_path)) {
+if ($tree && !top_of_kernel_tree($lk_path)) {
die "$P: The current directory does not appear to be "
. "a linux kernel source tree.\n";
}
read_all_maintainer_files();
sub read_all_maintainer_files {
- if (-d "${lk_path}MAINTAINERS") {
- opendir(DIR, "${lk_path}MAINTAINERS") or die $!;
- my @files = readdir(DIR);
- closedir(DIR);
- foreach my $file (@files) {
- push(@mfiles, "${lk_path}MAINTAINERS/$file") if ($file !~ /^\./);
- }
- }
-
- if ($find_maintainer_files) {
- find( { wanted => \&find_is_maintainer_file,
- preprocess => \&find_ignore_git,
- no_chdir => 1,
- }, "${lk_path}");
+ my $path = "${lk_path}MAINTAINERS";
+ if (defined $maintainer_path) {
+ $path = $maintainer_path;
+ # Perl Cookbook tilde expansion if necessary
+ $path =~ s@^~([^/]*)@ $1 ? (getpwnam($1))[7] : ( $ENV{HOME} || $ENV{LOGDIR} || (getpwuid($<))[7])@ex;
+ }
+
+ if (-d $path) {
+ $path .= '/' if ($path !~ m@/$@);
+ if ($find_maintainer_files) {
+ find( { wanted => \&find_is_maintainer_file,
+ preprocess => \&find_ignore_git,
+ no_chdir => 1,
+ }, "$path");
+ } else {
+ opendir(DIR, "$path") or die $!;
+ my @files = readdir(DIR);
+ closedir(DIR);
+ foreach my $file (@files) {
+ push(@mfiles, "$path$file") if ($file !~ /^\./);
+ }
+ }
+ } elsif (-f "$path") {
+ push(@mfiles, "$path");
} else {
- push(@mfiles, "${lk_path}MAINTAINERS") if -f "${lk_path}MAINTAINERS";
+ die "$P: MAINTAINER file not found '$path'\n";
}
-
+ die "$P: No MAINTAINER files found in '$path'\n" if (scalar(@mfiles) == 0);
foreach my $file (@mfiles) {
- read_maintainer_file("$file");
+ read_maintainer_file("$file");
}
}
--sections => print all of the subsystem sections with pattern matches
--letters => print all matching 'letter' types from all matching sections
--mailmap => use .mailmap file (default: $email_use_mailmap)
+ --no-tree => run without a kernel tree
--self-test => show potential issues with MAINTAINERS file content
--version => show version
--help => show this help information
Default options:
- [--email --nogit --git-fallback --m --r --n --l --multiline --pattern-depth=0
- --remove-duplicates --rolestats]
+ [--email --tree --nogit --git-fallback --m --r --n --l --multiline
+ --pattern-depth=0 --remove-duplicates --rolestats]
Notes:
Using "-f directory" may give unexpected results:
abandonning||abandoning
abigious||ambiguous
abitrate||arbitrate
+abord||abort
+aboslute||absolute
abov||above
abreviated||abbreviated
absense||absence
accesss||access
accidentaly||accidentally
accidentually||accidentally
+acclerated||accelerated
accoding||according
accomodate||accommodate
accomodates||accommodates
adddress||address
addreses||addresses
addresss||address
+addrress||address
aditional||additional
aditionally||additionally
aditionaly||additionally
adminstrative||administrative
adress||address
adresses||addresses
+adrresses||addresses
+advertisment||advertisement
adviced||advised
afecting||affecting
againt||against
ambigious||ambiguous
amoung||among
amout||amount
+amplifer||amplifier
an union||a union
an user||a user
an userspace||a userspace
assocation||association
associcated||associated
assotiated||associated
+asssert||assert
assum||assume
assumtpion||assumption
asuming||assuming
asycronous||asynchronous
asynchnous||asynchronous
+asynchromous||asynchronous
+asymetric||asymmetric
+asymmeric||asymmetric
atomatically||automatically
atomicly||atomically
atempt||attempt
availabe||available
availabled||available
availablity||availability
+availaible||available
availale||available
availavility||availability
availble||available
boundry||boundary
brievely||briefly
broadcat||broadcast
+bufufer||buffer
cacluated||calculated
+caculate||calculate
caculation||calculation
+cadidate||candidate
calender||calendar
calescing||coalescing
calle||called
capabitilies||capabilities
capatibilities||capabilities
capapbilities||capabilities
+caputure||capture
carefuly||carefully
cariage||carriage
catagory||category
cehck||check
challange||challenge
challanges||challenges
+chache||cache
chanell||channel
changable||changeable
chanined||chained
charcter||character
chcek||check
chck||check
+checksumed||checksummed
checksuming||checksumming
childern||children
childs||children
conbination||combination
conditionaly||conditionally
conected||connected
+conector||connector
connecetd||connected
configuartion||configuration
+configuation||configuration
configuratoin||configuration
configuraton||configuration
configuretion||configuration
continously||continuously
continueing||continuing
contraints||constraints
+contruct||construct
contol||control
contoller||controller
controled||controlled
deafult||default
deamon||daemon
decompres||decompress
+decsribed||described
decription||description
dectected||detected
defailt||default
desriptor||descriptor
desriptors||descriptors
destionation||destination
+destoried||destroyed
destory||destroy
destoryed||destroyed
destorys||destroys
difinition||definition
dimesions||dimensions
diplay||display
+directon||direction
direectly||directly
+diregard||disregard
disassocation||disassociation
disapear||disappear
disapeared||disappeared
disappared||disappeared
+disbale||disable
+disbaled||disabled
disble||disable
disbled||disabled
disconnet||disconnect
discontinous||discontinuous
+disharge||discharge
dispertion||dispersion
dissapears||disappears
distiction||distinction
+divisable||divisible
+divsiors||divisors
docuentation||documentation
documantation||documentation
documentaion||documentation
downlads||downloads
druing||during
dynmaic||dynamic
+eanable||enable
easilly||easily
ecspecially||especially
edditable||editable
extened||extended
extensability||extensibility
extention||extension
+extenstion||extension
extracter||extractor
+faield||failed
falied||failed
faild||failed
+failer||failure
faill||fail
failied||failed
faillure||failure
forse||force
fortan||fortran
forwardig||forwarding
+frambuffer||framebuffer
framming||framing
framwork||framework
frequncy||frequency
fucntion||function
fuction||function
fuctions||functions
+funcation||function
funcion||function
functionallity||functionality
functionaly||functionally
gaurenteed||guaranteed
generiously||generously
genereate||generate
+genereted||generated
genric||generic
globel||global
grabing||grabbing
halfs||halves
hander||handler
handfull||handful
+hanlde||handle
hanled||handled
happend||happened
harware||hardware
hybernate||hibernate
hierachy||hierarchy
hierarchie||hierarchy
+homogenous||homogeneous
howver||however
hsould||should
hypervior||hypervisor
identidier||identifier
iligal||illegal
illigal||illegal
+illgal||illegal
+iomaped||iomapped
imblance||imbalance
immeadiately||immediately
immedaite||immediate
initators||initiators
initialiazation||initialization
initializiation||initialization
+initialze||initialize
initialzed||initialized
initilization||initialization
initilize||initialize
inofficial||unofficial
+inrerface||interface
insititute||institute
instal||install
instanciated||instantiated
intrrupt||interrupt
intterrupt||interrupt
intuative||intuitive
+inavlid||invalid
invaid||invalid
invald||invalid
invalde||invalid
langugage||language
lauch||launch
layed||laid
+legnth||length
leightweight||lightweight
lengh||length
lenght||length
loggging||logging
loggin||login
logile||logfile
+loobpack||loopback
loosing||losing
losted||lost
machinary||machinery
maintainence||maintenance
maintan||maintain
makeing||making
+mailformed||malformed
malplaced||misplaced
malplace||misplace
managable||manageable
mangement||management
manoeuvering||maneuvering
mappping||mapping
+matchs||matches
mathimatical||mathematical
mathimatic||mathematic
mathimatics||mathematics
messsages||messages
micropone||microphone
microprocesspr||microprocessor
+migrateable||migratable
milliseonds||milliseconds
minium||minimum
minimam||minimum
miximum||maximum
mmnemonic||mnemonic
mnay||many
+modfiy||modify
modulues||modules
momery||memory
memomry||memory
numebr||number
numner||number
obtaion||obtain
+obusing||abusing
occassionally||occasionally
occationally||occasionally
occurance||occurrence
occured||occurred
occuring||occurring
offet||offset
+offloded||offloaded
omited||omitted
omiting||omitting
omitt||omit
parametised||parametrised
paramter||parameter
paramters||parameters
+parmaters||parameters
particuarly||particularly
particularily||particularly
partiton||partition
pathes||paths
pecularities||peculiarities
peformance||performance
+peforming||performing
peice||piece
pendantic||pedantic
peprocessor||preprocessor
persistance||persistence
persistant||persistent
plalform||platform
+platfoem||platform
platfrom||platform
plattform||platform
pleaes||please
positon||position
possibilites||possibilities
powerfull||powerful
+preamle||preamble
preample||preamble
preapre||prepare
preceeded||preceded
prefferably||preferably
premption||preemption
prepaired||prepared
+preperation||preparation
pressre||pressure
primative||primitive
princliple||principle
recyle||recycle
redircet||redirect
redirectrion||redirection
+redundacy||redundancy
reename||rename
refcounf||refcount
refence||reference
refernnce||reference
refrence||reference
registerd||registered
+registeration||registration
registeresd||registered
registerred||registered
registes||registers
requred||required
requried||required
requst||request
+reregisteration||reregistration
reseting||resetting
+reseverd||reserved
resizeable||resizable
resouce||resource
resouces||resources
ressizes||resizes
ressource||resource
ressources||resources
+restesting||retesting
retransmited||retransmitted
retreived||retrieved
retreive||retrieve
safly||safely
safty||safety
savable||saveable
+scaleing||scaling
scaned||scanned
scaning||scanning
scarch||search
secquence||sequence
secund||second
segement||segment
+semaphone||semaphore
+senario||scenario
senarios||scenarios
sentivite||sensitive
separatly||separately
seperatly||separately
seperator||separator
sepperate||separate
+seqeunce||sequence
+seqeuncer||sequencer
+seqeuencer||sequencer
sequece||sequence
sequencial||sequential
serveral||several
+servive||service
setts||sets
settting||setting
shotdown||shutdown
standart||standard
staticly||statically
stoped||stopped
+stoping||stopping
stoppped||stopped
straming||streaming
struc||struct
suble||subtle
substract||subtract
submition||submission
+suceed||succeed
succesfully||successfully
succesful||successful
successed||succeeded
surpresses||suppresses
susbsystem||subsystem
suspeneded||suspended
+suspsend||suspend
suspicously||suspiciously
swaping||swapping
switchs||switches
symetric||symmetric
synax||syntax
synchonized||synchronized
+synchronuously||synchronously
syncronize||synchronize
syncronized||synchronized
syncronizing||synchronizing
sytem||system
sythesis||synthesis
taht||that
+tansmit||transmit
targetted||targeted
targetting||targeting
+taskelt||tasklet
teh||the
temorary||temporary
temproarily||temporarily
+thead||thread
therfore||therefore
thier||their
threds||threads
throught||through
troughput||throughput
thses||these
+tiggers||triggers
tiggered||triggered
tipically||typically
+timeing||timing
timout||timeout
tmis||this
torerable||tolerable
+traking||tracking
tramsmitted||transmitted
tramsmit||transmit
tranasction||transaction
trasfer||transfer
trasmission||transmission
treshold||threshold
+trigerred||triggered
trigerring||triggering
trun||turn
tunning||tuning
tyep||type
udpate||update
uesd||used
+uknown||unknown
uncommited||uncommitted
unconditionaly||unconditionally
underun||underrun
unexpexted||unexpected
unfortunatelly||unfortunately
unifiy||unify
+uniterrupted||uninterrupted
unintialized||uninitialized
unkmown||unknown
unknonw||unknown
unknow||unknown
unkown||unknown
+unamed||unnamed
+uneeded||unneeded
unneded||unneeded
unneccecary||unnecessary
unneccesary||unnecessary
usege||usage
usera||users
usualy||usually
+usupported||unsupported
utilites||utilities
utillities||utilities
utilties||utilities
virtiual||virtual
visiters||visitors
vitual||virtual
+vunerable||vulnerable
wakeus||wakeups
+wathdog||watchdog
wating||waiting
wiat||wait
wether||whether
static void __init do_security_initcalls(void)
{
int ret;
- initcall_t *call;
- call = __security_initcall_start;
+ initcall_t call;
+ initcall_entry_t *ce;
+
+ ce = __security_initcall_start;
trace_initcall_level("security");
- while (call < __security_initcall_end) {
- trace_initcall_start((*call));
- ret = (*call) ();
- trace_initcall_finish((*call), ret);
- call++;
+ while (ce < __security_initcall_end) {
+ call = initcall_from_entry(ce);
+ trace_initcall_start(call);
+ ret = call();
+ trace_initcall_finish(call, ret);
+ ce++;
}
}
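
For reference, initcall_entry_t and initcall_from_entry() are the plumbing
that lets initcall tables be emitted as 32-bit relative references on
architectures that support it. A sketch of the definitions this hunk
assumes (hedged; they live in include/linux/init.h, not in this hunk):

	#ifdef CONFIG_HAVE_ARCH_PREL32_RELOCATIONS
	typedef int initcall_entry_t;

	static inline initcall_t initcall_from_entry(initcall_entry_t *entry)
	{
		return offset_to_ptr(entry);	/* entry holds a relative offset */
	}
	#else
	typedef initcall_t initcall_entry_t;

	static inline initcall_t initcall_from_entry(initcall_entry_t *entry)
	{
		return *entry;			/* entry holds the pointer itself */
	}
	#endif
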
/proc-uptime-001
/proc-uptime-002
/read
+/self
+/thread-self
CFLAGS += -Wall -O2 -Wno-unused-function
+CFLAGS += -D_GNU_SOURCE
TEST_GEN_PROGS :=
TEST_GEN_PROGS += fd-001-lookup
TEST_GEN_PROGS += proc-uptime-001
TEST_GEN_PROGS += proc-uptime-002
TEST_GEN_PROGS += read
+TEST_GEN_PROGS += self
+TEST_GEN_PROGS += thread-self
include ../lib.mk
#include <stdbool.h>
#include <stdlib.h>
#include <string.h>
+#include <unistd.h>
+#include <sys/syscall.h>
+
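+/*
+ * Use raw syscalls here: glibc historically provides no gettid()
+ * wrapper and may cache getpid(), so going through syscall(2) returns
+ * the values the kernel itself reports.
+ */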
+static inline pid_t sys_getpid(void)
+{
+ return syscall(SYS_getpid);
+}
+
+static inline pid_t sys_gettid(void)
+{
+ return syscall(SYS_gettid);
+}
static inline bool streq(const char *s1, const char *s2)
{
--- /dev/null
+/*
+ * Copyright © 2018 Alexey Dobriyan <adobriyan@gmail.com>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+// Test that /proc/self gives correct TGID.
+#undef NDEBUG
+#include <assert.h>
+#include <stdio.h>
+#include <unistd.h>
+
+#include "proc.h"
+
+int main(void)
+{
+ char buf1[64], buf2[64];
+ pid_t pid;
+ ssize_t rv;
+
+ pid = sys_getpid();
+ snprintf(buf1, sizeof(buf1), "%u", pid);
+
+ rv = readlink("/proc/self", buf2, sizeof(buf2));
+ assert(rv == strlen(buf1));
+ buf2[rv] = '\0';
+ assert(streq(buf1, buf2));
+
+ return 0;
+}
--- /dev/null
+/*
+ * Copyright © 2018 Alexey Dobriyan <adobriyan@gmail.com>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+// Test that /proc/thread-self gives correct TGID/PID.
+#undef NDEBUG
+#include <assert.h>
+#include <sched.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/mman.h>
+#include <sys/wait.h>
+
+#include "proc.h"
+
+int f(void *arg)
+{
+ char buf1[64], buf2[64];
+ pid_t pid, tid;
+ ssize_t rv;
+
+ pid = sys_getpid();
+ tid = sys_gettid();
+ snprintf(buf1, sizeof(buf1), "%u/task/%u", pid, tid);
+
+ rv = readlink("/proc/thread-self", buf2, sizeof(buf2));
+ assert(rv == strlen(buf1));
+ buf2[rv] = '\0';
+ assert(streq(buf1, buf2));
+
+ if (arg)
+ exit(0);
+ return 0;
+}
+
+int main(void)
+{
+ const int PAGE_SIZE = sysconf(_SC_PAGESIZE);
+ pid_t pid;
+ void *stack;
+
+ /* main thread */
+ f((void *)0);
+
+ stack = mmap(NULL, 2 * PAGE_SIZE, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
+ assert(stack != MAP_FAILED);
+ /* side thread */
+ pid = clone(f, stack + PAGE_SIZE, CLONE_THREAD|CLONE_SIGHAND|CLONE_VM, (void *)1);
+ assert(pid > 0);
+ pause();
+
+ return 0;
+}
hugepage-mmap
hugepage-shm
map_hugetlb
+map_populate
thuge-gen
compaction_test
mlock2-tests
TEST_GEN_FILES += hugepage-mmap
TEST_GEN_FILES += hugepage-shm
TEST_GEN_FILES += map_hugetlb
+TEST_GEN_FILES += map_populate
TEST_GEN_FILES += mlock-random-test
TEST_GEN_FILES += mlock2-tests
TEST_GEN_FILES += on-fault-limit
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2018 Dmitry Safonov, Arista Networks
+ *
+ * MAP_POPULATE | MAP_PRIVATE should COW VMA pages.
+ */
+
+#define _GNU_SOURCE
+#include <errno.h>
+#include <fcntl.h>
+#include <sys/mman.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#ifndef MMAP_SZ
+#define MMAP_SZ 4096
+#endif
+
+#define BUG_ON(condition, description) \
+ do { \
+ if (condition) { \
+ fprintf(stderr, "[FAIL]\t%s:%d\t%s:%s\n", __func__, \
+ __LINE__, (description), strerror(errno)); \
+ exit(1); \
+ } \
+ } while (0)
+
+static int parent_f(int sock, unsigned long *smap, int child)
+{
+ int status, ret;
+
+ ret = read(sock, &status, sizeof(int));
+ BUG_ON(ret <= 0, "read(sock)");
+
+ *smap = 0x22222BAD;
+ ret = msync(smap, MMAP_SZ, MS_SYNC);
+ BUG_ON(ret, "msync()");
+
+ ret = write(sock, &status, sizeof(int));
+ BUG_ON(ret <= 0, "write(sock)");
+
+ waitpid(child, &status, 0);
+ BUG_ON(!WIFEXITED(status), "child in unexpected state");
+
+ return WEXITSTATUS(status);
+}
+
+static int child_f(int sock, unsigned long *smap, int fd)
+{
+ int ret, buf = 0;
+
+ smap = mmap(0, MMAP_SZ, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_POPULATE, fd, 0);
+ BUG_ON(smap == MAP_FAILED, "mmap()");
+
+ BUG_ON(*smap != 0xdeadbabe, "MAP_PRIVATE | MAP_POPULATE changed file");
+
+ ret = write(sock, &buf, sizeof(int));
+ BUG_ON(ret <= 0, "write(sock)");
+
+ ret = read(sock, &buf, sizeof(int));
+ BUG_ON(ret <= 0, "read(sock)");
+
+ BUG_ON(*smap == 0x22222BAD, "MAP_POPULATE didn't COW private page");
+ BUG_ON(*smap != 0xdeadbabe, "mapping was corrupted");
+
+ return 0;
+}
+
+int main(int argc, char **argv)
+{
+ int sock[2], child, ret;
+ FILE *ftmp;
+ unsigned long *smap;
+
+ ftmp = tmpfile();
+ BUG_ON(ftmp == 0, "tmpfile()");
+
+ ret = ftruncate(fileno(ftmp), MMAP_SZ);
+ BUG_ON(ret, "ftruncate()");
+
+ smap = mmap(0, MMAP_SZ, PROT_READ | PROT_WRITE,
+ MAP_SHARED, fileno(ftmp), 0);
+ BUG_ON(smap == MAP_FAILED, "mmap()");
+
+ *smap = 0xdeadbabe;
+ /* Probably unnecessary, but let it be. */
+ ret = msync(smap, MMAP_SZ, MS_SYNC);
+ BUG_ON(ret, "msync()");
+
+ ret = socketpair(PF_LOCAL, SOCK_SEQPACKET, 0, sock);
+ BUG_ON(ret, "socketpair()");
+
+ child = fork();
+ BUG_ON(child == -1, "fork()");
+
+ if (child) {
+ ret = close(sock[0]);
+ BUG_ON(ret, "close()");
+
+ return parent_f(sock[1], smap, child);
+ }
+
+ ret = close(sock[1]);
+ BUG_ON(ret, "close()");
+
+ return child_f(sock[0], smap, fileno(ftmp));
+}
echo "[PASS]"
fi
+echo "--------------------"
+echo "running map_populate"
+echo "--------------------"
+./map_populate
+if [ $? -ne 0 ]; then
+ echo "[FAIL]"
+ exitcode=1
+else
+ echo "[PASS]"
+fi
+
echo "--------------------"
echo "running mlock2-tests"
echo "--------------------"
static unsigned long long kvm_createvm_count;
static unsigned long long kvm_active_vms;
-__weak void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm,
- unsigned long start, unsigned long end)
+__weak int kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm,
+ unsigned long start, unsigned long end, bool blockable)
{
+ return 0;
}
bool kvm_is_reserved_pfn(kvm_pfn_t pfn)
srcu_read_unlock(&kvm->srcu, idx);
}
-static void kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn,
+static int kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn,
struct mm_struct *mm,
unsigned long start,
- unsigned long end)
+ unsigned long end,
+ bool blockable)
{
struct kvm *kvm = mmu_notifier_to_kvm(mn);
int need_tlb_flush = 0, idx;
+ int ret;
idx = srcu_read_lock(&kvm->srcu);
spin_lock(&kvm->mmu_lock);
spin_unlock(&kvm->mmu_lock);
- kvm_arch_mmu_notifier_invalidate_range(kvm, start, end);
+ ret = kvm_arch_mmu_notifier_invalidate_range(kvm, start, end, blockable);
srcu_read_unlock(&kvm->srcu, idx);
+
+ return ret;
}
static void kvm_mmu_notifier_invalidate_range_end(struct mmu_notifier *mn,