Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net
authorLinus Torvalds <torvalds@linux-foundation.org>
Mon, 19 Feb 2018 19:58:19 +0000 (11:58 -0800)
committerLinus Torvalds <torvalds@linux-foundation.org>
Mon, 19 Feb 2018 19:58:19 +0000 (11:58 -0800)
Pull networking fixes from David Miller:

 1) Prevent index integer overflow in ptr_ring, from Jason Wang.

 2) Program mvpp2 multicast filter properly, from Mikulas Patocka.

 3) The bridge brport attribute file is write only and doesn't have a
    ->show() method, don't blindly invoke it. From Xin Long.

 4) Inverted mask used in genphy_setup_forced(), from Ingo van Lil.

 5) Fix multiple definition issue with if_ether.h UAPI header, from
    Hauke Mehrtens.

 6) Fix GFP_KERNEL usage in atomic in RDS protocol code, from Sowmini
    Varadhan.

 7) Revert XDP redirect support from thunderx driver, it is not
    implemented properly. From Jesper Dangaard Brouer.

 8) Fix missing RTNL protection across some tipc operations, from Ying
    Xue.

 9) Return the correct IV bytes in the TLS getsockopt code, from Boris
    Pismenny.

10) Take tclassid into consideration properly when doing FIB rule
    matching. From Stefano Brivio.

11) cxgb4 device needs more PCI VPD quirks, from Casey Leedom.

12) TUN driver doesn't align frags properly, and we can end up doing
    unaligned atomics on misaligned metadata. From Eric Dumazet.

13) Fix various crashes found using DEBUG_PREEMPT in rmnet driver, from
    Subash Abhinov Kasiviswanathan.

* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net: (56 commits)
  tg3: APE heartbeat changes
  mlxsw: spectrum_router: Do not unconditionally clear route offload indication
  net: qualcomm: rmnet: Fix possible null dereference in command processing
  net: qualcomm: rmnet: Fix warning seen with 64 bit stats
  net: qualcomm: rmnet: Fix crash on real dev unregistration
  sctp: remove the left unnecessary check for chunk in sctp_renege_events
  rxrpc: Work around usercopy check
  tun: fix tun_napi_alloc_frags() frag allocator
  udplite: fix partial checksum initialization
  skbuff: Fix comment mis-spelling.
  dn_getsockoptdecnet: move nf_{get/set}sockopt outside sock lock
  PCI/cxgb4: Extend T3 PCI quirk to T4+ devices
  cxgb4: fix trailing zero in CIM LA dump
  cxgb4: free up resources of pf 0-3
  fib_semantics: Don't match route with mismatching tclassid
  NFC: llcp: Limit size of SDP URI
  tls: getsockopt return record sequence number
  tls: reset the crypto info if copy_from_user fails
  tls: retrun the correct IV in getsockopt
  docs: segmentation-offloads.txt: add SCTP info
  ...

281 files changed:
Documentation/ABI/testing/sysfs-devices-platform-dock [new file with mode: 0644]
Documentation/ABI/testing/sysfs-devices-system-cpu
Documentation/ABI/testing/sysfs-platform-dptf [new file with mode: 0644]
Documentation/atomic_bitops.txt
Documentation/devicetree/bindings/power/mti,mips-cpc.txt [new file with mode: 0644]
Documentation/features/sched/membarrier-sync-core/arch-support.txt [new file with mode: 0644]
Documentation/locking/mutex-design.txt
MAINTAINERS
Makefile
arch/arm64/include/asm/cputype.h
arch/arm64/include/asm/hugetlb.h
arch/arm64/include/asm/kvm_mmu.h
arch/arm64/include/asm/mmu_context.h
arch/arm64/include/asm/pgalloc.h
arch/arm64/include/asm/pgtable.h
arch/arm64/kernel/cpu_errata.c
arch/arm64/kernel/efi.c
arch/arm64/kernel/hibernate.c
arch/arm64/kvm/hyp/switch.c
arch/arm64/mm/dump.c
arch/arm64/mm/fault.c
arch/arm64/mm/hugetlbpage.c
arch/arm64/mm/kasan_init.c
arch/arm64/mm/mmu.c
arch/arm64/mm/pageattr.c
arch/arm64/mm/proc.S
arch/ia64/kernel/Makefile
arch/mips/kernel/mips-cpc.c
arch/mips/kernel/setup.c
arch/mips/kernel/smp-bmips.c
arch/powerpc/include/asm/book3s/32/pgtable.h
arch/powerpc/include/asm/book3s/64/hash-4k.h
arch/powerpc/include/asm/book3s/64/hash-64k.h
arch/powerpc/include/asm/book3s/64/hash.h
arch/powerpc/include/asm/book3s/64/pgalloc.h
arch/powerpc/include/asm/book3s/64/pgtable.h
arch/powerpc/include/asm/exception-64s.h
arch/powerpc/include/asm/hw_irq.h
arch/powerpc/include/asm/kexec.h
arch/powerpc/include/asm/nohash/32/pgtable.h
arch/powerpc/include/asm/nohash/64/pgtable.h
arch/powerpc/include/asm/topology.h
arch/powerpc/kernel/exceptions-64e.S
arch/powerpc/kernel/exceptions-64s.S
arch/powerpc/kernel/sysfs.c
arch/powerpc/mm/drmem.c
arch/powerpc/mm/hash64_4k.c
arch/powerpc/mm/hash64_64k.c
arch/powerpc/mm/hash_utils_64.c
arch/powerpc/mm/hugetlbpage-hash64.c
arch/powerpc/mm/init-common.c
arch/powerpc/mm/numa.c
arch/powerpc/mm/pgtable-radix.c
arch/powerpc/mm/pgtable_64.c
arch/powerpc/mm/tlb_hash64.c
arch/powerpc/platforms/powernv/opal-imc.c
arch/powerpc/platforms/powernv/vas-window.c
arch/powerpc/platforms/pseries/hotplug-cpu.c
arch/powerpc/platforms/pseries/ras.c
arch/powerpc/sysdev/xive/spapr.c
arch/sparc/Kconfig
arch/x86/.gitignore
arch/x86/Kconfig
arch/x86/Kconfig.cpu
arch/x86/crypto/sha512-mb/sha512_mb_mgr_init_avx2.c
arch/x86/entry/calling.h
arch/x86/entry/entry_64.S
arch/x86/entry/entry_64_compat.S
arch/x86/events/intel/core.c
arch/x86/events/intel/lbr.c
arch/x86/events/intel/p6.c
arch/x86/include/asm/acpi.h
arch/x86/include/asm/barrier.h
arch/x86/include/asm/bug.h
arch/x86/include/asm/cpufeature.h
arch/x86/include/asm/nospec-branch.h
arch/x86/include/asm/page_64.h
arch/x86/include/asm/paravirt.h
arch/x86/include/asm/paravirt_types.h
arch/x86/include/asm/pgtable_32.h
arch/x86/include/asm/processor.h
arch/x86/include/asm/smp.h
arch/x86/include/asm/tlbflush.h
arch/x86/kernel/amd_nb.c
arch/x86/kernel/apic/apic.c
arch/x86/kernel/apic/x2apic_uv_x.c
arch/x86/kernel/asm-offsets_32.c
arch/x86/kernel/cpu/amd.c
arch/x86/kernel/cpu/bugs.c
arch/x86/kernel/cpu/centaur.c
arch/x86/kernel/cpu/common.c
arch/x86/kernel/cpu/cyrix.c
arch/x86/kernel/cpu/intel.c
arch/x86/kernel/cpu/intel_rdt.c
arch/x86/kernel/cpu/mcheck/mce-internal.h
arch/x86/kernel/cpu/mcheck/mce.c
arch/x86/kernel/cpu/microcode/intel.c
arch/x86/kernel/cpu/mtrr/generic.c
arch/x86/kernel/cpu/mtrr/main.c
arch/x86/kernel/cpu/proc.c
arch/x86/kernel/head_32.S
arch/x86/kernel/mpparse.c
arch/x86/kernel/paravirt.c
arch/x86/kernel/smpboot.c
arch/x86/kernel/traps.c
arch/x86/kvm/mmu.c
arch/x86/kvm/vmx.c
arch/x86/lib/cpu.c
arch/x86/lib/error-inject.c
arch/x86/mm/init_64.c
arch/x86/mm/ioremap.c
arch/x86/mm/kmmio.c
arch/x86/mm/pgtable_32.c
arch/x86/mm/tlb.c
arch/x86/platform/uv/tlb_uv.c
arch/x86/xen/mmu_pv.c
arch/x86/xen/smp.c
block/blk-mq.c
crypto/sha3_generic.c
drivers/acpi/bus.c
drivers/acpi/ec.c
drivers/acpi/property.c
drivers/acpi/spcr.c
drivers/base/core.c
drivers/base/power/wakeirq.c
drivers/base/property.c
drivers/char/hw_random/via-rng.c
drivers/cpufreq/acpi-cpufreq.c
drivers/cpufreq/longhaul.c
drivers/cpufreq/p4-clockmod.c
drivers/cpufreq/powernow-k7.c
drivers/cpufreq/speedstep-centrino.c
drivers/cpufreq/speedstep-lib.c
drivers/crypto/caam/ctrl.c
drivers/crypto/padlock-aes.c
drivers/crypto/sunxi-ss/sun4i-ss-prng.c
drivers/crypto/talitos.c
drivers/edac/amd64_edac.c
drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c
drivers/gpu/drm/i915/gvt/kvmgt.c
drivers/gpu/drm/i915/gvt/mmio_context.c
drivers/gpu/drm/i915/gvt/trace.h
drivers/gpu/drm/i915/i915_drv.c
drivers/gpu/drm/i915/i915_drv.h
drivers/gpu/drm/i915/i915_gem_context.c
drivers/gpu/drm/i915/i915_oa_cflgt3.c
drivers/gpu/drm/i915/i915_oa_cnl.c
drivers/gpu/drm/i915/i915_pmu.c
drivers/gpu/drm/i915/i915_pmu.h
drivers/gpu/drm/i915/intel_bios.c
drivers/gpu/drm/i915/intel_breadcrumbs.c
drivers/gpu/drm/i915/intel_cdclk.c
drivers/gpu/drm/i915/intel_engine_cs.c
drivers/gpu/drm/i915/intel_ringbuffer.h
drivers/gpu/drm/nouveau/nvkm/subdev/therm/base.c
drivers/hwmon/coretemp.c
drivers/hwmon/hwmon-vid.c
drivers/hwmon/k10temp.c
drivers/hwmon/k8temp.c
drivers/irqchip/irq-bcm7038-l1.c
drivers/irqchip/irq-bcm7120-l2.c
drivers/irqchip/irq-brcmstb-l2.c
drivers/irqchip/irq-gic-v2m.c
drivers/irqchip/irq-gic-v3-its-pci-msi.c
drivers/irqchip/irq-gic-v3-its-platform-msi.c
drivers/irqchip/irq-gic-v3-its.c
drivers/irqchip/irq-gic-v3.c
drivers/irqchip/irq-mips-gic.c
drivers/macintosh/macio_asic.c
drivers/md/dm.c
drivers/misc/ocxl/file.c
drivers/mmc/host/bcm2835.c
drivers/mmc/host/meson-gx-mmc.c
drivers/mtd/nand/Kconfig
drivers/mtd/nand/vf610_nfc.c
drivers/nvme/host/core.c
drivers/nvme/host/fabrics.h
drivers/nvme/host/fc.c
drivers/nvme/host/nvme.h
drivers/nvme/host/pci.c
drivers/nvme/host/rdma.c
drivers/nvme/target/io-cmd.c
drivers/of/property.c
drivers/opp/cpu.c
drivers/platform/x86/dell-laptop.c
drivers/platform/x86/ideapad-laptop.c
drivers/platform/x86/wmi.c
drivers/s390/virtio/virtio_ccw.c
drivers/staging/fsl-mc/bus/irq-gic-v3-its-fsl-mc-msi.c
drivers/usb/Kconfig
drivers/usb/host/Kconfig
drivers/video/fbdev/geode/video_gx.c
drivers/xen/pvcalls-front.c
drivers/xen/xenbus/xenbus.h
drivers/xen/xenbus/xenbus_comms.c
drivers/xen/xenbus/xenbus_xs.c
fs/btrfs/backref.c
fs/btrfs/delayed-ref.c
fs/btrfs/extent-tree.c
fs/btrfs/inode.c
fs/btrfs/qgroup.c
fs/btrfs/tree-log.c
fs/btrfs/volumes.c
fs/gfs2/bmap.c
fs/proc/kcore.c
include/asm-generic/bitops/lock.h
include/linux/acpi.h
include/linux/blkdev.h
include/linux/compiler-gcc.h
include/linux/compiler.h
include/linux/cpuidle.h
include/linux/cpumask.h
include/linux/dma-mapping.h
include/linux/fwnode.h
include/linux/kcore.h
include/linux/mm_inline.h
include/linux/nospec.h
include/linux/property.h
include/linux/semaphore.h
include/sound/ac97/regs.h
include/trace/events/xen.h
kernel/irq/irqdomain.c
kernel/kprobes.c
kernel/locking/qspinlock.c
kernel/sched/core.c
kernel/sched/cpufreq_schedutil.c
kernel/sched/deadline.c
kernel/sched/rt.c
lib/dma-direct.c
mm/memory-failure.c
mm/memory.c
net/9p/trans_virtio.c
sound/ac97/Kconfig
sound/core/seq/seq_clientmgr.c
sound/pci/hda/patch_realtek.c
sound/usb/mixer.c
sound/usb/pcm.c
sound/usb/quirks.c
tools/arch/powerpc/include/uapi/asm/kvm.h
tools/arch/s390/include/uapi/asm/unistd.h [deleted file]
tools/arch/x86/include/asm/cpufeatures.h
tools/include/uapi/drm/i915_drm.h
tools/include/uapi/linux/if_link.h
tools/include/uapi/linux/kvm.h
tools/objtool/check.c
tools/objtool/check.h
tools/perf/Documentation/perf-data.txt
tools/perf/arch/s390/Makefile
tools/perf/arch/s390/entry/syscalls/mksyscalltbl
tools/perf/arch/s390/entry/syscalls/syscall.tbl [new file with mode: 0644]
tools/perf/builtin-c2c.c
tools/perf/builtin-report.c
tools/perf/builtin-top.c
tools/perf/check-headers.sh
tools/perf/pmu-events/arch/arm64/cortex-a53/branch.json [new file with mode: 0644]
tools/perf/pmu-events/arch/arm64/cortex-a53/bus.json [new file with mode: 0644]
tools/perf/pmu-events/arch/arm64/cortex-a53/cache.json [new file with mode: 0644]
tools/perf/pmu-events/arch/arm64/cortex-a53/memory.json [new file with mode: 0644]
tools/perf/pmu-events/arch/arm64/cortex-a53/other.json [new file with mode: 0644]
tools/perf/pmu-events/arch/arm64/cortex-a53/pipeline.json [new file with mode: 0644]
tools/perf/pmu-events/arch/arm64/mapfile.csv
tools/perf/tests/backward-ring-buffer.c
tools/perf/tests/shell/trace+probe_libc_inet_pton.sh
tools/perf/ui/browsers/hists.c
tools/perf/ui/browsers/hists.h
tools/perf/util/evlist.c
tools/perf/util/evlist.h
tools/perf/util/evsel.c
tools/perf/util/evsel.h
tools/perf/util/hist.h
tools/perf/util/mmap.c
tools/perf/util/mmap.h
tools/perf/util/util.c
tools/testing/selftests/powerpc/alignment/alignment_handler.c
tools/testing/selftests/x86/Makefile
tools/testing/selftests/x86/mpx-mini-test.c
tools/testing/selftests/x86/protection_keys.c
tools/testing/selftests/x86/single_step_syscall.c
tools/testing/selftests/x86/test_mremap_vdso.c
tools/testing/selftests/x86/test_vdso.c
tools/testing/selftests/x86/test_vsyscall.c

diff --git a/Documentation/ABI/testing/sysfs-devices-platform-dock b/Documentation/ABI/testing/sysfs-devices-platform-dock
new file mode 100644 (file)
index 0000000..1d8c18f
--- /dev/null
@@ -0,0 +1,39 @@
+What:          /sys/devices/platform/dock.N/docked
+Date:          Dec, 2006
+KernelVersion: 2.6.19
+Contact:       linux-acpi@vger.kernel.org
+Description:
+               (RO) Value 1 or 0 indicates whether the software believes the
+               laptop is docked in a docking station.
+
+What:          /sys/devices/platform/dock.N/undock
+Date:          Dec, 2006
+KernelVersion: 2.6.19
+Contact:       linux-acpi@vger.kernel.org
+Description:
+               (WO) Writing to this file causes the software to initiate an
+               undock request to the firmware.
+
+What:          /sys/devices/platform/dock.N/uid
+Date:          Feb, 2007
+KernelVersion: v2.6.21
+Contact:       linux-acpi@vger.kernel.org
+Description:
+               (RO) Displays the docking station the laptop is docked to.
+
+What:          /sys/devices/platform/dock.N/flags
+Date:          May, 2007
+KernelVersion: v2.6.21
+Contact:       linux-acpi@vger.kernel.org
+Description:
+               (RO) Show dock station flags, useful for checking if undock
+               request has been made by the user (from the immediate_undock
+               option).
+
+What:          /sys/devices/platform/dock.N/type
+Date:          Aug, 2008
+KernelVersion: v2.6.27
+Contact:       linux-acpi@vger.kernel.org
+Description:
+               (RO) Display the dock station type- dock_station, ata_bay or
+               battery_bay.
index bfd29bc8d37af1bbc4913deee9d941b1692bedda..4ed63b6cfb155cbc8b5aaab64e1030662f19411f 100644 (file)
@@ -108,6 +108,8 @@ Description:        CPU topology files that describe a logical CPU's relationship
 
 What:          /sys/devices/system/cpu/cpuidle/current_driver
                /sys/devices/system/cpu/cpuidle/current_governer_ro
+               /sys/devices/system/cpu/cpuidle/available_governors
+               /sys/devices/system/cpu/cpuidle/current_governor
 Date:          September 2007
 Contact:       Linux kernel mailing list <linux-kernel@vger.kernel.org>
 Description:   Discover cpuidle policy and mechanism
@@ -119,13 +121,84 @@ Description:      Discover cpuidle policy and mechanism
                Idle policy (governor) is differentiated from idle mechanism
                (driver)
 
-               current_driver: displays current idle mechanism
+               current_driver: (RO) displays current idle mechanism
 
-               current_governor_ro: displays current idle policy
+               current_governor_ro: (RO) displays current idle policy
+
+               With the cpuidle_sysfs_switch boot option enabled (meant for
+               developer testing), the following three attributes are visible
+               instead:
+
+               current_driver: same as described above
+
+               available_governors: (RO) displays a space separated list of
+               available governors
+
+               current_governor: (RW) displays current idle policy. Users can
+               switch the governor at runtime by writing to this file.
 
                See files in Documentation/cpuidle/ for more information.
 
 
+What:          /sys/devices/system/cpu/cpuX/cpuidle/stateN/name
+               /sys/devices/system/cpu/cpuX/cpuidle/stateN/latency
+               /sys/devices/system/cpu/cpuX/cpuidle/stateN/power
+               /sys/devices/system/cpu/cpuX/cpuidle/stateN/time
+               /sys/devices/system/cpu/cpuX/cpuidle/stateN/usage
+Date:          September 2007
+KernelVersion: v2.6.24
+Contact:       Linux power management list <linux-pm@vger.kernel.org>
+Description:
+               The directory /sys/devices/system/cpu/cpuX/cpuidle contains per
+               logical CPU specific cpuidle information for each online cpu X.
+               The processor idle states which are available for use have the
+               following attributes:
+
+               name: (RO) Name of the idle state (string).
+
+               latency: (RO) The latency to exit out of this idle state (in
+               microseconds).
+
+               power: (RO) The power consumed while in this idle state (in
+               milliwatts).
+
+               time: (RO) The total time spent in this idle state (in microseconds).
+
+               usage: (RO) Number of times this state was entered (a count).
+
+
+What:          /sys/devices/system/cpu/cpuX/cpuidle/stateN/desc
+Date:          February 2008
+KernelVersion: v2.6.25
+Contact:       Linux power management list <linux-pm@vger.kernel.org>
+Description:
+               (RO) A small description about the idle state (string).
+
+
+What:          /sys/devices/system/cpu/cpuX/cpuidle/stateN/disable
+Date:          March 2012
+KernelVersion: v3.10
+Contact:       Linux power management list <linux-pm@vger.kernel.org>
+Description:
+               (RW) Option to disable this idle state (bool). The behavior and
+               the effect of the disable variable depends on the implementation
+               of a particular governor. In the ladder governor, for example,
+               it is not coherent, i.e. if one is disabling a light state, then
+               all deeper states are disabled as well, but the disable variable
+               does not reflect it. Likewise, if one enables a deep state but a
+               lighter state still is disabled, then this has no effect.
+
+
+What:          /sys/devices/system/cpu/cpuX/cpuidle/stateN/residency
+Date:          March 2014
+KernelVersion: v3.15
+Contact:       Linux power management list <linux-pm@vger.kernel.org>
+Description:
+               (RO) Display the target residency i.e. the minimum amount of
+               time (in microseconds) this cpu should spend in this idle state
+               to make the transition worth the effort.
+
+
 What:          /sys/devices/system/cpu/cpu#/cpufreq/*
 Date:          pre-git history
 Contact:       linux-pm@vger.kernel.org
diff --git a/Documentation/ABI/testing/sysfs-platform-dptf b/Documentation/ABI/testing/sysfs-platform-dptf
new file mode 100644 (file)
index 0000000..325dc06
--- /dev/null
@@ -0,0 +1,40 @@
+What:          /sys/bus/platform/devices/INT3407:00/dptf_power/charger_type
+Date:          Jul, 2016
+KernelVersion: v4.10
+Contact:       linux-acpi@vger.kernel.org
+Description:
+               (RO) The charger type - Traditional, Hybrid or NVDC.
+
+What:          /sys/bus/platform/devices/INT3407:00/dptf_power/adapter_rating_mw
+Date:          Jul, 2016
+KernelVersion: v4.10
+Contact:       linux-acpi@vger.kernel.org
+Description:
+               (RO) Adapter rating in milliwatts (the maximum Adapter power).
+               Must be 0 if no AC Adaptor is plugged in.
+
+What:          /sys/bus/platform/devices/INT3407:00/dptf_power/max_platform_power_mw
+Date:          Jul, 2016
+KernelVersion: v4.10
+Contact:       linux-acpi@vger.kernel.org
+Description:
+               (RO) Maximum platform power that can be supported by the battery
+               in milliwatts.
+
+What:          /sys/bus/platform/devices/INT3407:00/dptf_power/platform_power_source
+Date:          Jul, 2016
+KernelVersion: v4.10
+Contact:       linux-acpi@vger.kernel.org
+Description:
+               (RO) Display the platform power source
+               0x00 = DC
+               0x01 = AC
+               0x02 = USB
+               0x03 = Wireless Charger
+
+What:          /sys/bus/platform/devices/INT3407:00/dptf_power/battery_steady_power
+Date:          Jul, 2016
+KernelVersion: v4.10
+Contact:       linux-acpi@vger.kernel.org
+Description:
+               (RO) The maximum sustained power for battery in milliwatts.
index 5550bfdcce5f1cfaab57728f4c272e9e84902860..be70b32c95d918066ffa72dfa4a69e8b4e51a225 100644 (file)
@@ -58,7 +58,12 @@ Like with atomic_t, the rule of thumb is:
 
  - RMW operations that have a return value are fully ordered.
 
-Except for test_and_set_bit_lock() which has ACQUIRE semantics and
+ - RMW operations that are conditional are unordered on FAILURE,
+   otherwise the above rules apply. In the case of test_and_{}_bit() operations,
+   if the bit in memory is unchanged by the operation then it is deemed to have
+   failed.
+
+Except for a successful test_and_set_bit_lock() which has ACQUIRE semantics and
 clear_bit_unlock() which has RELEASE semantics.
 
 Since a platform only has a single means of achieving atomic operations
diff --git a/Documentation/devicetree/bindings/power/mti,mips-cpc.txt b/Documentation/devicetree/bindings/power/mti,mips-cpc.txt
new file mode 100644 (file)
index 0000000..c6b8251
--- /dev/null
@@ -0,0 +1,8 @@
+Binding for MIPS Cluster Power Controller (CPC).
+
+This binding allows a system to specify where the CPC registers are
+located.
+
+Required properties:
+compatible : Should be "mti,mips-cpc".
+regs: Should describe the address & size of the CPC register region.
diff --git a/Documentation/features/sched/membarrier-sync-core/arch-support.txt b/Documentation/features/sched/membarrier-sync-core/arch-support.txt
new file mode 100644 (file)
index 0000000..2c815a7
--- /dev/null
@@ -0,0 +1,62 @@
+#
+# Feature name:          membarrier-sync-core
+#         Kconfig:       ARCH_HAS_MEMBARRIER_SYNC_CORE
+#         description:   arch supports core serializing membarrier
+#
+# Architecture requirements
+#
+# * arm64
+#
+# Rely on eret context synchronization when returning from IPI handler, and
+# when returning to user-space.
+#
+# * x86
+#
+# x86-32 uses IRET as return from interrupt, which takes care of the IPI.
+# However, it uses both IRET and SYSEXIT to go back to user-space. The IRET
+# instruction is core serializing, but not SYSEXIT.
+#
+# x86-64 uses IRET as return from interrupt, which takes care of the IPI.
+# However, it can return to user-space through either SYSRETL (compat code),
+# SYSRETQ, or IRET.
+#
+# Given that neither SYSRET{L,Q}, nor SYSEXIT, are core serializing, we rely
+# instead on write_cr3() performed by switch_mm() to provide core serialization
+# after changing the current mm, and deal with the special case of kthread ->
+# uthread (temporarily keeping current mm into active_mm) by issuing a
+# sync_core_before_usermode() in that specific case.
+#
+    -----------------------
+    |         arch |status|
+    -----------------------
+    |       alpha: | TODO |
+    |         arc: | TODO |
+    |         arm: | TODO |
+    |       arm64: |  ok  |
+    |    blackfin: | TODO |
+    |         c6x: | TODO |
+    |        cris: | TODO |
+    |         frv: | TODO |
+    |       h8300: | TODO |
+    |     hexagon: | TODO |
+    |        ia64: | TODO |
+    |        m32r: | TODO |
+    |        m68k: | TODO |
+    |       metag: | TODO |
+    |  microblaze: | TODO |
+    |        mips: | TODO |
+    |     mn10300: | TODO |
+    |       nios2: | TODO |
+    |    openrisc: | TODO |
+    |      parisc: | TODO |
+    |     powerpc: | TODO |
+    |        s390: | TODO |
+    |       score: | TODO |
+    |          sh: | TODO |
+    |       sparc: | TODO |
+    |        tile: | TODO |
+    |          um: | TODO |
+    |   unicore32: | TODO |
+    |         x86: |  ok  |
+    |      xtensa: | TODO |
+    -----------------------
index 60c482df1a38db2b300952832095ef41e2b8e655..818aca19612f4a763c96ee2eb1fbf8b3cb452e9e 100644 (file)
@@ -21,37 +21,23 @@ Implementation
 --------------
 
 Mutexes are represented by 'struct mutex', defined in include/linux/mutex.h
-and implemented in kernel/locking/mutex.c. These locks use a three
-state atomic counter (->count) to represent the different possible
-transitions that can occur during the lifetime of a lock:
-
-         1: unlocked
-         0: locked, no waiters
-   negative: locked, with potential waiters
-
-In its most basic form it also includes a wait-queue and a spinlock
-that serializes access to it. CONFIG_SMP systems can also include
-a pointer to the lock task owner (->owner) as well as a spinner MCS
-lock (->osq), both described below in (ii).
+and implemented in kernel/locking/mutex.c. These locks use an atomic variable
+(->owner) to keep track of the lock state during its lifetime.  Field owner
+actually contains 'struct task_struct *' to the current lock owner and it is
+therefore NULL if not currently owned. Since task_struct pointers are aligned
+at at least L1_CACHE_BYTES, low bits (3) are used to store extra state (e.g.,
+if waiter list is non-empty).  In its most basic form it also includes a
+wait-queue and a spinlock that serializes access to it. Furthermore,
+CONFIG_MUTEX_SPIN_ON_OWNER=y systems use a spinner MCS lock (->osq), described
+below in (ii).
 
 When acquiring a mutex, there are three possible paths that can be
 taken, depending on the state of the lock:
 
-(i) fastpath: tries to atomically acquire the lock by decrementing the
-    counter. If it was already taken by another task it goes to the next
-    possible path. This logic is architecture specific. On x86-64, the
-    locking fastpath is 2 instructions:
-
-    0000000000000e10 <mutex_lock>:
-    e21:   f0 ff 0b                lock decl (%rbx)
-    e24:   79 08                   jns    e2e <mutex_lock+0x1e>
-
-   the unlocking fastpath is equally tight:
-
-    0000000000000bc0 <mutex_unlock>:
-    bc8:   f0 ff 07                lock incl (%rdi)
-    bcb:   7f 0a                   jg     bd7 <mutex_unlock+0x17>
-
+(i) fastpath: tries to atomically acquire the lock by cmpxchg()ing the owner with
+    the current task. This only works in the uncontended case (cmpxchg() checks
+    against 0UL, so all 3 state bits above have to be 0). If the lock is
+    contended it goes to the next possible path.
 
 (ii) midpath: aka optimistic spinning, tries to spin for acquisition
      while the lock owner is running and there are no other tasks ready
@@ -143,11 +129,10 @@ Test if the mutex is taken:
 Disadvantages
 -------------
 
-Unlike its original design and purpose, 'struct mutex' is larger than
-most locks in the kernel. E.g: on x86-64 it is 40 bytes, almost twice
-as large as 'struct semaphore' (24 bytes) and tied, along with rwsems,
-for the largest lock in the kernel. Larger structure sizes mean more
-CPU cache and memory footprint.
+Unlike its original design and purpose, 'struct mutex' is among the largest
+locks in the kernel. E.g: on x86-64 it is 32 bytes, where 'struct semaphore'
+is 24 bytes and rw_semaphore is 40 bytes. Larger structure sizes mean more CPU
+cache and memory footprint.
 
 When to use mutexes
 -------------------
index 3bdc260e36b7a7eaac26003b187c436655fc3412..9a7f76eadae9a51c5b49e5faadf8aa0aef19f15c 100644 (file)
@@ -9206,6 +9206,7 @@ MIPS GENERIC PLATFORM
 M:     Paul Burton <paul.burton@mips.com>
 L:     linux-mips@linux-mips.org
 S:     Supported
+F:     Documentation/devicetree/bindings/power/mti,mips-cpc.txt
 F:     arch/mips/generic/
 F:     arch/mips/tools/generic-board-config.sh
 
@@ -9945,6 +9946,7 @@ F:        drivers/nfc/nxp-nci
 
 OBJTOOL
 M:     Josh Poimboeuf <jpoimboe@redhat.com>
+M:     Peter Zijlstra <peterz@infradead.org>
 S:     Supported
 F:     tools/objtool/
 
index 79ad2bfa24b68f279af011f82e87f16d64d1440d..d9cf3a40eda9d20ce03ceda2ebc921a95dc2aea7 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -2,7 +2,7 @@
 VERSION = 4
 PATCHLEVEL = 16
 SUBLEVEL = 0
-EXTRAVERSION = -rc1
+EXTRAVERSION = -rc2
 NAME = Fearless Coyote
 
 # *DOCUMENTATION*
index be7bd19c87ec23949c4dcbdfe8bbd4972dbae00a..eda8c5f629fc8553af2cdccaede5dfd209cecf94 100644 (file)
@@ -20,7 +20,7 @@
 
 #define MPIDR_UP_BITMASK       (0x1 << 30)
 #define MPIDR_MT_BITMASK       (0x1 << 24)
-#define MPIDR_HWID_BITMASK     0xff00ffffff
+#define MPIDR_HWID_BITMASK     0xff00ffffffUL
 
 #define MPIDR_LEVEL_BITS_SHIFT 3
 #define MPIDR_LEVEL_BITS       (1 << MPIDR_LEVEL_BITS_SHIFT)
index 1dca41bea16ad61fc8fe6f2be528ba452bffdf27..e73f6856962461952287b244831395200cdc3853 100644 (file)
@@ -22,7 +22,7 @@
 
 static inline pte_t huge_ptep_get(pte_t *ptep)
 {
-       return *ptep;
+       return READ_ONCE(*ptep);
 }
 
 
index 9679067a15746ce921ba5a138c6508c0a4972d0a..7faed6e48b46212709485b7225c512f3fb99831e 100644 (file)
@@ -185,42 +185,42 @@ static inline pmd_t kvm_s2pmd_mkexec(pmd_t pmd)
        return pmd;
 }
 
-static inline void kvm_set_s2pte_readonly(pte_t *pte)
+static inline void kvm_set_s2pte_readonly(pte_t *ptep)
 {
        pteval_t old_pteval, pteval;
 
-       pteval = READ_ONCE(pte_val(*pte));
+       pteval = READ_ONCE(pte_val(*ptep));
        do {
                old_pteval = pteval;
                pteval &= ~PTE_S2_RDWR;
                pteval |= PTE_S2_RDONLY;
-               pteval = cmpxchg_relaxed(&pte_val(*pte), old_pteval, pteval);
+               pteval = cmpxchg_relaxed(&pte_val(*ptep), old_pteval, pteval);
        } while (pteval != old_pteval);
 }
 
-static inline bool kvm_s2pte_readonly(pte_t *pte)
+static inline bool kvm_s2pte_readonly(pte_t *ptep)
 {
-       return (pte_val(*pte) & PTE_S2_RDWR) == PTE_S2_RDONLY;
+       return (READ_ONCE(pte_val(*ptep)) & PTE_S2_RDWR) == PTE_S2_RDONLY;
 }
 
-static inline bool kvm_s2pte_exec(pte_t *pte)
+static inline bool kvm_s2pte_exec(pte_t *ptep)
 {
-       return !(pte_val(*pte) & PTE_S2_XN);
+       return !(READ_ONCE(pte_val(*ptep)) & PTE_S2_XN);
 }
 
-static inline void kvm_set_s2pmd_readonly(pmd_t *pmd)
+static inline void kvm_set_s2pmd_readonly(pmd_t *pmdp)
 {
-       kvm_set_s2pte_readonly((pte_t *)pmd);
+       kvm_set_s2pte_readonly((pte_t *)pmdp);
 }
 
-static inline bool kvm_s2pmd_readonly(pmd_t *pmd)
+static inline bool kvm_s2pmd_readonly(pmd_t *pmdp)
 {
-       return kvm_s2pte_readonly((pte_t *)pmd);
+       return kvm_s2pte_readonly((pte_t *)pmdp);
 }
 
-static inline bool kvm_s2pmd_exec(pmd_t *pmd)
+static inline bool kvm_s2pmd_exec(pmd_t *pmdp)
 {
-       return !(pmd_val(*pmd) & PMD_S2_XN);
+       return !(READ_ONCE(pmd_val(*pmdp)) & PMD_S2_XN);
 }
 
 static inline bool kvm_page_empty(void *ptr)
index 8d3331985d2e34b2099eab6cec8b456d40983052..39ec0b8a689eea3e495029685bed047737d64c5e 100644 (file)
@@ -141,13 +141,13 @@ static inline void cpu_install_idmap(void)
  * Atomically replaces the active TTBR1_EL1 PGD with a new VA-compatible PGD,
  * avoiding the possibility of conflicting TLB entries being allocated.
  */
-static inline void cpu_replace_ttbr1(pgd_t *pgd)
+static inline void cpu_replace_ttbr1(pgd_t *pgdp)
 {
        typedef void (ttbr_replace_func)(phys_addr_t);
        extern ttbr_replace_func idmap_cpu_replace_ttbr1;
        ttbr_replace_func *replace_phys;
 
-       phys_addr_t pgd_phys = virt_to_phys(pgd);
+       phys_addr_t pgd_phys = virt_to_phys(pgdp);
 
        replace_phys = (void *)__pa_symbol(idmap_cpu_replace_ttbr1);
 
index e9d9f1b006efec5708fd0f33c006d2e017711a4d..2e05bcd944c8395b9fd5af993b6214054352aefc 100644 (file)
@@ -36,23 +36,23 @@ static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr)
        return (pmd_t *)__get_free_page(PGALLOC_GFP);
 }
 
-static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
+static inline void pmd_free(struct mm_struct *mm, pmd_t *pmdp)
 {
-       BUG_ON((unsigned long)pmd & (PAGE_SIZE-1));
-       free_page((unsigned long)pmd);
+       BUG_ON((unsigned long)pmdp & (PAGE_SIZE-1));
+       free_page((unsigned long)pmdp);
 }
 
-static inline void __pud_populate(pud_t *pud, phys_addr_t pmd, pudval_t prot)
+static inline void __pud_populate(pud_t *pudp, phys_addr_t pmdp, pudval_t prot)
 {
-       set_pud(pud, __pud(__phys_to_pud_val(pmd) | prot));
+       set_pud(pudp, __pud(__phys_to_pud_val(pmdp) | prot));
 }
 
-static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
+static inline void pud_populate(struct mm_struct *mm, pud_t *pudp, pmd_t *pmdp)
 {
-       __pud_populate(pud, __pa(pmd), PMD_TYPE_TABLE);
+       __pud_populate(pudp, __pa(pmdp), PMD_TYPE_TABLE);
 }
 #else
-static inline void __pud_populate(pud_t *pud, phys_addr_t pmd, pudval_t prot)
+static inline void __pud_populate(pud_t *pudp, phys_addr_t pmdp, pudval_t prot)
 {
        BUILD_BUG();
 }
@@ -65,30 +65,30 @@ static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
        return (pud_t *)__get_free_page(PGALLOC_GFP);
 }
 
-static inline void pud_free(struct mm_struct *mm, pud_t *pud)
+static inline void pud_free(struct mm_struct *mm, pud_t *pudp)
 {
-       BUG_ON((unsigned long)pud & (PAGE_SIZE-1));
-       free_page((unsigned long)pud);
+       BUG_ON((unsigned long)pudp & (PAGE_SIZE-1));
+       free_page((unsigned long)pudp);
 }
 
-static inline void __pgd_populate(pgd_t *pgdp, phys_addr_t pud, pgdval_t prot)
+static inline void __pgd_populate(pgd_t *pgdp, phys_addr_t pudp, pgdval_t prot)
 {
-       set_pgd(pgdp, __pgd(__phys_to_pgd_val(pud) | prot));
+       set_pgd(pgdp, __pgd(__phys_to_pgd_val(pudp) | prot));
 }
 
-static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pud_t *pud)
+static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgdp, pud_t *pudp)
 {
-       __pgd_populate(pgd, __pa(pud), PUD_TYPE_TABLE);
+       __pgd_populate(pgdp, __pa(pudp), PUD_TYPE_TABLE);
 }
 #else
-static inline void __pgd_populate(pgd_t *pgdp, phys_addr_t pud, pgdval_t prot)
+static inline void __pgd_populate(pgd_t *pgdp, phys_addr_t pudp, pgdval_t prot)
 {
        BUILD_BUG();
 }
 #endif /* CONFIG_PGTABLE_LEVELS > 3 */
 
 extern pgd_t *pgd_alloc(struct mm_struct *mm);
-extern void pgd_free(struct mm_struct *mm, pgd_t *pgd);
+extern void pgd_free(struct mm_struct *mm, pgd_t *pgdp);
 
 static inline pte_t *
 pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr)
@@ -114,10 +114,10 @@ pte_alloc_one(struct mm_struct *mm, unsigned long addr)
 /*
  * Free a PTE table.
  */
-static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
+static inline void pte_free_kernel(struct mm_struct *mm, pte_t *ptep)
 {
-       if (pte)
-               free_page((unsigned long)pte);
+       if (ptep)
+               free_page((unsigned long)ptep);
 }
 
 static inline void pte_free(struct mm_struct *mm, pgtable_t pte)
@@ -126,10 +126,10 @@ static inline void pte_free(struct mm_struct *mm, pgtable_t pte)
        __free_page(pte);
 }
 
-static inline void __pmd_populate(pmd_t *pmdp, phys_addr_t pte,
+static inline void __pmd_populate(pmd_t *pmdp, phys_addr_t ptep,
                                  pmdval_t prot)
 {
-       set_pmd(pmdp, __pmd(__phys_to_pmd_val(pte) | prot));
+       set_pmd(pmdp, __pmd(__phys_to_pmd_val(ptep) | prot));
 }
 
 /*
index 094374c82db088816d6a35ec75e1658dfc446ec4..7e2c27e63cd894371655a569046faaa67cfc1837 100644 (file)
@@ -218,7 +218,7 @@ static inline pmd_t pmd_mkcont(pmd_t pmd)
 
 static inline void set_pte(pte_t *ptep, pte_t pte)
 {
-       *ptep = pte;
+       WRITE_ONCE(*ptep, pte);
 
        /*
         * Only if the new pte is valid and kernel, otherwise TLB maintenance
@@ -250,6 +250,8 @@ extern void __sync_icache_dcache(pte_t pteval, unsigned long addr);
 static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
                              pte_t *ptep, pte_t pte)
 {
+       pte_t old_pte;
+
        if (pte_present(pte) && pte_user_exec(pte) && !pte_special(pte))
                __sync_icache_dcache(pte, addr);
 
@@ -258,14 +260,15 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
         * hardware updates of the pte (ptep_set_access_flags safely changes
         * valid ptes without going through an invalid entry).
         */
-       if (IS_ENABLED(CONFIG_DEBUG_VM) && pte_valid(*ptep) && pte_valid(pte) &&
+       old_pte = READ_ONCE(*ptep);
+       if (IS_ENABLED(CONFIG_DEBUG_VM) && pte_valid(old_pte) && pte_valid(pte) &&
           (mm == current->active_mm || atomic_read(&mm->mm_users) > 1)) {
                VM_WARN_ONCE(!pte_young(pte),
                             "%s: racy access flag clearing: 0x%016llx -> 0x%016llx",
-                            __func__, pte_val(*ptep), pte_val(pte));
-               VM_WARN_ONCE(pte_write(*ptep) && !pte_dirty(pte),
+                            __func__, pte_val(old_pte), pte_val(pte));
+               VM_WARN_ONCE(pte_write(old_pte) && !pte_dirty(pte),
                             "%s: racy dirty state clearing: 0x%016llx -> 0x%016llx",
-                            __func__, pte_val(*ptep), pte_val(pte));
+                            __func__, pte_val(old_pte), pte_val(pte));
        }
 
        set_pte(ptep, pte);
@@ -431,7 +434,7 @@ extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
 
 static inline void set_pmd(pmd_t *pmdp, pmd_t pmd)
 {
-       *pmdp = pmd;
+       WRITE_ONCE(*pmdp, pmd);
        dsb(ishst);
        isb();
 }
@@ -482,7 +485,7 @@ static inline phys_addr_t pmd_page_paddr(pmd_t pmd)
 
 static inline void set_pud(pud_t *pudp, pud_t pud)
 {
-       *pudp = pud;
+       WRITE_ONCE(*pudp, pud);
        dsb(ishst);
        isb();
 }
@@ -500,7 +503,7 @@ static inline phys_addr_t pud_page_paddr(pud_t pud)
 /* Find an entry in the second-level page table. */
 #define pmd_index(addr)                (((addr) >> PMD_SHIFT) & (PTRS_PER_PMD - 1))
 
-#define pmd_offset_phys(dir, addr)     (pud_page_paddr(*(dir)) + pmd_index(addr) * sizeof(pmd_t))
+#define pmd_offset_phys(dir, addr)     (pud_page_paddr(READ_ONCE(*(dir))) + pmd_index(addr) * sizeof(pmd_t))
 #define pmd_offset(dir, addr)          ((pmd_t *)__va(pmd_offset_phys((dir), (addr))))
 
 #define pmd_set_fixmap(addr)           ((pmd_t *)set_fixmap_offset(FIX_PMD, addr))
@@ -535,7 +538,7 @@ static inline phys_addr_t pud_page_paddr(pud_t pud)
 
 static inline void set_pgd(pgd_t *pgdp, pgd_t pgd)
 {
-       *pgdp = pgd;
+       WRITE_ONCE(*pgdp, pgd);
        dsb(ishst);
 }
 
@@ -552,7 +555,7 @@ static inline phys_addr_t pgd_page_paddr(pgd_t pgd)
 /* Find an entry in the frst-level page table. */
 #define pud_index(addr)                (((addr) >> PUD_SHIFT) & (PTRS_PER_PUD - 1))
 
-#define pud_offset_phys(dir, addr)     (pgd_page_paddr(*(dir)) + pud_index(addr) * sizeof(pud_t))
+#define pud_offset_phys(dir, addr)     (pgd_page_paddr(READ_ONCE(*(dir))) + pud_index(addr) * sizeof(pud_t))
 #define pud_offset(dir, addr)          ((pud_t *)__va(pud_offset_phys((dir), (addr))))
 
 #define pud_set_fixmap(addr)           ((pud_t *)set_fixmap_offset(FIX_PUD, addr))
index 07823595b7f01690823da724584965bca0872588..52f15cd896e11ad631ac3092d9709337a9629bb4 100644 (file)
@@ -406,6 +406,15 @@ const struct arm64_cpu_capabilities arm64_errata[] = {
                .capability = ARM64_HARDEN_BP_POST_GUEST_EXIT,
                MIDR_ALL_VERSIONS(MIDR_QCOM_FALKOR_V1),
        },
+       {
+               .capability = ARM64_HARDEN_BRANCH_PREDICTOR,
+               MIDR_ALL_VERSIONS(MIDR_QCOM_FALKOR),
+               .enable = qcom_enable_link_stack_sanitization,
+       },
+       {
+               .capability = ARM64_HARDEN_BP_POST_GUEST_EXIT,
+               MIDR_ALL_VERSIONS(MIDR_QCOM_FALKOR),
+       },
        {
                .capability = ARM64_HARDEN_BRANCH_PREDICTOR,
                MIDR_ALL_VERSIONS(MIDR_BRCM_VULCAN),
index f85ac58d08a35676f38fa2a6d7b0887fb6f1d2ec..a8bf1c892b9065ca40ed4b263317deced2b8d693 100644 (file)
@@ -90,7 +90,7 @@ static int __init set_permissions(pte_t *ptep, pgtable_t token,
                                  unsigned long addr, void *data)
 {
        efi_memory_desc_t *md = data;
-       pte_t pte = *ptep;
+       pte_t pte = READ_ONCE(*ptep);
 
        if (md->attribute & EFI_MEMORY_RO)
                pte = set_pte_bit(pte, __pgprot(PTE_RDONLY));
index f20cf7e992495adffcd2049c4c40f9ec6798c99f..1ec5f28c39fc56c4aae85cc5801bd513cc3ea2c3 100644 (file)
@@ -202,10 +202,10 @@ static int create_safe_exec_page(void *src_start, size_t length,
                                 gfp_t mask)
 {
        int rc = 0;
-       pgd_t *pgd;
-       pud_t *pud;
-       pmd_t *pmd;
-       pte_t *pte;
+       pgd_t *pgdp;
+       pud_t *pudp;
+       pmd_t *pmdp;
+       pte_t *ptep;
        unsigned long dst = (unsigned long)allocator(mask);
 
        if (!dst) {
@@ -216,38 +216,38 @@ static int create_safe_exec_page(void *src_start, size_t length,
        memcpy((void *)dst, src_start, length);
        flush_icache_range(dst, dst + length);
 
-       pgd = pgd_offset_raw(allocator(mask), dst_addr);
-       if (pgd_none(*pgd)) {
-               pud = allocator(mask);
-               if (!pud) {
+       pgdp = pgd_offset_raw(allocator(mask), dst_addr);
+       if (pgd_none(READ_ONCE(*pgdp))) {
+               pudp = allocator(mask);
+               if (!pudp) {
                        rc = -ENOMEM;
                        goto out;
                }
-               pgd_populate(&init_mm, pgd, pud);
+               pgd_populate(&init_mm, pgdp, pudp);
        }
 
-       pud = pud_offset(pgd, dst_addr);
-       if (pud_none(*pud)) {
-               pmd = allocator(mask);
-               if (!pmd) {
+       pudp = pud_offset(pgdp, dst_addr);
+       if (pud_none(READ_ONCE(*pudp))) {
+               pmdp = allocator(mask);
+               if (!pmdp) {
                        rc = -ENOMEM;
                        goto out;
                }
-               pud_populate(&init_mm, pud, pmd);
+               pud_populate(&init_mm, pudp, pmdp);
        }
 
-       pmd = pmd_offset(pud, dst_addr);
-       if (pmd_none(*pmd)) {
-               pte = allocator(mask);
-               if (!pte) {
+       pmdp = pmd_offset(pudp, dst_addr);
+       if (pmd_none(READ_ONCE(*pmdp))) {
+               ptep = allocator(mask);
+               if (!ptep) {
                        rc = -ENOMEM;
                        goto out;
                }
-               pmd_populate_kernel(&init_mm, pmd, pte);
+               pmd_populate_kernel(&init_mm, pmdp, ptep);
        }
 
-       pte = pte_offset_kernel(pmd, dst_addr);
-       set_pte(pte, pfn_pte(virt_to_pfn(dst), PAGE_KERNEL_EXEC));
+       ptep = pte_offset_kernel(pmdp, dst_addr);
+       set_pte(ptep, pfn_pte(virt_to_pfn(dst), PAGE_KERNEL_EXEC));
 
        /*
         * Load our new page tables. A strict BBM approach requires that we
@@ -263,7 +263,7 @@ static int create_safe_exec_page(void *src_start, size_t length,
         */
        cpu_set_reserved_ttbr0();
        local_flush_tlb_all();
-       write_sysreg(phys_to_ttbr(virt_to_phys(pgd)), ttbr0_el1);
+       write_sysreg(phys_to_ttbr(virt_to_phys(pgdp)), ttbr0_el1);
        isb();
 
        *phys_dst_addr = virt_to_phys((void *)dst);
@@ -320,9 +320,9 @@ int swsusp_arch_suspend(void)
        return ret;
 }
 
-static void _copy_pte(pte_t *dst_pte, pte_t *src_pte, unsigned long addr)
+static void _copy_pte(pte_t *dst_ptep, pte_t *src_ptep, unsigned long addr)
 {
-       pte_t pte = *src_pte;
+       pte_t pte = READ_ONCE(*src_ptep);
 
        if (pte_valid(pte)) {
                /*
@@ -330,7 +330,7 @@ static void _copy_pte(pte_t *dst_pte, pte_t *src_pte, unsigned long addr)
                 * read only (code, rodata). Clear the RDONLY bit from
                 * the temporary mappings we use during restore.
                 */
-               set_pte(dst_pte, pte_mkwrite(pte));
+               set_pte(dst_ptep, pte_mkwrite(pte));
        } else if (debug_pagealloc_enabled() && !pte_none(pte)) {
                /*
                 * debug_pagealloc will removed the PTE_VALID bit if
@@ -343,112 +343,116 @@ static void _copy_pte(pte_t *dst_pte, pte_t *src_pte, unsigned long addr)
                 */
                BUG_ON(!pfn_valid(pte_pfn(pte)));
 
-               set_pte(dst_pte, pte_mkpresent(pte_mkwrite(pte)));
+               set_pte(dst_ptep, pte_mkpresent(pte_mkwrite(pte)));
        }
 }
 
-static int copy_pte(pmd_t *dst_pmd, pmd_t *src_pmd, unsigned long start,
+static int copy_pte(pmd_t *dst_pmdp, pmd_t *src_pmdp, unsigned long start,
                    unsigned long end)
 {
-       pte_t *src_pte;
-       pte_t *dst_pte;
+       pte_t *src_ptep;
+       pte_t *dst_ptep;
        unsigned long addr = start;
 
-       dst_pte = (pte_t *)get_safe_page(GFP_ATOMIC);
-       if (!dst_pte)
+       dst_ptep = (pte_t *)get_safe_page(GFP_ATOMIC);
+       if (!dst_ptep)
                return -ENOMEM;
-       pmd_populate_kernel(&init_mm, dst_pmd, dst_pte);
-       dst_pte = pte_offset_kernel(dst_pmd, start);
+       pmd_populate_kernel(&init_mm, dst_pmdp, dst_ptep);
+       dst_ptep = pte_offset_kernel(dst_pmdp, start);
 
-       src_pte = pte_offset_kernel(src_pmd, start);
+       src_ptep = pte_offset_kernel(src_pmdp, start);
        do {
-               _copy_pte(dst_pte, src_pte, addr);
-       } while (dst_pte++, src_pte++, addr += PAGE_SIZE, addr != end);
+               _copy_pte(dst_ptep, src_ptep, addr);
+       } while (dst_ptep++, src_ptep++, addr += PAGE_SIZE, addr != end);
 
        return 0;
 }
 
-static int copy_pmd(pud_t *dst_pud, pud_t *src_pud, unsigned long start,
+static int copy_pmd(pud_t *dst_pudp, pud_t *src_pudp, unsigned long start,
                    unsigned long end)
 {
-       pmd_t *src_pmd;
-       pmd_t *dst_pmd;
+       pmd_t *src_pmdp;
+       pmd_t *dst_pmdp;
        unsigned long next;
        unsigned long addr = start;
 
-       if (pud_none(*dst_pud)) {
-               dst_pmd = (pmd_t *)get_safe_page(GFP_ATOMIC);
-               if (!dst_pmd)
+       if (pud_none(READ_ONCE(*dst_pudp))) {
+               dst_pmdp = (pmd_t *)get_safe_page(GFP_ATOMIC);
+               if (!dst_pmdp)
                        return -ENOMEM;
-               pud_populate(&init_mm, dst_pud, dst_pmd);
+               pud_populate(&init_mm, dst_pudp, dst_pmdp);
        }
-       dst_pmd = pmd_offset(dst_pud, start);
+       dst_pmdp = pmd_offset(dst_pudp, start);
 
-       src_pmd = pmd_offset(src_pud, start);
+       src_pmdp = pmd_offset(src_pudp, start);
        do {
+               pmd_t pmd = READ_ONCE(*src_pmdp);
+
                next = pmd_addr_end(addr, end);
-               if (pmd_none(*src_pmd))
+               if (pmd_none(pmd))
                        continue;
-               if (pmd_table(*src_pmd)) {
-                       if (copy_pte(dst_pmd, src_pmd, addr, next))
+               if (pmd_table(pmd)) {
+                       if (copy_pte(dst_pmdp, src_pmdp, addr, next))
                                return -ENOMEM;
                } else {
-                       set_pmd(dst_pmd,
-                               __pmd(pmd_val(*src_pmd) & ~PMD_SECT_RDONLY));
+                       set_pmd(dst_pmdp,
+                               __pmd(pmd_val(pmd) & ~PMD_SECT_RDONLY));
                }
-       } while (dst_pmd++, src_pmd++, addr = next, addr != end);
+       } while (dst_pmdp++, src_pmdp++, addr = next, addr != end);
 
        return 0;
 }
 
-static int copy_pud(pgd_t *dst_pgd, pgd_t *src_pgd, unsigned long start,
+static int copy_pud(pgd_t *dst_pgdp, pgd_t *src_pgdp, unsigned long start,
                    unsigned long end)
 {
-       pud_t *dst_pud;
-       pud_t *src_pud;
+       pud_t *dst_pudp;
+       pud_t *src_pudp;
        unsigned long next;
        unsigned long addr = start;
 
-       if (pgd_none(*dst_pgd)) {
-               dst_pud = (pud_t *)get_safe_page(GFP_ATOMIC);
-               if (!dst_pud)
+       if (pgd_none(READ_ONCE(*dst_pgdp))) {
+               dst_pudp = (pud_t *)get_safe_page(GFP_ATOMIC);
+               if (!dst_pudp)
                        return -ENOMEM;
-               pgd_populate(&init_mm, dst_pgd, dst_pud);
+               pgd_populate(&init_mm, dst_pgdp, dst_pudp);
        }
-       dst_pud = pud_offset(dst_pgd, start);
+       dst_pudp = pud_offset(dst_pgdp, start);
 
-       src_pud = pud_offset(src_pgd, start);
+       src_pudp = pud_offset(src_pgdp, start);
        do {
+               pud_t pud = READ_ONCE(*src_pudp);
+
                next = pud_addr_end(addr, end);
-               if (pud_none(*src_pud))
+               if (pud_none(pud))
                        continue;
-               if (pud_table(*(src_pud))) {
-                       if (copy_pmd(dst_pud, src_pud, addr, next))
+               if (pud_table(pud)) {
+                       if (copy_pmd(dst_pudp, src_pudp, addr, next))
                                return -ENOMEM;
                } else {
-                       set_pud(dst_pud,
-                               __pud(pud_val(*src_pud) & ~PMD_SECT_RDONLY));
+                       set_pud(dst_pudp,
+                               __pud(pud_val(pud) & ~PMD_SECT_RDONLY));
                }
-       } while (dst_pud++, src_pud++, addr = next, addr != end);
+       } while (dst_pudp++, src_pudp++, addr = next, addr != end);
 
        return 0;
 }
 
-static int copy_page_tables(pgd_t *dst_pgd, unsigned long start,
+static int copy_page_tables(pgd_t *dst_pgdp, unsigned long start,
                            unsigned long end)
 {
        unsigned long next;
        unsigned long addr = start;
-       pgd_t *src_pgd = pgd_offset_k(start);
+       pgd_t *src_pgdp = pgd_offset_k(start);
 
-       dst_pgd = pgd_offset_raw(dst_pgd, start);
+       dst_pgdp = pgd_offset_raw(dst_pgdp, start);
        do {
                next = pgd_addr_end(addr, end);
-               if (pgd_none(*src_pgd))
+               if (pgd_none(READ_ONCE(*src_pgdp)))
                        continue;
-               if (copy_pud(dst_pgd, src_pgd, addr, next))
+               if (copy_pud(dst_pgdp, src_pgdp, addr, next))
                        return -ENOMEM;
-       } while (dst_pgd++, src_pgd++, addr = next, addr != end);
+       } while (dst_pgdp++, src_pgdp++, addr = next, addr != end);
 
        return 0;
 }
index 116252a8d3a5507295ed30abfd740e1cc07446ae..870f4b1587f97496c3fd427fe6b652bae1a96cd7 100644 (file)
@@ -407,8 +407,10 @@ again:
                u32 midr = read_cpuid_id();
 
                /* Apply BTAC predictors mitigation to all Falkor chips */
-               if ((midr & MIDR_CPU_MODEL_MASK) == MIDR_QCOM_FALKOR_V1)
+               if (((midr & MIDR_CPU_MODEL_MASK) == MIDR_QCOM_FALKOR) ||
+                   ((midr & MIDR_CPU_MODEL_MASK) == MIDR_QCOM_FALKOR_V1)) {
                        __qcom_hyp_sanitize_btac_predictors();
+               }
        }
 
        fp_enabled = __fpsimd_enabled();
index 7b60d62ac5939e83c8e153ec1c3a0447565f23eb..65dfc8571bf8397c3f2a6297d21b5112794461e1 100644 (file)
@@ -286,48 +286,52 @@ static void note_page(struct pg_state *st, unsigned long addr, unsigned level,
 
 }
 
-static void walk_pte(struct pg_state *st, pmd_t *pmd, unsigned long start)
+static void walk_pte(struct pg_state *st, pmd_t *pmdp, unsigned long start)
 {
-       pte_t *pte = pte_offset_kernel(pmd, 0UL);
+       pte_t *ptep = pte_offset_kernel(pmdp, 0UL);
        unsigned long addr;
        unsigned i;
 
-       for (i = 0; i < PTRS_PER_PTE; i++, pte++) {
+       for (i = 0; i < PTRS_PER_PTE; i++, ptep++) {
                addr = start + i * PAGE_SIZE;
-               note_page(st, addr, 4, pte_val(*pte));
+               note_page(st, addr, 4, READ_ONCE(pte_val(*ptep)));
        }
 }
 
-static void walk_pmd(struct pg_state *st, pud_t *pud, unsigned long start)
+static void walk_pmd(struct pg_state *st, pud_t *pudp, unsigned long start)
 {
-       pmd_t *pmd = pmd_offset(pud, 0UL);
+       pmd_t *pmdp = pmd_offset(pudp, 0UL);
        unsigned long addr;
        unsigned i;
 
-       for (i = 0; i < PTRS_PER_PMD; i++, pmd++) {
+       for (i = 0; i < PTRS_PER_PMD; i++, pmdp++) {
+               pmd_t pmd = READ_ONCE(*pmdp);
+
                addr = start + i * PMD_SIZE;
-               if (pmd_none(*pmd) || pmd_sect(*pmd)) {
-                       note_page(st, addr, 3, pmd_val(*pmd));
+               if (pmd_none(pmd) || pmd_sect(pmd)) {
+                       note_page(st, addr, 3, pmd_val(pmd));
                } else {
-                       BUG_ON(pmd_bad(*pmd));
-                       walk_pte(st, pmd, addr);
+                       BUG_ON(pmd_bad(pmd));
+                       walk_pte(st, pmdp, addr);
                }
        }
 }
 
-static void walk_pud(struct pg_state *st, pgd_t *pgd, unsigned long start)
+static void walk_pud(struct pg_state *st, pgd_t *pgdp, unsigned long start)
 {
-       pud_t *pud = pud_offset(pgd, 0UL);
+       pud_t *pudp = pud_offset(pgdp, 0UL);
        unsigned long addr;
        unsigned i;
 
-       for (i = 0; i < PTRS_PER_PUD; i++, pud++) {
+       for (i = 0; i < PTRS_PER_PUD; i++, pudp++) {
+               pud_t pud = READ_ONCE(*pudp);
+
                addr = start + i * PUD_SIZE;
-               if (pud_none(*pud) || pud_sect(*pud)) {
-                       note_page(st, addr, 2, pud_val(*pud));
+               if (pud_none(pud) || pud_sect(pud)) {
+                       note_page(st, addr, 2, pud_val(pud));
                } else {
-                       BUG_ON(pud_bad(*pud));
-                       walk_pmd(st, pud, addr);
+                       BUG_ON(pud_bad(pud));
+                       walk_pmd(st, pudp, addr);
                }
        }
 }
@@ -335,17 +339,19 @@ static void walk_pud(struct pg_state *st, pgd_t *pgd, unsigned long start)
 static void walk_pgd(struct pg_state *st, struct mm_struct *mm,
                     unsigned long start)
 {
-       pgd_t *pgd = pgd_offset(mm, 0UL);
+       pgd_t *pgdp = pgd_offset(mm, 0UL);
        unsigned i;
        unsigned long addr;
 
-       for (i = 0; i < PTRS_PER_PGD; i++, pgd++) {
+       for (i = 0; i < PTRS_PER_PGD; i++, pgdp++) {
+               pgd_t pgd = READ_ONCE(*pgdp);
+
                addr = start + i * PGDIR_SIZE;
-               if (pgd_none(*pgd)) {
-                       note_page(st, addr, 1, pgd_val(*pgd));
+               if (pgd_none(pgd)) {
+                       note_page(st, addr, 1, pgd_val(pgd));
                } else {
-                       BUG_ON(pgd_bad(*pgd));
-                       walk_pud(st, pgd, addr);
+                       BUG_ON(pgd_bad(pgd));
+                       walk_pud(st, pgdp, addr);
                }
        }
 }
index f76bb2c3c9434dc29c572d4103f9eb10b42dc278..bff11553eb050306dfa9df7fec0682f6a03cbf61 100644 (file)
@@ -130,7 +130,8 @@ static void mem_abort_decode(unsigned int esr)
 void show_pte(unsigned long addr)
 {
        struct mm_struct *mm;
-       pgd_t *pgd;
+       pgd_t *pgdp;
+       pgd_t pgd;
 
        if (addr < TASK_SIZE) {
                /* TTBR0 */
@@ -149,33 +150,37 @@ void show_pte(unsigned long addr)
                return;
        }
 
-       pr_alert("%s pgtable: %luk pages, %u-bit VAs, pgd = %p\n",
+       pr_alert("%s pgtable: %luk pages, %u-bit VAs, pgdp = %p\n",
                 mm == &init_mm ? "swapper" : "user", PAGE_SIZE / SZ_1K,
                 VA_BITS, mm->pgd);
-       pgd = pgd_offset(mm, addr);
-       pr_alert("[%016lx] *pgd=%016llx", addr, pgd_val(*pgd));
+       pgdp = pgd_offset(mm, addr);
+       pgd = READ_ONCE(*pgdp);
+       pr_alert("[%016lx] pgd=%016llx", addr, pgd_val(pgd));
 
        do {
-               pud_t *pud;
-               pmd_t *pmd;
-               pte_t *pte;
+               pud_t *pudp, pud;
+               pmd_t *pmdp, pmd;
+               pte_t *ptep, pte;
 
-               if (pgd_none(*pgd) || pgd_bad(*pgd))
+               if (pgd_none(pgd) || pgd_bad(pgd))
                        break;
 
-               pud = pud_offset(pgd, addr);
-               pr_cont(", *pud=%016llx", pud_val(*pud));
-               if (pud_none(*pud) || pud_bad(*pud))
+               pudp = pud_offset(pgdp, addr);
+               pud = READ_ONCE(*pudp);
+               pr_cont(", pud=%016llx", pud_val(pud));
+               if (pud_none(pud) || pud_bad(pud))
                        break;
 
-               pmd = pmd_offset(pud, addr);
-               pr_cont(", *pmd=%016llx", pmd_val(*pmd));
-               if (pmd_none(*pmd) || pmd_bad(*pmd))
+               pmdp = pmd_offset(pudp, addr);
+               pmd = READ_ONCE(*pmdp);
+               pr_cont(", pmd=%016llx", pmd_val(pmd));
+               if (pmd_none(pmd) || pmd_bad(pmd))
                        break;
 
-               pte = pte_offset_map(pmd, addr);
-               pr_cont(", *pte=%016llx", pte_val(*pte));
-               pte_unmap(pte);
+               ptep = pte_offset_map(pmdp, addr);
+               pte = READ_ONCE(*ptep);
+               pr_cont(", pte=%016llx", pte_val(pte));
+               pte_unmap(ptep);
        } while(0);
 
        pr_cont("\n");
@@ -196,8 +201,9 @@ int ptep_set_access_flags(struct vm_area_struct *vma,
                          pte_t entry, int dirty)
 {
        pteval_t old_pteval, pteval;
+       pte_t pte = READ_ONCE(*ptep);
 
-       if (pte_same(*ptep, entry))
+       if (pte_same(pte, entry))
                return 0;
 
        /* only preserve the access flags and write permission */
@@ -210,7 +216,7 @@ int ptep_set_access_flags(struct vm_area_struct *vma,
         * (calculated as: a & b == ~(~a | ~b)).
         */
        pte_val(entry) ^= PTE_RDONLY;
-       pteval = READ_ONCE(pte_val(*ptep));
+       pteval = pte_val(pte);
        do {
                old_pteval = pteval;
                pteval ^= PTE_RDONLY;
index 6cb0fa92a65162ecce1e84b8ef09177fcc54785d..ecc6818191df961eac49e6ca0c7d8b8d38d0c855 100644 (file)
@@ -54,14 +54,14 @@ static inline pgprot_t pte_pgprot(pte_t pte)
 static int find_num_contig(struct mm_struct *mm, unsigned long addr,
                           pte_t *ptep, size_t *pgsize)
 {
-       pgd_t *pgd = pgd_offset(mm, addr);
-       pud_t *pud;
-       pmd_t *pmd;
+       pgd_t *pgdp = pgd_offset(mm, addr);
+       pud_t *pudp;
+       pmd_t *pmdp;
 
        *pgsize = PAGE_SIZE;
-       pud = pud_offset(pgd, addr);
-       pmd = pmd_offset(pud, addr);
-       if ((pte_t *)pmd == ptep) {
+       pudp = pud_offset(pgdp, addr);
+       pmdp = pmd_offset(pudp, addr);
+       if ((pte_t *)pmdp == ptep) {
                *pgsize = PMD_SIZE;
                return CONT_PMDS;
        }
@@ -181,11 +181,8 @@ void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
 
        clear_flush(mm, addr, ptep, pgsize, ncontig);
 
-       for (i = 0; i < ncontig; i++, ptep++, addr += pgsize, pfn += dpfn) {
-               pr_debug("%s: set pte %p to 0x%llx\n", __func__, ptep,
-                        pte_val(pfn_pte(pfn, hugeprot)));
+       for (i = 0; i < ncontig; i++, ptep++, addr += pgsize, pfn += dpfn)
                set_pte_at(mm, addr, ptep, pfn_pte(pfn, hugeprot));
-       }
 }
 
 void set_huge_swap_pte_at(struct mm_struct *mm, unsigned long addr,
@@ -203,20 +200,20 @@ void set_huge_swap_pte_at(struct mm_struct *mm, unsigned long addr,
 pte_t *huge_pte_alloc(struct mm_struct *mm,
                      unsigned long addr, unsigned long sz)
 {
-       pgd_t *pgd;
-       pud_t *pud;
-       pte_t *pte = NULL;
-
-       pr_debug("%s: addr:0x%lx sz:0x%lx\n", __func__, addr, sz);
-       pgd = pgd_offset(mm, addr);
-       pud = pud_alloc(mm, pgd, addr);
-       if (!pud)
+       pgd_t *pgdp;
+       pud_t *pudp;
+       pmd_t *pmdp;
+       pte_t *ptep = NULL;
+
+       pgdp = pgd_offset(mm, addr);
+       pudp = pud_alloc(mm, pgdp, addr);
+       if (!pudp)
                return NULL;
 
        if (sz == PUD_SIZE) {
-               pte = (pte_t *)pud;
+               ptep = (pte_t *)pudp;
        } else if (sz == (PAGE_SIZE * CONT_PTES)) {
-               pmd_t *pmd = pmd_alloc(mm, pud, addr);
+               pmdp = pmd_alloc(mm, pudp, addr);
 
                WARN_ON(addr & (sz - 1));
                /*
@@ -226,60 +223,55 @@ pte_t *huge_pte_alloc(struct mm_struct *mm,
                 * will be no pte_unmap() to correspond with this
                 * pte_alloc_map().
                 */
-               pte = pte_alloc_map(mm, pmd, addr);
+               ptep = pte_alloc_map(mm, pmdp, addr);
        } else if (sz == PMD_SIZE) {
                if (IS_ENABLED(CONFIG_ARCH_WANT_HUGE_PMD_SHARE) &&
-                   pud_none(*pud))
-                       pte = huge_pmd_share(mm, addr, pud);
+                   pud_none(READ_ONCE(*pudp)))
+                       ptep = huge_pmd_share(mm, addr, pudp);
                else
-                       pte = (pte_t *)pmd_alloc(mm, pud, addr);
+                       ptep = (pte_t *)pmd_alloc(mm, pudp, addr);
        } else if (sz == (PMD_SIZE * CONT_PMDS)) {
-               pmd_t *pmd;
-
-               pmd = pmd_alloc(mm, pud, addr);
+               pmdp = pmd_alloc(mm, pudp, addr);
                WARN_ON(addr & (sz - 1));
-               return (pte_t *)pmd;
+               return (pte_t *)pmdp;
        }
 
-       pr_debug("%s: addr:0x%lx sz:0x%lx ret pte=%p/0x%llx\n", __func__, addr,
-              sz, pte, pte_val(*pte));
-       return pte;
+       return ptep;
 }
 
 pte_t *huge_pte_offset(struct mm_struct *mm,
                       unsigned long addr, unsigned long sz)
 {
-       pgd_t *pgd;
-       pud_t *pud;
-       pmd_t *pmd;
+       pgd_t *pgdp;
+       pud_t *pudp, pud;
+       pmd_t *pmdp, pmd;
 
-       pgd = pgd_offset(mm, addr);
-       pr_debug("%s: addr:0x%lx pgd:%p\n", __func__, addr, pgd);
-       if (!pgd_present(*pgd))
+       pgdp = pgd_offset(mm, addr);
+       if (!pgd_present(READ_ONCE(*pgdp)))
                return NULL;
 
-       pud = pud_offset(pgd, addr);
-       if (sz != PUD_SIZE && pud_none(*pud))
+       pudp = pud_offset(pgdp, addr);
+       pud = READ_ONCE(*pudp);
+       if (sz != PUD_SIZE && pud_none(pud))
                return NULL;
        /* hugepage or swap? */
-       if (pud_huge(*pud) || !pud_present(*pud))
-               return (pte_t *)pud;
+       if (pud_huge(pud) || !pud_present(pud))
+               return (pte_t *)pudp;
        /* table; check the next level */
 
        if (sz == CONT_PMD_SIZE)
                addr &= CONT_PMD_MASK;
 
-       pmd = pmd_offset(pud, addr);
+       pmdp = pmd_offset(pudp, addr);
+       pmd = READ_ONCE(*pmdp);
        if (!(sz == PMD_SIZE || sz == CONT_PMD_SIZE) &&
-           pmd_none(*pmd))
+           pmd_none(pmd))
                return NULL;
-       if (pmd_huge(*pmd) || !pmd_present(*pmd))
-               return (pte_t *)pmd;
+       if (pmd_huge(pmd) || !pmd_present(pmd))
+               return (pte_t *)pmdp;
 
-       if (sz == CONT_PTE_SIZE) {
-               pte_t *pte = pte_offset_kernel(pmd, (addr & CONT_PTE_MASK));
-               return pte;
-       }
+       if (sz == CONT_PTE_SIZE)
+               return pte_offset_kernel(pmdp, (addr & CONT_PTE_MASK));
 
        return NULL;
 }
@@ -367,7 +359,7 @@ void huge_ptep_set_wrprotect(struct mm_struct *mm,
        size_t pgsize;
        pte_t pte;
 
-       if (!pte_cont(*ptep)) {
+       if (!pte_cont(READ_ONCE(*ptep))) {
                ptep_set_wrprotect(mm, addr, ptep);
                return;
        }
@@ -391,7 +383,7 @@ void huge_ptep_clear_flush(struct vm_area_struct *vma,
        size_t pgsize;
        int ncontig;
 
-       if (!pte_cont(*ptep)) {
+       if (!pte_cont(READ_ONCE(*ptep))) {
                ptep_clear_flush(vma, addr, ptep);
                return;
        }
index 6e02e6fb4c7b9e12da9796b2e8a2be68ca143ae0..dabfc1ecda3d3a9d57a430f1641eca05c1114703 100644 (file)
@@ -44,92 +44,92 @@ static phys_addr_t __init kasan_alloc_zeroed_page(int node)
        return __pa(p);
 }
 
-static pte_t *__init kasan_pte_offset(pmd_t *pmd, unsigned long addr, int node,
+static pte_t *__init kasan_pte_offset(pmd_t *pmdp, unsigned long addr, int node,
                                      bool early)
 {
-       if (pmd_none(*pmd)) {
+       if (pmd_none(READ_ONCE(*pmdp))) {
                phys_addr_t pte_phys = early ? __pa_symbol(kasan_zero_pte)
                                             : kasan_alloc_zeroed_page(node);
-               __pmd_populate(pmd, pte_phys, PMD_TYPE_TABLE);
+               __pmd_populate(pmdp, pte_phys, PMD_TYPE_TABLE);
        }
 
-       return early ? pte_offset_kimg(pmd, addr)
-                    : pte_offset_kernel(pmd, addr);
+       return early ? pte_offset_kimg(pmdp, addr)
+                    : pte_offset_kernel(pmdp, addr);
 }
 
-static pmd_t *__init kasan_pmd_offset(pud_t *pud, unsigned long addr, int node,
+static pmd_t *__init kasan_pmd_offset(pud_t *pudp, unsigned long addr, int node,
                                      bool early)
 {
-       if (pud_none(*pud)) {
+       if (pud_none(READ_ONCE(*pudp))) {
                phys_addr_t pmd_phys = early ? __pa_symbol(kasan_zero_pmd)
                                             : kasan_alloc_zeroed_page(node);
-               __pud_populate(pud, pmd_phys, PMD_TYPE_TABLE);
+               __pud_populate(pudp, pmd_phys, PMD_TYPE_TABLE);
        }
 
-       return early ? pmd_offset_kimg(pud, addr) : pmd_offset(pud, addr);
+       return early ? pmd_offset_kimg(pudp, addr) : pmd_offset(pudp, addr);
 }
 
-static pud_t *__init kasan_pud_offset(pgd_t *pgd, unsigned long addr, int node,
+static pud_t *__init kasan_pud_offset(pgd_t *pgdp, unsigned long addr, int node,
                                      bool early)
 {
-       if (pgd_none(*pgd)) {
+       if (pgd_none(READ_ONCE(*pgdp))) {
                phys_addr_t pud_phys = early ? __pa_symbol(kasan_zero_pud)
                                             : kasan_alloc_zeroed_page(node);
-               __pgd_populate(pgd, pud_phys, PMD_TYPE_TABLE);
+               __pgd_populate(pgdp, pud_phys, PMD_TYPE_TABLE);
        }
 
-       return early ? pud_offset_kimg(pgd, addr) : pud_offset(pgd, addr);
+       return early ? pud_offset_kimg(pgdp, addr) : pud_offset(pgdp, addr);
 }
 
-static void __init kasan_pte_populate(pmd_t *pmd, unsigned long addr,
+static void __init kasan_pte_populate(pmd_t *pmdp, unsigned long addr,
                                      unsigned long end, int node, bool early)
 {
        unsigned long next;
-       pte_t *pte = kasan_pte_offset(pmd, addr, node, early);
+       pte_t *ptep = kasan_pte_offset(pmdp, addr, node, early);
 
        do {
                phys_addr_t page_phys = early ? __pa_symbol(kasan_zero_page)
                                              : kasan_alloc_zeroed_page(node);
                next = addr + PAGE_SIZE;
-               set_pte(pte, pfn_pte(__phys_to_pfn(page_phys), PAGE_KERNEL));
-       } while (pte++, addr = next, addr != end && pte_none(*pte));
+               set_pte(ptep, pfn_pte(__phys_to_pfn(page_phys), PAGE_KERNEL));
+       } while (ptep++, addr = next, addr != end && pte_none(READ_ONCE(*ptep)));
 }
 
-static void __init kasan_pmd_populate(pud_t *pud, unsigned long addr,
+static void __init kasan_pmd_populate(pud_t *pudp, unsigned long addr,
                                      unsigned long end, int node, bool early)
 {
        unsigned long next;
-       pmd_t *pmd = kasan_pmd_offset(pud, addr, node, early);
+       pmd_t *pmdp = kasan_pmd_offset(pudp, addr, node, early);
 
        do {
                next = pmd_addr_end(addr, end);
-               kasan_pte_populate(pmd, addr, next, node, early);
-       } while (pmd++, addr = next, addr != end && pmd_none(*pmd));
+               kasan_pte_populate(pmdp, addr, next, node, early);
+       } while (pmdp++, addr = next, addr != end && pmd_none(READ_ONCE(*pmdp)));
 }
 
-static void __init kasan_pud_populate(pgd_t *pgd, unsigned long addr,
+static void __init kasan_pud_populate(pgd_t *pgdp, unsigned long addr,
                                      unsigned long end, int node, bool early)
 {
        unsigned long next;
-       pud_t *pud = kasan_pud_offset(pgd, addr, node, early);
+       pud_t *pudp = kasan_pud_offset(pgdp, addr, node, early);
 
        do {
                next = pud_addr_end(addr, end);
-               kasan_pmd_populate(pud, addr, next, node, early);
-       } while (pud++, addr = next, addr != end && pud_none(*pud));
+               kasan_pmd_populate(pudp, addr, next, node, early);
+       } while (pudp++, addr = next, addr != end && pud_none(READ_ONCE(*pudp)));
 }
 
 static void __init kasan_pgd_populate(unsigned long addr, unsigned long end,
                                      int node, bool early)
 {
        unsigned long next;
-       pgd_t *pgd;
+       pgd_t *pgdp;
 
-       pgd = pgd_offset_k(addr);
+       pgdp = pgd_offset_k(addr);
        do {
                next = pgd_addr_end(addr, end);
-               kasan_pud_populate(pgd, addr, next, node, early);
-       } while (pgd++, addr = next, addr != end);
+               kasan_pud_populate(pgdp, addr, next, node, early);
+       } while (pgdp++, addr = next, addr != end);
 }
 
 /* The early shadow maps everything to a single page of zeroes */
@@ -155,14 +155,14 @@ static void __init kasan_map_populate(unsigned long start, unsigned long end,
  */
 void __init kasan_copy_shadow(pgd_t *pgdir)
 {
-       pgd_t *pgd, *pgd_new, *pgd_end;
+       pgd_t *pgdp, *pgdp_new, *pgdp_end;
 
-       pgd = pgd_offset_k(KASAN_SHADOW_START);
-       pgd_end = pgd_offset_k(KASAN_SHADOW_END);
-       pgd_new = pgd_offset_raw(pgdir, KASAN_SHADOW_START);
+       pgdp = pgd_offset_k(KASAN_SHADOW_START);
+       pgdp_end = pgd_offset_k(KASAN_SHADOW_END);
+       pgdp_new = pgd_offset_raw(pgdir, KASAN_SHADOW_START);
        do {
-               set_pgd(pgd_new, *pgd);
-       } while (pgd++, pgd_new++, pgd != pgd_end);
+               set_pgd(pgdp_new, READ_ONCE(*pgdp));
+       } while (pgdp++, pgdp_new++, pgdp != pgdp_end);
 }
 
 static void __init clear_pgds(unsigned long start,
index 4694cda823c9541527b95f269658bdbc4b8243d7..3161b853f29e1d35a21b0da5a01f571c995616f3 100644 (file)
@@ -125,45 +125,48 @@ static bool pgattr_change_is_safe(u64 old, u64 new)
        return ((old ^ new) & ~mask) == 0;
 }
 
-static void init_pte(pmd_t *pmd, unsigned long addr, unsigned long end,
+static void init_pte(pmd_t *pmdp, unsigned long addr, unsigned long end,
                     phys_addr_t phys, pgprot_t prot)
 {
-       pte_t *pte;
+       pte_t *ptep;
 
-       pte = pte_set_fixmap_offset(pmd, addr);
+       ptep = pte_set_fixmap_offset(pmdp, addr);
        do {
-               pte_t old_pte = *pte;
+               pte_t old_pte = READ_ONCE(*ptep);
 
-               set_pte(pte, pfn_pte(__phys_to_pfn(phys), prot));
+               set_pte(ptep, pfn_pte(__phys_to_pfn(phys), prot));
 
                /*
                 * After the PTE entry has been populated once, we
                 * only allow updates to the permission attributes.
                 */
-               BUG_ON(!pgattr_change_is_safe(pte_val(old_pte), pte_val(*pte)));
+               BUG_ON(!pgattr_change_is_safe(pte_val(old_pte),
+                                             READ_ONCE(pte_val(*ptep))));
 
                phys += PAGE_SIZE;
-       } while (pte++, addr += PAGE_SIZE, addr != end);
+       } while (ptep++, addr += PAGE_SIZE, addr != end);
 
        pte_clear_fixmap();
 }
 
-static void alloc_init_cont_pte(pmd_t *pmd, unsigned long addr,
+static void alloc_init_cont_pte(pmd_t *pmdp, unsigned long addr,
                                unsigned long end, phys_addr_t phys,
                                pgprot_t prot,
                                phys_addr_t (*pgtable_alloc)(void),
                                int flags)
 {
        unsigned long next;
+       pmd_t pmd = READ_ONCE(*pmdp);
 
-       BUG_ON(pmd_sect(*pmd));
-       if (pmd_none(*pmd)) {
+       BUG_ON(pmd_sect(pmd));
+       if (pmd_none(pmd)) {
                phys_addr_t pte_phys;
                BUG_ON(!pgtable_alloc);
                pte_phys = pgtable_alloc();
-               __pmd_populate(pmd, pte_phys, PMD_TYPE_TABLE);
+               __pmd_populate(pmdp, pte_phys, PMD_TYPE_TABLE);
+               pmd = READ_ONCE(*pmdp);
        }
-       BUG_ON(pmd_bad(*pmd));
+       BUG_ON(pmd_bad(pmd));
 
        do {
                pgprot_t __prot = prot;
@@ -175,67 +178,69 @@ static void alloc_init_cont_pte(pmd_t *pmd, unsigned long addr,
                    (flags & NO_CONT_MAPPINGS) == 0)
                        __prot = __pgprot(pgprot_val(prot) | PTE_CONT);
 
-               init_pte(pmd, addr, next, phys, __prot);
+               init_pte(pmdp, addr, next, phys, __prot);
 
                phys += next - addr;
        } while (addr = next, addr != end);
 }
 
-static void init_pmd(pud_t *pud, unsigned long addr, unsigned long end,
+static void init_pmd(pud_t *pudp, unsigned long addr, unsigned long end,
                     phys_addr_t phys, pgprot_t prot,
                     phys_addr_t (*pgtable_alloc)(void), int flags)
 {
        unsigned long next;
-       pmd_t *pmd;
+       pmd_t *pmdp;
 
-       pmd = pmd_set_fixmap_offset(pud, addr);
+       pmdp = pmd_set_fixmap_offset(pudp, addr);
        do {
-               pmd_t old_pmd = *pmd;
+               pmd_t old_pmd = READ_ONCE(*pmdp);
 
                next = pmd_addr_end(addr, end);
 
                /* try section mapping first */
                if (((addr | next | phys) & ~SECTION_MASK) == 0 &&
                    (flags & NO_BLOCK_MAPPINGS) == 0) {
-                       pmd_set_huge(pmd, phys, prot);
+                       pmd_set_huge(pmdp, phys, prot);
 
                        /*
                         * After the PMD entry has been populated once, we
                         * only allow updates to the permission attributes.
                         */
                        BUG_ON(!pgattr_change_is_safe(pmd_val(old_pmd),
-                                                     pmd_val(*pmd)));
+                                                     READ_ONCE(pmd_val(*pmdp))));
                } else {
-                       alloc_init_cont_pte(pmd, addr, next, phys, prot,
+                       alloc_init_cont_pte(pmdp, addr, next, phys, prot,
                                            pgtable_alloc, flags);
 
                        BUG_ON(pmd_val(old_pmd) != 0 &&
-                              pmd_val(old_pmd) != pmd_val(*pmd));
+                              pmd_val(old_pmd) != READ_ONCE(pmd_val(*pmdp)));
                }
                phys += next - addr;
-       } while (pmd++, addr = next, addr != end);
+       } while (pmdp++, addr = next, addr != end);
 
        pmd_clear_fixmap();
 }
 
-static void alloc_init_cont_pmd(pud_t *pud, unsigned long addr,
+static void alloc_init_cont_pmd(pud_t *pudp, unsigned long addr,
                                unsigned long end, phys_addr_t phys,
                                pgprot_t prot,
                                phys_addr_t (*pgtable_alloc)(void), int flags)
 {
        unsigned long next;
+       pud_t pud = READ_ONCE(*pudp);
 
        /*
         * Check for initial section mappings in the pgd/pud.
         */
-       BUG_ON(pud_sect(*pud));
-       if (pud_none(*pud)) {
+       BUG_ON(pud_sect(pud));
+       if (pud_none(pud)) {
                phys_addr_t pmd_phys;
                BUG_ON(!pgtable_alloc);
                pmd_phys = pgtable_alloc();
-               __pud_populate(pud, pmd_phys, PUD_TYPE_TABLE);
+               __pud_populate(pudp, pmd_phys, PUD_TYPE_TABLE);
+               pud = READ_ONCE(*pudp);
        }
-       BUG_ON(pud_bad(*pud));
+       BUG_ON(pud_bad(pud));
 
        do {
                pgprot_t __prot = prot;
@@ -247,7 +252,7 @@ static void alloc_init_cont_pmd(pud_t *pud, unsigned long addr,
                    (flags & NO_CONT_MAPPINGS) == 0)
                        __prot = __pgprot(pgprot_val(prot) | PTE_CONT);
 
-               init_pmd(pud, addr, next, phys, __prot, pgtable_alloc, flags);
+               init_pmd(pudp, addr, next, phys, __prot, pgtable_alloc, flags);
 
                phys += next - addr;
        } while (addr = next, addr != end);
@@ -265,25 +270,27 @@ static inline bool use_1G_block(unsigned long addr, unsigned long next,
        return true;
 }
 
-static void alloc_init_pud(pgd_t *pgd, unsigned long addr, unsigned long end,
-                                 phys_addr_t phys, pgprot_t prot,
-                                 phys_addr_t (*pgtable_alloc)(void),
-                                 int flags)
+static void alloc_init_pud(pgd_t *pgdp, unsigned long addr, unsigned long end,
+                          phys_addr_t phys, pgprot_t prot,
+                          phys_addr_t (*pgtable_alloc)(void),
+                          int flags)
 {
-       pud_t *pud;
        unsigned long next;
+       pud_t *pudp;
+       pgd_t pgd = READ_ONCE(*pgdp);
 
-       if (pgd_none(*pgd)) {
+       if (pgd_none(pgd)) {
                phys_addr_t pud_phys;
                BUG_ON(!pgtable_alloc);
                pud_phys = pgtable_alloc();
-               __pgd_populate(pgd, pud_phys, PUD_TYPE_TABLE);
+               __pgd_populate(pgdp, pud_phys, PUD_TYPE_TABLE);
+               pgd = READ_ONCE(*pgdp);
        }
-       BUG_ON(pgd_bad(*pgd));
+       BUG_ON(pgd_bad(pgd));
 
-       pud = pud_set_fixmap_offset(pgd, addr);
+       pudp = pud_set_fixmap_offset(pgdp, addr);
        do {
-               pud_t old_pud = *pud;
+               pud_t old_pud = READ_ONCE(*pudp);
 
                next = pud_addr_end(addr, end);
 
@@ -292,23 +299,23 @@ static void alloc_init_pud(pgd_t *pgd, unsigned long addr, unsigned long end,
                 */
                if (use_1G_block(addr, next, phys) &&
                    (flags & NO_BLOCK_MAPPINGS) == 0) {
-                       pud_set_huge(pud, phys, prot);
+                       pud_set_huge(pudp, phys, prot);
 
                        /*
                         * After the PUD entry has been populated once, we
                         * only allow updates to the permission attributes.
                         */
                        BUG_ON(!pgattr_change_is_safe(pud_val(old_pud),
-                                                     pud_val(*pud)));
+                                                     READ_ONCE(pud_val(*pudp))));
                } else {
-                       alloc_init_cont_pmd(pud, addr, next, phys, prot,
+                       alloc_init_cont_pmd(pudp, addr, next, phys, prot,
                                            pgtable_alloc, flags);
 
                        BUG_ON(pud_val(old_pud) != 0 &&
-                              pud_val(old_pud) != pud_val(*pud));
+                              pud_val(old_pud) != READ_ONCE(pud_val(*pudp)));
                }
                phys += next - addr;
-       } while (pud++, addr = next, addr != end);
+       } while (pudp++, addr = next, addr != end);
 
        pud_clear_fixmap();
 }
@@ -320,7 +327,7 @@ static void __create_pgd_mapping(pgd_t *pgdir, phys_addr_t phys,
                                 int flags)
 {
        unsigned long addr, length, end, next;
-       pgd_t *pgd = pgd_offset_raw(pgdir, virt);
+       pgd_t *pgdp = pgd_offset_raw(pgdir, virt);
 
        /*
         * If the virtual and physical address don't have the same offset
@@ -336,10 +343,10 @@ static void __create_pgd_mapping(pgd_t *pgdir, phys_addr_t phys,
        end = addr + length;
        do {
                next = pgd_addr_end(addr, end);
-               alloc_init_pud(pgd, addr, next, phys, prot, pgtable_alloc,
+               alloc_init_pud(pgdp, addr, next, phys, prot, pgtable_alloc,
                               flags);
                phys += next - addr;
-       } while (pgd++, addr = next, addr != end);
+       } while (pgdp++, addr = next, addr != end);
 }
 
 static phys_addr_t pgd_pgtable_alloc(void)
@@ -401,10 +408,10 @@ static void update_mapping_prot(phys_addr_t phys, unsigned long virt,
        flush_tlb_kernel_range(virt, virt + size);
 }
 
-static void __init __map_memblock(pgd_t *pgd, phys_addr_t start,
+static void __init __map_memblock(pgd_t *pgdp, phys_addr_t start,
                                  phys_addr_t end, pgprot_t prot, int flags)
 {
-       __create_pgd_mapping(pgd, start, __phys_to_virt(start), end - start,
+       __create_pgd_mapping(pgdp, start, __phys_to_virt(start), end - start,
                             prot, early_pgtable_alloc, flags);
 }
 
@@ -418,7 +425,7 @@ void __init mark_linear_text_alias_ro(void)
                            PAGE_KERNEL_RO);
 }
 
-static void __init map_mem(pgd_t *pgd)
+static void __init map_mem(pgd_t *pgdp)
 {
        phys_addr_t kernel_start = __pa_symbol(_text);
        phys_addr_t kernel_end = __pa_symbol(__init_begin);
@@ -451,7 +458,7 @@ static void __init map_mem(pgd_t *pgd)
                if (memblock_is_nomap(reg))
                        continue;
 
-               __map_memblock(pgd, start, end, PAGE_KERNEL, flags);
+               __map_memblock(pgdp, start, end, PAGE_KERNEL, flags);
        }
 
        /*
@@ -464,7 +471,7 @@ static void __init map_mem(pgd_t *pgd)
         * Note that contiguous mappings cannot be remapped in this way,
         * so we should avoid them here.
         */
-       __map_memblock(pgd, kernel_start, kernel_end,
+       __map_memblock(pgdp, kernel_start, kernel_end,
                       PAGE_KERNEL, NO_CONT_MAPPINGS);
        memblock_clear_nomap(kernel_start, kernel_end - kernel_start);
 
@@ -475,7 +482,7 @@ static void __init map_mem(pgd_t *pgd)
         * through /sys/kernel/kexec_crash_size interface.
         */
        if (crashk_res.end) {
-               __map_memblock(pgd, crashk_res.start, crashk_res.end + 1,
+               __map_memblock(pgdp, crashk_res.start, crashk_res.end + 1,
                               PAGE_KERNEL,
                               NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS);
                memblock_clear_nomap(crashk_res.start,
@@ -499,7 +506,7 @@ void mark_rodata_ro(void)
        debug_checkwx();
 }
 
-static void __init map_kernel_segment(pgd_t *pgd, void *va_start, void *va_end,
+static void __init map_kernel_segment(pgd_t *pgdp, void *va_start, void *va_end,
                                      pgprot_t prot, struct vm_struct *vma,
                                      int flags, unsigned long vm_flags)
 {
@@ -509,7 +516,7 @@ static void __init map_kernel_segment(pgd_t *pgd, void *va_start, void *va_end,
        BUG_ON(!PAGE_ALIGNED(pa_start));
        BUG_ON(!PAGE_ALIGNED(size));
 
-       __create_pgd_mapping(pgd, pa_start, (unsigned long)va_start, size, prot,
+       __create_pgd_mapping(pgdp, pa_start, (unsigned long)va_start, size, prot,
                             early_pgtable_alloc, flags);
 
        if (!(vm_flags & VM_NO_GUARD))
@@ -562,7 +569,7 @@ core_initcall(map_entry_trampoline);
 /*
  * Create fine-grained mappings for the kernel.
  */
-static void __init map_kernel(pgd_t *pgd)
+static void __init map_kernel(pgd_t *pgdp)
 {
        static struct vm_struct vmlinux_text, vmlinux_rodata, vmlinux_inittext,
                                vmlinux_initdata, vmlinux_data;
@@ -578,24 +585,24 @@ static void __init map_kernel(pgd_t *pgd)
         * Only rodata will be remapped with different permissions later on,
         * all other segments are allowed to use contiguous mappings.
         */
-       map_kernel_segment(pgd, _text, _etext, text_prot, &vmlinux_text, 0,
+       map_kernel_segment(pgdp, _text, _etext, text_prot, &vmlinux_text, 0,
                           VM_NO_GUARD);
-       map_kernel_segment(pgd, __start_rodata, __inittext_begin, PAGE_KERNEL,
+       map_kernel_segment(pgdp, __start_rodata, __inittext_begin, PAGE_KERNEL,
                           &vmlinux_rodata, NO_CONT_MAPPINGS, VM_NO_GUARD);
-       map_kernel_segment(pgd, __inittext_begin, __inittext_end, text_prot,
+       map_kernel_segment(pgdp, __inittext_begin, __inittext_end, text_prot,
                           &vmlinux_inittext, 0, VM_NO_GUARD);
-       map_kernel_segment(pgd, __initdata_begin, __initdata_end, PAGE_KERNEL,
+       map_kernel_segment(pgdp, __initdata_begin, __initdata_end, PAGE_KERNEL,
                           &vmlinux_initdata, 0, VM_NO_GUARD);
-       map_kernel_segment(pgd, _data, _end, PAGE_KERNEL, &vmlinux_data, 0, 0);
+       map_kernel_segment(pgdp, _data, _end, PAGE_KERNEL, &vmlinux_data, 0, 0);
 
-       if (!pgd_val(*pgd_offset_raw(pgd, FIXADDR_START))) {
+       if (!READ_ONCE(pgd_val(*pgd_offset_raw(pgdp, FIXADDR_START)))) {
                /*
                 * The fixmap falls in a separate pgd to the kernel, and doesn't
                 * live in the carveout for the swapper_pg_dir. We can simply
                 * re-use the existing dir for the fixmap.
                 */
-               set_pgd(pgd_offset_raw(pgd, FIXADDR_START),
-                       *pgd_offset_k(FIXADDR_START));
+               set_pgd(pgd_offset_raw(pgdp, FIXADDR_START),
+                       READ_ONCE(*pgd_offset_k(FIXADDR_START)));
        } else if (CONFIG_PGTABLE_LEVELS > 3) {
                /*
                 * The fixmap shares its top level pgd entry with the kernel
@@ -604,14 +611,15 @@ static void __init map_kernel(pgd_t *pgd)
                 * entry instead.
                 */
                BUG_ON(!IS_ENABLED(CONFIG_ARM64_16K_PAGES));
-               pud_populate(&init_mm, pud_set_fixmap_offset(pgd, FIXADDR_START),
+               pud_populate(&init_mm,
+                            pud_set_fixmap_offset(pgdp, FIXADDR_START),
                             lm_alias(bm_pmd));
                pud_clear_fixmap();
        } else {
                BUG();
        }
 
-       kasan_copy_shadow(pgd);
+       kasan_copy_shadow(pgdp);
 }
 
 /*
@@ -621,10 +629,10 @@ static void __init map_kernel(pgd_t *pgd)
 void __init paging_init(void)
 {
        phys_addr_t pgd_phys = early_pgtable_alloc();
-       pgd_t *pgd = pgd_set_fixmap(pgd_phys);
+       pgd_t *pgdp = pgd_set_fixmap(pgd_phys);
 
-       map_kernel(pgd);
-       map_mem(pgd);
+       map_kernel(pgdp);
+       map_mem(pgdp);
 
        /*
         * We want to reuse the original swapper_pg_dir so we don't have to
@@ -635,7 +643,7 @@ void __init paging_init(void)
         * To do this we need to go via a temporary pgd.
         */
        cpu_replace_ttbr1(__va(pgd_phys));
-       memcpy(swapper_pg_dir, pgd, PGD_SIZE);
+       memcpy(swapper_pg_dir, pgdp, PGD_SIZE);
        cpu_replace_ttbr1(lm_alias(swapper_pg_dir));
 
        pgd_clear_fixmap();
@@ -655,37 +663,40 @@ void __init paging_init(void)
  */
 int kern_addr_valid(unsigned long addr)
 {
-       pgd_t *pgd;
-       pud_t *pud;
-       pmd_t *pmd;
-       pte_t *pte;
+       pgd_t *pgdp;
+       pud_t *pudp, pud;
+       pmd_t *pmdp, pmd;
+       pte_t *ptep, pte;
 
        if ((((long)addr) >> VA_BITS) != -1UL)
                return 0;
 
-       pgd = pgd_offset_k(addr);
-       if (pgd_none(*pgd))
+       pgdp = pgd_offset_k(addr);
+       if (pgd_none(READ_ONCE(*pgdp)))
                return 0;
 
-       pud = pud_offset(pgd, addr);
-       if (pud_none(*pud))
+       pudp = pud_offset(pgdp, addr);
+       pud = READ_ONCE(*pudp);
+       if (pud_none(pud))
                return 0;
 
-       if (pud_sect(*pud))
-               return pfn_valid(pud_pfn(*pud));
+       if (pud_sect(pud))
+               return pfn_valid(pud_pfn(pud));
 
-       pmd = pmd_offset(pud, addr);
-       if (pmd_none(*pmd))
+       pmdp = pmd_offset(pudp, addr);
+       pmd = READ_ONCE(*pmdp);
+       if (pmd_none(pmd))
                return 0;
 
-       if (pmd_sect(*pmd))
-               return pfn_valid(pmd_pfn(*pmd));
+       if (pmd_sect(pmd))
+               return pfn_valid(pmd_pfn(pmd));
 
-       pte = pte_offset_kernel(pmd, addr);
-       if (pte_none(*pte))
+       ptep = pte_offset_kernel(pmdp, addr);
+       pte = READ_ONCE(*ptep);
+       if (pte_none(pte))
                return 0;
 
-       return pfn_valid(pte_pfn(*pte));
+       return pfn_valid(pte_pfn(pte));
 }
 #ifdef CONFIG_SPARSEMEM_VMEMMAP
 #if !ARM64_SWAPPER_USES_SECTION_MAPS
@@ -700,32 +711,32 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
 {
        unsigned long addr = start;
        unsigned long next;
-       pgd_t *pgd;
-       pud_t *pud;
-       pmd_t *pmd;
+       pgd_t *pgdp;
+       pud_t *pudp;
+       pmd_t *pmdp;
 
        do {
                next = pmd_addr_end(addr, end);
 
-               pgd = vmemmap_pgd_populate(addr, node);
-               if (!pgd)
+               pgdp = vmemmap_pgd_populate(addr, node);
+               if (!pgdp)
                        return -ENOMEM;
 
-               pud = vmemmap_pud_populate(pgd, addr, node);
-               if (!pud)
+               pudp = vmemmap_pud_populate(pgdp, addr, node);
+               if (!pudp)
                        return -ENOMEM;
 
-               pmd = pmd_offset(pud, addr);
-               if (pmd_none(*pmd)) {
+               pmdp = pmd_offset(pudp, addr);
+               if (pmd_none(READ_ONCE(*pmdp))) {
                        void *p = NULL;
 
                        p = vmemmap_alloc_block_buf(PMD_SIZE, node);
                        if (!p)
                                return -ENOMEM;
 
-                       pmd_set_huge(pmd, __pa(p), __pgprot(PROT_SECT_NORMAL));
+                       pmd_set_huge(pmdp, __pa(p), __pgprot(PROT_SECT_NORMAL));
                } else
-                       vmemmap_verify((pte_t *)pmd, node, addr, next);
+                       vmemmap_verify((pte_t *)pmdp, node, addr, next);
        } while (addr = next, addr != end);
 
        return 0;
@@ -739,20 +750,22 @@ void vmemmap_free(unsigned long start, unsigned long end,
 
 static inline pud_t * fixmap_pud(unsigned long addr)
 {
-       pgd_t *pgd = pgd_offset_k(addr);
+       pgd_t *pgdp = pgd_offset_k(addr);
+       pgd_t pgd = READ_ONCE(*pgdp);
 
-       BUG_ON(pgd_none(*pgd) || pgd_bad(*pgd));
+       BUG_ON(pgd_none(pgd) || pgd_bad(pgd));
 
-       return pud_offset_kimg(pgd, addr);
+       return pud_offset_kimg(pgdp, addr);
 }
 
 static inline pmd_t * fixmap_pmd(unsigned long addr)
 {
-       pud_t *pud = fixmap_pud(addr);
+       pud_t *pudp = fixmap_pud(addr);
+       pud_t pud = READ_ONCE(*pudp);
 
-       BUG_ON(pud_none(*pud) || pud_bad(*pud));
+       BUG_ON(pud_none(pud) || pud_bad(pud));
 
-       return pmd_offset_kimg(pud, addr);
+       return pmd_offset_kimg(pudp, addr);
 }
 
 static inline pte_t * fixmap_pte(unsigned long addr)
@@ -768,30 +781,31 @@ static inline pte_t * fixmap_pte(unsigned long addr)
  */
 void __init early_fixmap_init(void)
 {
-       pgd_t *pgd;
-       pud_t *pud;
-       pmd_t *pmd;
+       pgd_t *pgdp, pgd;
+       pud_t *pudp;
+       pmd_t *pmdp;
        unsigned long addr = FIXADDR_START;
 
-       pgd = pgd_offset_k(addr);
+       pgdp = pgd_offset_k(addr);
+       pgd = READ_ONCE(*pgdp);
        if (CONFIG_PGTABLE_LEVELS > 3 &&
-           !(pgd_none(*pgd) || pgd_page_paddr(*pgd) == __pa_symbol(bm_pud))) {
+           !(pgd_none(pgd) || pgd_page_paddr(pgd) == __pa_symbol(bm_pud))) {
                /*
                 * We only end up here if the kernel mapping and the fixmap
                 * share the top level pgd entry, which should only happen on
                 * 16k/4 levels configurations.
                 */
                BUG_ON(!IS_ENABLED(CONFIG_ARM64_16K_PAGES));
-               pud = pud_offset_kimg(pgd, addr);
+               pudp = pud_offset_kimg(pgdp, addr);
        } else {
-               if (pgd_none(*pgd))
-                       __pgd_populate(pgd, __pa_symbol(bm_pud), PUD_TYPE_TABLE);
-               pud = fixmap_pud(addr);
+               if (pgd_none(pgd))
+                       __pgd_populate(pgdp, __pa_symbol(bm_pud), PUD_TYPE_TABLE);
+               pudp = fixmap_pud(addr);
        }
-       if (pud_none(*pud))
-               __pud_populate(pud, __pa_symbol(bm_pmd), PMD_TYPE_TABLE);
-       pmd = fixmap_pmd(addr);
-       __pmd_populate(pmd, __pa_symbol(bm_pte), PMD_TYPE_TABLE);
+       if (pud_none(READ_ONCE(*pudp)))
+               __pud_populate(pudp, __pa_symbol(bm_pmd), PMD_TYPE_TABLE);
+       pmdp = fixmap_pmd(addr);
+       __pmd_populate(pmdp, __pa_symbol(bm_pte), PMD_TYPE_TABLE);
 
        /*
         * The boot-ioremap range spans multiple pmds, for which
@@ -800,11 +814,11 @@ void __init early_fixmap_init(void)
        BUILD_BUG_ON((__fix_to_virt(FIX_BTMAP_BEGIN) >> PMD_SHIFT)
                     != (__fix_to_virt(FIX_BTMAP_END) >> PMD_SHIFT));
 
-       if ((pmd != fixmap_pmd(fix_to_virt(FIX_BTMAP_BEGIN)))
-            || pmd != fixmap_pmd(fix_to_virt(FIX_BTMAP_END))) {
+       if ((pmdp != fixmap_pmd(fix_to_virt(FIX_BTMAP_BEGIN)))
+            || pmdp != fixmap_pmd(fix_to_virt(FIX_BTMAP_END))) {
                WARN_ON(1);
-               pr_warn("pmd %p != %p, %p\n",
-                       pmd, fixmap_pmd(fix_to_virt(FIX_BTMAP_BEGIN)),
+               pr_warn("pmdp %p != %p, %p\n",
+                       pmdp, fixmap_pmd(fix_to_virt(FIX_BTMAP_BEGIN)),
                        fixmap_pmd(fix_to_virt(FIX_BTMAP_END)));
                pr_warn("fix_to_virt(FIX_BTMAP_BEGIN): %08lx\n",
                        fix_to_virt(FIX_BTMAP_BEGIN));
@@ -824,16 +838,16 @@ void __set_fixmap(enum fixed_addresses idx,
                               phys_addr_t phys, pgprot_t flags)
 {
        unsigned long addr = __fix_to_virt(idx);
-       pte_t *pte;
+       pte_t *ptep;
 
        BUG_ON(idx <= FIX_HOLE || idx >= __end_of_fixed_addresses);
 
-       pte = fixmap_pte(addr);
+       ptep = fixmap_pte(addr);
 
        if (pgprot_val(flags)) {
-               set_pte(pte, pfn_pte(phys >> PAGE_SHIFT, flags));
+               set_pte(ptep, pfn_pte(phys >> PAGE_SHIFT, flags));
        } else {
-               pte_clear(&init_mm, addr, pte);
+               pte_clear(&init_mm, addr, ptep);
                flush_tlb_kernel_range(addr, addr+PAGE_SIZE);
        }
 }
@@ -915,36 +929,36 @@ int __init arch_ioremap_pmd_supported(void)
        return 1;
 }
 
-int pud_set_huge(pud_t *pud, phys_addr_t phys, pgprot_t prot)
+int pud_set_huge(pud_t *pudp, phys_addr_t phys, pgprot_t prot)
 {
        pgprot_t sect_prot = __pgprot(PUD_TYPE_SECT |
                                        pgprot_val(mk_sect_prot(prot)));
        BUG_ON(phys & ~PUD_MASK);
-       set_pud(pud, pfn_pud(__phys_to_pfn(phys), sect_prot));
+       set_pud(pudp, pfn_pud(__phys_to_pfn(phys), sect_prot));
        return 1;
 }
 
-int pmd_set_huge(pmd_t *pmd, phys_addr_t phys, pgprot_t prot)
+int pmd_set_huge(pmd_t *pmdp, phys_addr_t phys, pgprot_t prot)
 {
        pgprot_t sect_prot = __pgprot(PMD_TYPE_SECT |
                                        pgprot_val(mk_sect_prot(prot)));
        BUG_ON(phys & ~PMD_MASK);
-       set_pmd(pmd, pfn_pmd(__phys_to_pfn(phys), sect_prot));
+       set_pmd(pmdp, pfn_pmd(__phys_to_pfn(phys), sect_prot));
        return 1;
 }
 
-int pud_clear_huge(pud_t *pud)
+int pud_clear_huge(pud_t *pudp)
 {
-       if (!pud_sect(*pud))
+       if (!pud_sect(READ_ONCE(*pudp)))
                return 0;
-       pud_clear(pud);
+       pud_clear(pudp);
        return 1;
 }
 
-int pmd_clear_huge(pmd_t *pmd)
+int pmd_clear_huge(pmd_t *pmdp)
 {
-       if (!pmd_sect(*pmd))
+       if (!pmd_sect(READ_ONCE(*pmdp)))
                return 0;
-       pmd_clear(pmd);
+       pmd_clear(pmdp);
        return 1;
 }
index a682a0a2a0fa4d5db9175256e78100b1afde344a..a56359373d8b3592e6cde6891d9b44206bd96137 100644 (file)
@@ -29,7 +29,7 @@ static int change_page_range(pte_t *ptep, pgtable_t token, unsigned long addr,
                        void *data)
 {
        struct page_change_data *cdata = data;
-       pte_t pte = *ptep;
+       pte_t pte = READ_ONCE(*ptep);
 
        pte = clear_pte_bit(pte, cdata->clear_mask);
        pte = set_pte_bit(pte, cdata->set_mask);
@@ -156,30 +156,32 @@ void __kernel_map_pages(struct page *page, int numpages, int enable)
  */
 bool kernel_page_present(struct page *page)
 {
-       pgd_t *pgd;
-       pud_t *pud;
-       pmd_t *pmd;
-       pte_t *pte;
+       pgd_t *pgdp;
+       pud_t *pudp, pud;
+       pmd_t *pmdp, pmd;
+       pte_t *ptep;
        unsigned long addr = (unsigned long)page_address(page);
 
-       pgd = pgd_offset_k(addr);
-       if (pgd_none(*pgd))
+       pgdp = pgd_offset_k(addr);
+       if (pgd_none(READ_ONCE(*pgdp)))
                return false;
 
-       pud = pud_offset(pgd, addr);
-       if (pud_none(*pud))
+       pudp = pud_offset(pgdp, addr);
+       pud = READ_ONCE(*pudp);
+       if (pud_none(pud))
                return false;
-       if (pud_sect(*pud))
+       if (pud_sect(pud))
                return true;
 
-       pmd = pmd_offset(pud, addr);
-       if (pmd_none(*pmd))
+       pmdp = pmd_offset(pudp, addr);
+       pmd = READ_ONCE(*pmdp);
+       if (pmd_none(pmd))
                return false;
-       if (pmd_sect(*pmd))
+       if (pmd_sect(pmd))
                return true;
 
-       pte = pte_offset_kernel(pmd, addr);
-       return pte_valid(*pte);
+       ptep = pte_offset_kernel(pmdp, addr);
+       return pte_valid(READ_ONCE(*ptep));
 }
 #endif /* CONFIG_HIBERNATION */
 #endif /* CONFIG_DEBUG_PAGEALLOC */
index 71baed7e592a499196a1c7bc239dbbc3297d3bc2..c0af4761729986da832a8e844b256e64da8cb4c1 100644 (file)
@@ -205,7 +205,8 @@ ENDPROC(idmap_cpu_replace_ttbr1)
        dc      cvac, cur_\()\type\()p          // Ensure any existing dirty
        dmb     sy                              // lines are written back before
        ldr     \type, [cur_\()\type\()p]       // loading the entry
-       tbz     \type, #0, next_\()\type        // Skip invalid entries
+       tbz     \type, #0, skip_\()\type        // Skip invalid and
+       tbnz    \type, #11, skip_\()\type       // non-global entries
        .endm
 
        .macro __idmap_kpti_put_pgtable_ent_ng, type
@@ -265,8 +266,9 @@ ENTRY(idmap_kpti_install_ng_mappings)
        add     end_pgdp, cur_pgdp, #(PTRS_PER_PGD * 8)
 do_pgd:        __idmap_kpti_get_pgtable_ent    pgd
        tbnz    pgd, #1, walk_puds
-       __idmap_kpti_put_pgtable_ent_ng pgd
 next_pgd:
+       __idmap_kpti_put_pgtable_ent_ng pgd
+skip_pgd:
        add     cur_pgdp, cur_pgdp, #8
        cmp     cur_pgdp, end_pgdp
        b.ne    do_pgd
@@ -294,8 +296,9 @@ walk_puds:
        add     end_pudp, cur_pudp, #(PTRS_PER_PUD * 8)
 do_pud:        __idmap_kpti_get_pgtable_ent    pud
        tbnz    pud, #1, walk_pmds
-       __idmap_kpti_put_pgtable_ent_ng pud
 next_pud:
+       __idmap_kpti_put_pgtable_ent_ng pud
+skip_pud:
        add     cur_pudp, cur_pudp, 8
        cmp     cur_pudp, end_pudp
        b.ne    do_pud
@@ -314,8 +317,9 @@ walk_pmds:
        add     end_pmdp, cur_pmdp, #(PTRS_PER_PMD * 8)
 do_pmd:        __idmap_kpti_get_pgtable_ent    pmd
        tbnz    pmd, #1, walk_ptes
-       __idmap_kpti_put_pgtable_ent_ng pmd
 next_pmd:
+       __idmap_kpti_put_pgtable_ent_ng pmd
+skip_pmd:
        add     cur_pmdp, cur_pmdp, #8
        cmp     cur_pmdp, end_pmdp
        b.ne    do_pmd
@@ -333,7 +337,7 @@ walk_ptes:
        add     end_ptep, cur_ptep, #(PTRS_PER_PTE * 8)
 do_pte:        __idmap_kpti_get_pgtable_ent    pte
        __idmap_kpti_put_pgtable_ent_ng pte
-next_pte:
+skip_pte:
        add     cur_ptep, cur_ptep, #8
        cmp     cur_ptep, end_ptep
        b.ne    do_pte
index 0b4c65a1af25fbe264766ee1e14c4c0bf19bc5e6..498f3da3f225d2ed8479af540bbbc22786e2c55d 100644 (file)
@@ -41,7 +41,6 @@ ifneq ($(CONFIG_IA64_ESI),)
 obj-y                          += esi_stub.o   # must be in kernel proper
 endif
 obj-$(CONFIG_INTEL_IOMMU)      += pci-dma.o
-obj-$(CONFIG_SWIOTLB)          += pci-swiotlb.o
 
 obj-$(CONFIG_BINFMT_ELF)       += elfcore.o
 
index 19c88d770054617bfc23b15c81c20b12a7f7cc3f..fcf9af492d60229a491337e56b1ebc9adf60f7fb 100644 (file)
@@ -10,6 +10,8 @@
 
 #include <linux/errno.h>
 #include <linux/percpu.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
 #include <linux/spinlock.h>
 
 #include <asm/mips-cps.h>
@@ -22,6 +24,17 @@ static DEFINE_PER_CPU_ALIGNED(unsigned long, cpc_core_lock_flags);
 
 phys_addr_t __weak mips_cpc_default_phys_base(void)
 {
+       struct device_node *cpc_node;
+       struct resource res;
+       int err;
+
+       cpc_node = of_find_compatible_node(of_root, NULL, "mti,mips-cpc");
+       if (cpc_node) {
+               err = of_address_to_resource(cpc_node, 0, &res);
+               if (!err)
+                       return res.start;
+       }
+
        return 0;
 }
 
index 85bc601e9a0d43ffd89669eac4b359de99566083..5f8b0a9e30b3d6faec9befca1e759a8f9263f8c8 100644 (file)
@@ -375,6 +375,7 @@ static void __init bootmem_init(void)
        unsigned long reserved_end;
        unsigned long mapstart = ~0UL;
        unsigned long bootmap_size;
+       phys_addr_t ramstart = (phys_addr_t)ULLONG_MAX;
        bool bootmap_valid = false;
        int i;
 
@@ -395,7 +396,8 @@ static void __init bootmem_init(void)
        max_low_pfn = 0;
 
        /*
-        * Find the highest page frame number we have available.
+        * Find the highest page frame number we have available
+        * and the lowest used RAM address
         */
        for (i = 0; i < boot_mem_map.nr_map; i++) {
                unsigned long start, end;
@@ -407,6 +409,8 @@ static void __init bootmem_init(void)
                end = PFN_DOWN(boot_mem_map.map[i].addr
                                + boot_mem_map.map[i].size);
 
+               ramstart = min(ramstart, boot_mem_map.map[i].addr);
+
 #ifndef CONFIG_HIGHMEM
                /*
                 * Skip highmem here so we get an accurate max_low_pfn if low
@@ -436,6 +440,13 @@ static void __init bootmem_init(void)
                mapstart = max(reserved_end, start);
        }
 
+       /*
+        * Reserve any memory between the start of RAM and PHYS_OFFSET
+        */
+       if (ramstart > PHYS_OFFSET)
+               add_memory_region(PHYS_OFFSET, ramstart - PHYS_OFFSET,
+                                 BOOT_MEM_RESERVED);
+
        if (min_low_pfn >= max_low_pfn)
                panic("Incorrect memory mapping !!!");
        if (min_low_pfn > ARCH_PFN_OFFSET) {
@@ -664,9 +675,6 @@ static int __init early_parse_mem(char *p)
 
        add_memory_region(start, size, BOOT_MEM_RAM);
 
-       if (start && start > PHYS_OFFSET)
-               add_memory_region(PHYS_OFFSET, start - PHYS_OFFSET,
-                               BOOT_MEM_RESERVED);
        return 0;
 }
 early_param("mem", early_parse_mem);
index 87dcac2447c8df20a572139d5053624e91acf2ca..9d41732a9146a31545b9114812cb12c669196478 100644 (file)
@@ -572,7 +572,7 @@ asmlinkage void __weak plat_wired_tlb_setup(void)
         */
 }
 
-void __init bmips_cpu_setup(void)
+void bmips_cpu_setup(void)
 {
        void __iomem __maybe_unused *cbr = BMIPS_GET_CBR();
        u32 __maybe_unused cfg;
index 30a155c0a6b07e31ca69d5a7418fb19a1f6e7872..c615abdce119ea34ff6c33d02109cd700036db64 100644 (file)
@@ -16,6 +16,7 @@
 #define PGD_INDEX_SIZE (32 - PGDIR_SHIFT)
 
 #define PMD_CACHE_INDEX        PMD_INDEX_SIZE
+#define PUD_CACHE_INDEX        PUD_INDEX_SIZE
 
 #ifndef __ASSEMBLY__
 #define PTE_TABLE_SIZE (sizeof(pte_t) << PTE_INDEX_SIZE)
index 949d691094a46d674880dd1e54da971a4161f815..67c5475311ee6e03b29486f8518dc74758263224 100644 (file)
@@ -63,7 +63,8 @@ static inline int hash__hugepd_ok(hugepd_t hpd)
  * keeping the prototype consistent across the two formats.
  */
 static inline unsigned long pte_set_hidx(pte_t *ptep, real_pte_t rpte,
-                       unsigned int subpg_index, unsigned long hidx)
+                                        unsigned int subpg_index, unsigned long hidx,
+                                        int offset)
 {
        return (hidx << H_PAGE_F_GIX_SHIFT) &
                (H_PAGE_F_SECOND | H_PAGE_F_GIX);
index 338b7da468cef309fa2b787c852e96ab05f014e5..3bcf269f8f55470097ac56680685321bf13e62ba 100644 (file)
@@ -45,7 +45,7 @@
  * generic accessors and iterators here
  */
 #define __real_pte __real_pte
-static inline real_pte_t __real_pte(pte_t pte, pte_t *ptep)
+static inline real_pte_t __real_pte(pte_t pte, pte_t *ptep, int offset)
 {
        real_pte_t rpte;
        unsigned long *hidxp;
@@ -59,7 +59,7 @@ static inline real_pte_t __real_pte(pte_t pte, pte_t *ptep)
         */
        smp_rmb();
 
-       hidxp = (unsigned long *)(ptep + PTRS_PER_PTE);
+       hidxp = (unsigned long *)(ptep + offset);
        rpte.hidx = *hidxp;
        return rpte;
 }
@@ -86,9 +86,10 @@ static inline unsigned long __rpte_to_hidx(real_pte_t rpte, unsigned long index)
  * expected to modify the PTE bits accordingly and commit the PTE to memory.
  */
 static inline unsigned long pte_set_hidx(pte_t *ptep, real_pte_t rpte,
-               unsigned int subpg_index, unsigned long hidx)
+                                        unsigned int subpg_index,
+                                        unsigned long hidx, int offset)
 {
-       unsigned long *hidxp = (unsigned long *)(ptep + PTRS_PER_PTE);
+       unsigned long *hidxp = (unsigned long *)(ptep + offset);
 
        rpte.hidx &= ~HIDX_BITS(0xfUL, subpg_index);
        *hidxp = rpte.hidx  | HIDX_BITS(HIDX_SHIFT_BY_ONE(hidx), subpg_index);
@@ -140,13 +141,18 @@ static inline int hash__remap_4k_pfn(struct vm_area_struct *vma, unsigned long a
 }
 
 #define H_PTE_TABLE_SIZE       PTE_FRAG_SIZE
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+#if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined (CONFIG_HUGETLB_PAGE)
 #define H_PMD_TABLE_SIZE       ((sizeof(pmd_t) << PMD_INDEX_SIZE) + \
                                 (sizeof(unsigned long) << PMD_INDEX_SIZE))
 #else
 #define H_PMD_TABLE_SIZE       (sizeof(pmd_t) << PMD_INDEX_SIZE)
 #endif
+#ifdef CONFIG_HUGETLB_PAGE
+#define H_PUD_TABLE_SIZE       ((sizeof(pud_t) << PUD_INDEX_SIZE) +    \
+                                (sizeof(unsigned long) << PUD_INDEX_SIZE))
+#else
 #define H_PUD_TABLE_SIZE       (sizeof(pud_t) << PUD_INDEX_SIZE)
+#endif
 #define H_PGD_TABLE_SIZE       (sizeof(pgd_t) << PGD_INDEX_SIZE)
 
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
index 0920eff731b385221edeb46a04ed5f280ad76ff8..935adcd92a81655ed79e9c21fb6e196233ca69b1 100644 (file)
@@ -23,7 +23,8 @@
                                 H_PUD_INDEX_SIZE + H_PGD_INDEX_SIZE + PAGE_SHIFT)
 #define H_PGTABLE_RANGE                (ASM_CONST(1) << H_PGTABLE_EADDR_SIZE)
 
-#if defined(CONFIG_TRANSPARENT_HUGEPAGE) &&  defined(CONFIG_PPC_64K_PAGES)
+#if (defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_HUGETLB_PAGE)) && \
+       defined(CONFIG_PPC_64K_PAGES)
 /*
  * only with hash 64k we need to use the second half of pmd page table
  * to store pointer to deposited pgtable_t
 #else
 #define H_PMD_CACHE_INDEX      H_PMD_INDEX_SIZE
 #endif
+/*
+ * We store the slot details in the second half of page table.
+ * Increase the pud level table so that hugetlb ptes can be stored
+ * at pud level.
+ */
+#if defined(CONFIG_HUGETLB_PAGE) &&  defined(CONFIG_PPC_64K_PAGES)
+#define H_PUD_CACHE_INDEX      (H_PUD_INDEX_SIZE + 1)
+#else
+#define H_PUD_CACHE_INDEX      (H_PUD_INDEX_SIZE)
+#endif
 /*
  * Define the address range of the kernel non-linear virtual area
  */
index 1fcfa425cefaf205fe787cc9a480265aec758e0f..4746bc68d446d8e95427e67a86c3493a1d1f1668 100644 (file)
@@ -73,10 +73,16 @@ static inline void radix__pgd_free(struct mm_struct *mm, pgd_t *pgd)
 
 static inline pgd_t *pgd_alloc(struct mm_struct *mm)
 {
+       pgd_t *pgd;
+
        if (radix_enabled())
                return radix__pgd_alloc(mm);
-       return kmem_cache_alloc(PGT_CACHE(PGD_INDEX_SIZE),
-               pgtable_gfp_flags(mm, GFP_KERNEL));
+
+       pgd = kmem_cache_alloc(PGT_CACHE(PGD_INDEX_SIZE),
+                              pgtable_gfp_flags(mm, GFP_KERNEL));
+       memset(pgd, 0, PGD_TABLE_SIZE);
+
+       return pgd;
 }
 
 static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
@@ -93,13 +99,13 @@ static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pud_t *pud)
 
 static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
 {
-       return kmem_cache_alloc(PGT_CACHE(PUD_INDEX_SIZE),
+       return kmem_cache_alloc(PGT_CACHE(PUD_CACHE_INDEX),
                pgtable_gfp_flags(mm, GFP_KERNEL));
 }
 
 static inline void pud_free(struct mm_struct *mm, pud_t *pud)
 {
-       kmem_cache_free(PGT_CACHE(PUD_INDEX_SIZE), pud);
+       kmem_cache_free(PGT_CACHE(PUD_CACHE_INDEX), pud);
 }
 
 static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
@@ -115,7 +121,7 @@ static inline void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pud,
         * ahead and flush the page walk cache
         */
        flush_tlb_pgtable(tlb, address);
-        pgtable_free_tlb(tlb, pud, PUD_INDEX_SIZE);
+       pgtable_free_tlb(tlb, pud, PUD_CACHE_INDEX);
 }
 
 static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr)
index 51017726d49539fda8cba5b346460aeba104c3f9..a6b9f1d746002cd3479686603c76322d52676db9 100644 (file)
@@ -232,11 +232,13 @@ extern unsigned long __pmd_index_size;
 extern unsigned long __pud_index_size;
 extern unsigned long __pgd_index_size;
 extern unsigned long __pmd_cache_index;
+extern unsigned long __pud_cache_index;
 #define PTE_INDEX_SIZE  __pte_index_size
 #define PMD_INDEX_SIZE  __pmd_index_size
 #define PUD_INDEX_SIZE  __pud_index_size
 #define PGD_INDEX_SIZE  __pgd_index_size
 #define PMD_CACHE_INDEX __pmd_cache_index
+#define PUD_CACHE_INDEX __pud_cache_index
 /*
  * Because of use of pte fragments and THP, size of page table
  * are not always derived out of index size above.
@@ -348,7 +350,7 @@ extern unsigned long pci_io_base;
  */
 #ifndef __real_pte
 
-#define __real_pte(e,p)                ((real_pte_t){(e)})
+#define __real_pte(e, p, o)            ((real_pte_t){(e)})
 #define __rpte_to_pte(r)       ((r).pte)
 #define __rpte_to_hidx(r,index)        (pte_val(__rpte_to_pte(r)) >> H_PAGE_F_GIX_SHIFT)
 
index 176dfb73d42c073df181013c6497cb42f9c8f85d..471b2274fbeba815f04c1957d975f0f1a74bcdbe 100644 (file)
@@ -645,7 +645,7 @@ END_FTR_SECTION_NESTED(ftr,ftr,943)
                                          EXC_HV, SOFTEN_TEST_HV, bitmask)
 
 #define MASKABLE_RELON_EXCEPTION_HV_OOL(vec, label, bitmask)           \
-       MASKABLE_EXCEPTION_PROLOG_1(PACA_EXGEN, SOFTEN_NOTEST_HV, vec, bitmask);\
+       MASKABLE_EXCEPTION_PROLOG_1(PACA_EXGEN, SOFTEN_TEST_HV, vec, bitmask);\
        EXCEPTION_RELON_PROLOG_PSERIES_1(label, EXC_HV)
 
 /*
index 88e5e8f17e9896e5a051845235bfa0c822684552..855e17d158b11f04120b9b39a352af91cedb95c8 100644 (file)
 #define PACA_IRQ_HMI           0x20
 #define PACA_IRQ_PMI           0x40
 
+/*
+ * Some soft-masked interrupts must be hard masked until they are replayed
+ * (e.g., because the soft-masked handler does not clear the exception).
+ */
+#ifdef CONFIG_PPC_BOOK3S
+#define PACA_IRQ_MUST_HARD_MASK        (PACA_IRQ_EE|PACA_IRQ_PMI)
+#else
+#define PACA_IRQ_MUST_HARD_MASK        (PACA_IRQ_EE)
+#endif
+
 /*
  * flags for paca->irq_soft_mask
  */
@@ -244,7 +254,7 @@ static inline bool lazy_irq_pending(void)
 static inline void may_hard_irq_enable(void)
 {
        get_paca()->irq_happened &= ~PACA_IRQ_HARD_DIS;
-       if (!(get_paca()->irq_happened & PACA_IRQ_EE))
+       if (!(get_paca()->irq_happened & PACA_IRQ_MUST_HARD_MASK))
                __hard_irq_enable();
 }
 
index 9dcbfa6bbb91e740e483fa6c5c56b8422b8440c8..d8b1e8e7e035b31acd7372eee6bd8017166987d4 100644 (file)
@@ -140,6 +140,12 @@ static inline bool kdump_in_progress(void)
        return false;
 }
 
+static inline void crash_ipi_callback(struct pt_regs *regs) { }
+
+static inline void crash_send_ipi(void (*crash_ipi_callback)(struct pt_regs *))
+{
+}
+
 #endif /* CONFIG_KEXEC_CORE */
 #endif /* ! __ASSEMBLY__ */
 #endif /* __KERNEL__ */
index 504a3c36ce5c9b311a9c8864d112792ae60fab9f..03bbd1149530d3115d7c9e84c66893428fe4af43 100644 (file)
@@ -24,6 +24,7 @@ extern int icache_44x_need_flush;
 #define PGD_INDEX_SIZE (32 - PGDIR_SHIFT)
 
 #define PMD_CACHE_INDEX        PMD_INDEX_SIZE
+#define PUD_CACHE_INDEX        PUD_INDEX_SIZE
 
 #ifndef __ASSEMBLY__
 #define PTE_TABLE_SIZE (sizeof(pte_t) << PTE_INDEX_SIZE)
index abddf5830ad5550ee2c72875b209d9546afec579..5c5f75d005ada6289633455dc89bc0d2641c2e54 100644 (file)
@@ -27,6 +27,7 @@
 #else
 #define PMD_CACHE_INDEX        PMD_INDEX_SIZE
 #endif
+#define PUD_CACHE_INDEX PUD_INDEX_SIZE
 
 /*
  * Define the address range of the kernel non-linear virtual area
index 88187c285c70d5d823ccdf0a572cb8a7e24a6341..9f421641a35c8240cbacf192f6a1b22b4f33c63c 100644 (file)
@@ -44,6 +44,11 @@ extern int sysfs_add_device_to_node(struct device *dev, int nid);
 extern void sysfs_remove_device_from_node(struct device *dev, int nid);
 extern int numa_update_cpu_topology(bool cpus_locked);
 
+static inline void update_numa_cpu_lookup_table(unsigned int cpu, int node)
+{
+       numa_cpu_lookup_table[cpu] = node;
+}
+
 static inline int early_cpu_to_node(int cpu)
 {
        int nid;
@@ -76,12 +81,16 @@ static inline int numa_update_cpu_topology(bool cpus_locked)
 {
        return 0;
 }
+
+static inline void update_numa_cpu_lookup_table(unsigned int cpu, int node) {}
+
 #endif /* CONFIG_NUMA */
 
 #if defined(CONFIG_NUMA) && defined(CONFIG_PPC_SPLPAR)
 extern int start_topology_update(void);
 extern int stop_topology_update(void);
 extern int prrn_is_enabled(void);
+extern int find_and_online_cpu_nid(int cpu);
 #else
 static inline int start_topology_update(void)
 {
@@ -95,6 +104,10 @@ static inline int prrn_is_enabled(void)
 {
        return 0;
 }
+static inline int find_and_online_cpu_nid(int cpu)
+{
+       return 0;
+}
 #endif /* CONFIG_NUMA && CONFIG_PPC_SPLPAR */
 
 #if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_NEED_MULTIPLE_NODES)
index ee832d344a5a265018d8fa98f0c38c0a94549db5..9b6e653e501a1264a8bfa398400dd88ecfa38d25 100644 (file)
@@ -943,6 +943,8 @@ kernel_dbg_exc:
 /*
  * An interrupt came in while soft-disabled; We mark paca->irq_happened
  * accordingly and if the interrupt is level sensitive, we hard disable
+ * hard disable (full_mask) corresponds to PACA_IRQ_MUST_HARD_MASK, so
+ * keep these in synch.
  */
 
 .macro masked_interrupt_book3e paca_irq full_mask
index 243d072a225aac1f7c7eaa69b6e5ef8cd21ce2c6..3ac87e53b3da0fdc0c41bd967ac731f5d98d6efb 100644 (file)
@@ -1426,7 +1426,7 @@ EXC_COMMON_BEGIN(soft_nmi_common)
  *   triggered and won't automatically refire.
  * - If it was a HMI we return immediately since we handled it in realmode
  *   and it won't refire.
- * - else we hard disable and return.
+ * - Else it is one of PACA_IRQ_MUST_HARD_MASK, so hard disable and return.
  * This is called with r10 containing the value to OR to the paca field.
  */
 #define MASKED_INTERRUPT(_H)                           \
@@ -1441,8 +1441,8 @@ masked_##_H##interrupt:                                   \
        ori     r10,r10,0xffff;                         \
        mtspr   SPRN_DEC,r10;                           \
        b       MASKED_DEC_HANDLER_LABEL;               \
-1:     andi.   r10,r10,(PACA_IRQ_DBELL|PACA_IRQ_HMI);  \
-       bne     2f;                                     \
+1:     andi.   r10,r10,PACA_IRQ_MUST_HARD_MASK;        \
+       beq     2f;                                     \
        mfspr   r10,SPRN_##_H##SRR1;                    \
        xori    r10,r10,MSR_EE; /* clear MSR_EE */      \
        mtspr   SPRN_##_H##SRR1,r10;                    \
index 5a8bfee6e1877c58ae607445ea77af1ed6b2e869..04d0bbd7a1dd03e13e47e4c5e10a647672955ea3 100644 (file)
@@ -788,7 +788,8 @@ static int register_cpu_online(unsigned int cpu)
        if (cpu_has_feature(CPU_FTR_PPCAS_ARCH_V2))
                device_create_file(s, &dev_attr_pir);
 
-       if (cpu_has_feature(CPU_FTR_ARCH_206))
+       if (cpu_has_feature(CPU_FTR_ARCH_206) &&
+               !firmware_has_feature(FW_FEATURE_LPAR))
                device_create_file(s, &dev_attr_tscr);
 #endif /* CONFIG_PPC64 */
 
@@ -873,7 +874,8 @@ static int unregister_cpu_online(unsigned int cpu)
        if (cpu_has_feature(CPU_FTR_PPCAS_ARCH_V2))
                device_remove_file(s, &dev_attr_pir);
 
-       if (cpu_has_feature(CPU_FTR_ARCH_206))
+       if (cpu_has_feature(CPU_FTR_ARCH_206) &&
+               !firmware_has_feature(FW_FEATURE_LPAR))
                device_remove_file(s, &dev_attr_tscr);
 #endif /* CONFIG_PPC64 */
 
index 1604110c42386c39dea239aaa1a564ba5bc7b38e..916844f99c64e59655d3372ac4e69c731f0751e9 100644 (file)
@@ -216,6 +216,8 @@ static void __init __walk_drmem_v1_lmbs(const __be32 *prop, const __be32 *usm,
        u32 i, n_lmbs;
 
        n_lmbs = of_read_number(prop++, 1);
+       if (n_lmbs == 0)
+               return;
 
        for (i = 0; i < n_lmbs; i++) {
                read_drconf_v1_cell(&lmb, &prop);
@@ -245,6 +247,8 @@ static void __init __walk_drmem_v2_lmbs(const __be32 *prop, const __be32 *usm,
        u32 i, j, lmb_sets;
 
        lmb_sets = of_read_number(prop++, 1);
+       if (lmb_sets == 0)
+               return;
 
        for (i = 0; i < lmb_sets; i++) {
                read_drconf_v2_cell(&dr_cell, &prop);
@@ -354,6 +358,8 @@ static void __init init_drmem_v1_lmbs(const __be32 *prop)
        struct drmem_lmb *lmb;
 
        drmem_info->n_lmbs = of_read_number(prop++, 1);
+       if (drmem_info->n_lmbs == 0)
+               return;
 
        drmem_info->lmbs = kcalloc(drmem_info->n_lmbs, sizeof(*lmb),
                                   GFP_KERNEL);
@@ -373,6 +379,8 @@ static void __init init_drmem_v2_lmbs(const __be32 *prop)
        int lmb_index;
 
        lmb_sets = of_read_number(prop++, 1);
+       if (lmb_sets == 0)
+               return;
 
        /* first pass, calculate the number of LMBs */
        p = prop;
index 5a69b51d08a3615f319a325536a209b6399db2a7..d573d7d07f25f4d718043a71e199475d3df9597c 100644 (file)
@@ -55,7 +55,7 @@ int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid,
         * need to add in 0x1 if it's a read-only user page
         */
        rflags = htab_convert_pte_flags(new_pte);
-       rpte = __real_pte(__pte(old_pte), ptep);
+       rpte = __real_pte(__pte(old_pte), ptep, PTRS_PER_PTE);
 
        if (cpu_has_feature(CPU_FTR_NOEXECUTE) &&
            !cpu_has_feature(CPU_FTR_COHERENT_ICACHE))
@@ -117,7 +117,7 @@ repeat:
                        return -1;
                }
                new_pte = (new_pte & ~_PAGE_HPTEFLAGS) | H_PAGE_HASHPTE;
-               new_pte |= pte_set_hidx(ptep, rpte, 0, slot);
+               new_pte |= pte_set_hidx(ptep, rpte, 0, slot, PTRS_PER_PTE);
        }
        *ptep = __pte(new_pte & ~H_PAGE_BUSY);
        return 0;
index 2253bbc6a599d7804cb81dc49b2c6f82b81a435d..e601d95c3b20271d7b9cc6483cab402d51d80436 100644 (file)
@@ -86,7 +86,7 @@ int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid,
 
        subpg_index = (ea & (PAGE_SIZE - 1)) >> shift;
        vpn  = hpt_vpn(ea, vsid, ssize);
-       rpte = __real_pte(__pte(old_pte), ptep);
+       rpte = __real_pte(__pte(old_pte), ptep, PTRS_PER_PTE);
        /*
         *None of the sub 4k page is hashed
         */
@@ -214,7 +214,7 @@ repeat:
                return -1;
        }
 
-       new_pte |= pte_set_hidx(ptep, rpte, subpg_index, slot);
+       new_pte |= pte_set_hidx(ptep, rpte, subpg_index, slot, PTRS_PER_PTE);
        new_pte |= H_PAGE_HASHPTE;
 
        *ptep = __pte(new_pte & ~H_PAGE_BUSY);
@@ -262,7 +262,7 @@ int __hash_page_64K(unsigned long ea, unsigned long access,
        } while (!pte_xchg(ptep, __pte(old_pte), __pte(new_pte)));
 
        rflags = htab_convert_pte_flags(new_pte);
-       rpte = __real_pte(__pte(old_pte), ptep);
+       rpte = __real_pte(__pte(old_pte), ptep, PTRS_PER_PTE);
 
        if (cpu_has_feature(CPU_FTR_NOEXECUTE) &&
            !cpu_has_feature(CPU_FTR_COHERENT_ICACHE))
@@ -327,7 +327,7 @@ repeat:
                }
 
                new_pte = (new_pte & ~_PAGE_HPTEFLAGS) | H_PAGE_HASHPTE;
-               new_pte |= pte_set_hidx(ptep, rpte, 0, slot);
+               new_pte |= pte_set_hidx(ptep, rpte, 0, slot, PTRS_PER_PTE);
        }
        *ptep = __pte(new_pte & ~H_PAGE_BUSY);
        return 0;
index 7d07c7e17db6708334ea38cad711e1f5c32de1c6..cf290d415dcd8e9e314c63134c49cbd687e63fd7 100644 (file)
@@ -1008,6 +1008,7 @@ void __init hash__early_init_mmu(void)
        __pmd_index_size = H_PMD_INDEX_SIZE;
        __pud_index_size = H_PUD_INDEX_SIZE;
        __pgd_index_size = H_PGD_INDEX_SIZE;
+       __pud_cache_index = H_PUD_CACHE_INDEX;
        __pmd_cache_index = H_PMD_CACHE_INDEX;
        __pte_table_size = H_PTE_TABLE_SIZE;
        __pmd_table_size = H_PMD_TABLE_SIZE;
index 12511f5a015fcfee349e9dd7ac00f6ecd3b8df90..b320f5097a0616dce810c31e42fa42659475d4c3 100644 (file)
@@ -27,7 +27,7 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid,
        unsigned long vpn;
        unsigned long old_pte, new_pte;
        unsigned long rflags, pa, sz;
-       long slot;
+       long slot, offset;
 
        BUG_ON(shift != mmu_psize_defs[mmu_psize].shift);
 
@@ -63,7 +63,11 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid,
        } while(!pte_xchg(ptep, __pte(old_pte), __pte(new_pte)));
 
        rflags = htab_convert_pte_flags(new_pte);
-       rpte = __real_pte(__pte(old_pte), ptep);
+       if (unlikely(mmu_psize == MMU_PAGE_16G))
+               offset = PTRS_PER_PUD;
+       else
+               offset = PTRS_PER_PMD;
+       rpte = __real_pte(__pte(old_pte), ptep, offset);
 
        sz = ((1UL) << shift);
        if (!cpu_has_feature(CPU_FTR_COHERENT_ICACHE))
@@ -104,7 +108,7 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid,
                        return -1;
                }
 
-               new_pte |= pte_set_hidx(ptep, rpte, 0, slot);
+               new_pte |= pte_set_hidx(ptep, rpte, 0, slot, offset);
        }
 
        /*
index eb8c6c8c4851a9a7e25a555dd3e61c7b0613fb4e..2b656e67f2eaaa3914cd74d1cd57e36a5060486b 100644 (file)
@@ -100,6 +100,6 @@ void pgtable_cache_init(void)
         * same size as either the pgd or pmd index except with THP enabled
         * on book3s 64
         */
-       if (PUD_INDEX_SIZE && !PGT_CACHE(PUD_INDEX_SIZE))
-               pgtable_cache_add(PUD_INDEX_SIZE, pud_ctor);
+       if (PUD_CACHE_INDEX && !PGT_CACHE(PUD_CACHE_INDEX))
+               pgtable_cache_add(PUD_CACHE_INDEX, pud_ctor);
 }
index 314d19ab9385e038a4f38c18a50364da4873eb86..edd8d0bc9364f2843688498b221d90f53647390d 100644 (file)
@@ -143,11 +143,6 @@ static void reset_numa_cpu_lookup_table(void)
                numa_cpu_lookup_table[cpu] = -1;
 }
 
-static void update_numa_cpu_lookup_table(unsigned int cpu, int node)
-{
-       numa_cpu_lookup_table[cpu] = node;
-}
-
 static void map_cpu_to_node(int cpu, int node)
 {
        update_numa_cpu_lookup_table(cpu, node);
index 573a9a2ee4555c53ab2416e70fb4fdd05fb464da..2e10a964e29080149fe60b5d9a2220fdc710bb79 100644 (file)
 #include <linux/of_fdt.h>
 #include <linux/mm.h>
 #include <linux/string_helpers.h>
+#include <linux/stop_machine.h>
 
 #include <asm/pgtable.h>
 #include <asm/pgalloc.h>
+#include <asm/mmu_context.h>
 #include <asm/dma.h>
 #include <asm/machdep.h>
 #include <asm/mmu.h>
@@ -333,6 +335,22 @@ static void __init radix_init_pgtable(void)
                     "r" (TLBIEL_INVAL_SET_LPID), "r" (0));
        asm volatile("eieio; tlbsync; ptesync" : : : "memory");
        trace_tlbie(0, 0, TLBIEL_INVAL_SET_LPID, 0, 2, 1, 1);
+
+       /*
+        * The init_mm context is given the first available (non-zero) PID,
+        * which is the "guard PID" and contains no page table. PIDR should
+        * never be set to zero because that duplicates the kernel address
+        * space at the 0x0... offset (quadrant 0)!
+        *
+        * An arbitrary PID that may later be allocated by the PID allocator
+        * for userspace processes must not be used either, because that
+        * would cause stale user mappings for that PID on CPUs outside of
+        * the TLB invalidation scheme (because it won't be in mm_cpumask).
+        *
+        * So permanently carve out one PID for the purpose of a guard PID.
+        */
+       init_mm.context.id = mmu_base_pid;
+       mmu_base_pid++;
 }
 
 static void __init radix_init_partition_table(void)
@@ -535,6 +553,7 @@ void __init radix__early_init_mmu(void)
        __pmd_index_size = RADIX_PMD_INDEX_SIZE;
        __pud_index_size = RADIX_PUD_INDEX_SIZE;
        __pgd_index_size = RADIX_PGD_INDEX_SIZE;
+       __pud_cache_index = RADIX_PUD_INDEX_SIZE;
        __pmd_cache_index = RADIX_PMD_INDEX_SIZE;
        __pte_table_size = RADIX_PTE_TABLE_SIZE;
        __pmd_table_size = RADIX_PMD_TABLE_SIZE;
@@ -579,7 +598,8 @@ void __init radix__early_init_mmu(void)
 
        radix_init_iamr();
        radix_init_pgtable();
-
+       /* Switch to the guard PID before turning on MMU */
+       radix__switch_mmu_context(NULL, &init_mm);
        if (cpu_has_feature(CPU_FTR_HVMODE))
                tlbiel_all();
 }
@@ -604,6 +624,7 @@ void radix__early_init_mmu_secondary(void)
        }
        radix_init_iamr();
 
+       radix__switch_mmu_context(NULL, &init_mm);
        if (cpu_has_feature(CPU_FTR_HVMODE))
                tlbiel_all();
 }
@@ -666,6 +687,30 @@ static void free_pmd_table(pmd_t *pmd_start, pud_t *pud)
        pud_clear(pud);
 }
 
+struct change_mapping_params {
+       pte_t *pte;
+       unsigned long start;
+       unsigned long end;
+       unsigned long aligned_start;
+       unsigned long aligned_end;
+};
+
+static int stop_machine_change_mapping(void *data)
+{
+       struct change_mapping_params *params =
+                       (struct change_mapping_params *)data;
+
+       if (!data)
+               return -1;
+
+       spin_unlock(&init_mm.page_table_lock);
+       pte_clear(&init_mm, params->aligned_start, params->pte);
+       create_physical_mapping(params->aligned_start, params->start);
+       create_physical_mapping(params->end, params->aligned_end);
+       spin_lock(&init_mm.page_table_lock);
+       return 0;
+}
+
 static void remove_pte_table(pte_t *pte_start, unsigned long addr,
                             unsigned long end)
 {
@@ -694,6 +739,52 @@ static void remove_pte_table(pte_t *pte_start, unsigned long addr,
        }
 }
 
+/*
+ * clear the pte and potentially split the mapping helper
+ */
+static void split_kernel_mapping(unsigned long addr, unsigned long end,
+                               unsigned long size, pte_t *pte)
+{
+       unsigned long mask = ~(size - 1);
+       unsigned long aligned_start = addr & mask;
+       unsigned long aligned_end = addr + size;
+       struct change_mapping_params params;
+       bool split_region = false;
+
+       if ((end - addr) < size) {
+               /*
+                * We're going to clear the PTE, but not flushed
+                * the mapping, time to remap and flush. The
+                * effects if visible outside the processor or
+                * if we are running in code close to the
+                * mapping we cleared, we are in trouble.
+                */
+               if (overlaps_kernel_text(aligned_start, addr) ||
+                       overlaps_kernel_text(end, aligned_end)) {
+                       /*
+                        * Hack, just return, don't pte_clear
+                        */
+                       WARN_ONCE(1, "Linear mapping %lx->%lx overlaps kernel "
+                                 "text, not splitting\n", addr, end);
+                       return;
+               }
+               split_region = true;
+       }
+
+       if (split_region) {
+               params.pte = pte;
+               params.start = addr;
+               params.end = end;
+               params.aligned_start = addr & ~(size - 1);
+               params.aligned_end = min_t(unsigned long, aligned_end,
+                               (unsigned long)__va(memblock_end_of_DRAM()));
+               stop_machine(stop_machine_change_mapping, &params, NULL);
+               return;
+       }
+
+       pte_clear(&init_mm, addr, pte);
+}
+
 static void remove_pmd_table(pmd_t *pmd_start, unsigned long addr,
                             unsigned long end)
 {
@@ -709,13 +800,7 @@ static void remove_pmd_table(pmd_t *pmd_start, unsigned long addr,
                        continue;
 
                if (pmd_huge(*pmd)) {
-                       if (!IS_ALIGNED(addr, PMD_SIZE) ||
-                           !IS_ALIGNED(next, PMD_SIZE)) {
-                               WARN_ONCE(1, "%s: unaligned range\n", __func__);
-                               continue;
-                       }
-
-                       pte_clear(&init_mm, addr, (pte_t *)pmd);
+                       split_kernel_mapping(addr, end, PMD_SIZE, (pte_t *)pmd);
                        continue;
                }
 
@@ -740,13 +825,7 @@ static void remove_pud_table(pud_t *pud_start, unsigned long addr,
                        continue;
 
                if (pud_huge(*pud)) {
-                       if (!IS_ALIGNED(addr, PUD_SIZE) ||
-                           !IS_ALIGNED(next, PUD_SIZE)) {
-                               WARN_ONCE(1, "%s: unaligned range\n", __func__);
-                               continue;
-                       }
-
-                       pte_clear(&init_mm, addr, (pte_t *)pud);
+                       split_kernel_mapping(addr, end, PUD_SIZE, (pte_t *)pud);
                        continue;
                }
 
@@ -772,13 +851,7 @@ static void remove_pagetable(unsigned long start, unsigned long end)
                        continue;
 
                if (pgd_huge(*pgd)) {
-                       if (!IS_ALIGNED(addr, PGDIR_SIZE) ||
-                           !IS_ALIGNED(next, PGDIR_SIZE)) {
-                               WARN_ONCE(1, "%s: unaligned range\n", __func__);
-                               continue;
-                       }
-
-                       pte_clear(&init_mm, addr, (pte_t *)pgd);
+                       split_kernel_mapping(addr, end, PGDIR_SIZE, (pte_t *)pgd);
                        continue;
                }
 
index c9a623c2d8a270a14966003258f035fb95d08cff..28c980eb4422284d788716e245934c679925ad86 100644 (file)
@@ -82,6 +82,8 @@ unsigned long __pgd_index_size;
 EXPORT_SYMBOL(__pgd_index_size);
 unsigned long __pmd_cache_index;
 EXPORT_SYMBOL(__pmd_cache_index);
+unsigned long __pud_cache_index;
+EXPORT_SYMBOL(__pud_cache_index);
 unsigned long __pte_table_size;
 EXPORT_SYMBOL(__pte_table_size);
 unsigned long __pmd_table_size;
@@ -471,6 +473,8 @@ void mmu_partition_table_set_entry(unsigned int lpid, unsigned long dw0,
        if (old & PATB_HR) {
                asm volatile(PPC_TLBIE_5(%0,%1,2,0,1) : :
                             "r" (TLBIEL_INVAL_SET_LPID), "r" (lpid));
+               asm volatile(PPC_TLBIE_5(%0,%1,2,1,1) : :
+                            "r" (TLBIEL_INVAL_SET_LPID), "r" (lpid));
                trace_tlbie(lpid, 0, TLBIEL_INVAL_SET_LPID, lpid, 2, 0, 1);
        } else {
                asm volatile(PPC_TLBIE_5(%0,%1,2,0,0) : :
index 881ebd53ffc27c8840ae57b088c4d247ba7ef191..9b23f12e863cc14ff324b9c5ffed077c3a1012e8 100644 (file)
@@ -51,7 +51,7 @@ void hpte_need_flush(struct mm_struct *mm, unsigned long addr,
        unsigned int psize;
        int ssize;
        real_pte_t rpte;
-       int i;
+       int i, offset;
 
        i = batch->index;
 
@@ -67,6 +67,10 @@ void hpte_need_flush(struct mm_struct *mm, unsigned long addr,
                psize = get_slice_psize(mm, addr);
                /* Mask the address for the correct page size */
                addr &= ~((1UL << mmu_psize_defs[psize].shift) - 1);
+               if (unlikely(psize == MMU_PAGE_16G))
+                       offset = PTRS_PER_PUD;
+               else
+                       offset = PTRS_PER_PMD;
 #else
                BUG();
                psize = pte_pagesize_index(mm, addr, pte); /* shutup gcc */
@@ -78,6 +82,7 @@ void hpte_need_flush(struct mm_struct *mm, unsigned long addr,
                 * support 64k pages, this might be different from the
                 * hardware page size encoded in the slice table. */
                addr &= PAGE_MASK;
+               offset = PTRS_PER_PTE;
        }
 
 
@@ -91,7 +96,7 @@ void hpte_need_flush(struct mm_struct *mm, unsigned long addr,
        }
        WARN_ON(vsid == 0);
        vpn = hpt_vpn(addr, vsid, ssize);
-       rpte = __real_pte(__pte(pte), ptep);
+       rpte = __real_pte(__pte(pte), ptep, offset);
 
        /*
         * Check if we have an active batch on this CPU. If not, just
index dd4c9b8b8a81e6967b29061014918b4f591921df..f6f55ab4980e7684a09942a510daf689f79f6d1c 100644 (file)
@@ -199,9 +199,11 @@ static void disable_nest_pmu_counters(void)
        const struct cpumask *l_cpumask;
 
        get_online_cpus();
-       for_each_online_node(nid) {
+       for_each_node_with_cpus(nid) {
                l_cpumask = cpumask_of_node(nid);
-               cpu = cpumask_first(l_cpumask);
+               cpu = cpumask_first_and(l_cpumask, cpu_online_mask);
+               if (cpu >= nr_cpu_ids)
+                       continue;
                opal_imc_counters_stop(OPAL_IMC_COUNTERS_NEST,
                                       get_hard_smp_processor_id(cpu));
        }
index 2b3eb01ab1107145395b0c697da4743bd2e97c8c..b7c53a51c31bbe5ba5fa62adbf6d1e97c3c1e8c3 100644 (file)
@@ -1063,16 +1063,16 @@ struct vas_window *vas_tx_win_open(int vasid, enum vas_cop_type cop,
                        rc = PTR_ERR(txwin->paste_kaddr);
                        goto free_window;
                }
+       } else {
+               /*
+                * A user mapping must ensure that context switch issues
+                * CP_ABORT for this thread.
+                */
+               rc = set_thread_uses_vas();
+               if (rc)
+                       goto free_window;
        }
 
-       /*
-        * Now that we have a send window, ensure context switch issues
-        * CP_ABORT for this thread.
-        */
-       rc = -EINVAL;
-       if (set_thread_uses_vas() < 0)
-               goto free_window;
-
        set_vinst_win(vinst, txwin);
 
        return txwin;
index dceb51454d8d212a5cbc78ef891322bddf499800..652d3e96b812b93834323e1a3a60a1e5bbab3612 100644 (file)
@@ -36,6 +36,7 @@
 #include <asm/xics.h>
 #include <asm/xive.h>
 #include <asm/plpar_wrappers.h>
+#include <asm/topology.h>
 
 #include "pseries.h"
 #include "offline_states.h"
@@ -331,6 +332,7 @@ static void pseries_remove_processor(struct device_node *np)
                        BUG_ON(cpu_online(cpu));
                        set_cpu_present(cpu, false);
                        set_hard_smp_processor_id(cpu, -1);
+                       update_numa_cpu_lookup_table(cpu, -1);
                        break;
                }
                if (cpu >= nr_cpu_ids)
@@ -340,8 +342,6 @@ static void pseries_remove_processor(struct device_node *np)
        cpu_maps_update_done();
 }
 
-extern int find_and_online_cpu_nid(int cpu);
-
 static int dlpar_online_cpu(struct device_node *dn)
 {
        int rc = 0;
index 81d8614e73790b1923a3c8cfd336a2531fee76ce..5e1ef915018208c3511ef0e91c0064c8c9474389 100644 (file)
@@ -48,6 +48,28 @@ static irqreturn_t ras_epow_interrupt(int irq, void *dev_id);
 static irqreturn_t ras_error_interrupt(int irq, void *dev_id);
 
 
+/*
+ * Enable the hotplug interrupt late because processing them may touch other
+ * devices or systems (e.g. hugepages) that have not been initialized at the
+ * subsys stage.
+ */
+int __init init_ras_hotplug_IRQ(void)
+{
+       struct device_node *np;
+
+       /* Hotplug Events */
+       np = of_find_node_by_path("/event-sources/hot-plug-events");
+       if (np != NULL) {
+               if (dlpar_workqueue_init() == 0)
+                       request_event_sources_irqs(np, ras_hotplug_interrupt,
+                                                  "RAS_HOTPLUG");
+               of_node_put(np);
+       }
+
+       return 0;
+}
+machine_late_initcall(pseries, init_ras_hotplug_IRQ);
+
 /*
  * Initialize handlers for the set of interrupts caused by hardware errors
  * and power system events.
@@ -66,15 +88,6 @@ static int __init init_ras_IRQ(void)
                of_node_put(np);
        }
 
-       /* Hotplug Events */
-       np = of_find_node_by_path("/event-sources/hot-plug-events");
-       if (np != NULL) {
-               if (dlpar_workqueue_init() == 0)
-                       request_event_sources_irqs(np, ras_hotplug_interrupt,
-                                          "RAS_HOTPLUG");
-               of_node_put(np);
-       }
-
        /* EPOW Events */
        np = of_find_node_by_path("/event-sources/epow-events");
        if (np != NULL) {
index d9c4c93660491849029044d37ab8e60160b0d7de..091f1d0d0af190a0d6f274b8cab32960cc1eef86 100644 (file)
@@ -356,7 +356,8 @@ static int xive_spapr_configure_queue(u32 target, struct xive_q *q, u8 prio,
 
        rc = plpar_int_get_queue_info(0, target, prio, &esn_page, &esn_size);
        if (rc) {
-               pr_err("Error %lld getting queue info prio %d\n", rc, prio);
+               pr_err("Error %lld getting queue info CPU %d prio %d\n", rc,
+                      target, prio);
                rc = -EIO;
                goto fail;
        }
@@ -370,7 +371,8 @@ static int xive_spapr_configure_queue(u32 target, struct xive_q *q, u8 prio,
        /* Configure and enable the queue in HW */
        rc = plpar_int_set_queue_config(flags, target, prio, qpage_phys, order);
        if (rc) {
-               pr_err("Error %lld setting queue for prio %d\n", rc, prio);
+               pr_err("Error %lld setting queue for CPU %d prio %d\n", rc,
+                      target, prio);
                rc = -EIO;
        } else {
                q->qpage = qpage;
@@ -389,8 +391,8 @@ static int xive_spapr_setup_queue(unsigned int cpu, struct xive_cpu *xc,
        if (IS_ERR(qpage))
                return PTR_ERR(qpage);
 
-       return xive_spapr_configure_queue(cpu, q, prio, qpage,
-                                         xive_queue_shift);
+       return xive_spapr_configure_queue(get_hard_smp_processor_id(cpu),
+                                         q, prio, qpage, xive_queue_shift);
 }
 
 static void xive_spapr_cleanup_queue(unsigned int cpu, struct xive_cpu *xc,
@@ -399,10 +401,12 @@ static void xive_spapr_cleanup_queue(unsigned int cpu, struct xive_cpu *xc,
        struct xive_q *q = &xc->queue[prio];
        unsigned int alloc_order;
        long rc;
+       int hw_cpu = get_hard_smp_processor_id(cpu);
 
-       rc = plpar_int_set_queue_config(0, cpu, prio, 0, 0);
+       rc = plpar_int_set_queue_config(0, hw_cpu, prio, 0, 0);
        if (rc)
-               pr_err("Error %ld setting queue for prio %d\n", rc, prio);
+               pr_err("Error %ld setting queue for CPU %d prio %d\n", rc,
+                      hw_cpu, prio);
 
        alloc_order = xive_alloc_order(xive_queue_shift);
        free_pages((unsigned long)q->qpage, alloc_order);
index 6bf594ace663ec82f746132b4f62fc351bf5160c..8767e45f1b2b70953583a7a7157707696466d407 100644 (file)
@@ -430,6 +430,8 @@ config SPARC_LEON
        depends on SPARC32
        select USB_EHCI_BIG_ENDIAN_MMIO
        select USB_EHCI_BIG_ENDIAN_DESC
+       select USB_UHCI_BIG_ENDIAN_MMIO
+       select USB_UHCI_BIG_ENDIAN_DESC
        ---help---
          If you say Y here if you are running on a SPARC-LEON processor.
          The LEON processor is a synthesizable VHDL model of the
index aff152c87cf4ba62ed26ed6b7eb567b576dd84fd..5a82bac5e0bc7985529aa537e981997109945309 100644 (file)
@@ -1,6 +1,7 @@
 boot/compressed/vmlinux
 tools/test_get_len
 tools/insn_sanity
+tools/insn_decoder_test
 purgatory/kexec-purgatory.c
 purgatory/purgatory.ro
 
index 63bf349b2b24a8807c4f65869af50bab99e4c2f7..c1236b187824e222a2c7fddd417272369767b06b 100644 (file)
@@ -423,12 +423,6 @@ config X86_MPPARSE
          For old smp systems that do not have proper acpi support. Newer systems
          (esp with 64bit cpus) with acpi support, MADT and DSDT will override it
 
-config X86_BIGSMP
-       bool "Support for big SMP systems with more than 8 CPUs"
-       depends on X86_32 && SMP
-       ---help---
-         This option is needed for the systems that have more than 8 CPUs
-
 config GOLDFISH
        def_bool y
        depends on X86_GOLDFISH
@@ -460,6 +454,12 @@ config INTEL_RDT
          Say N if unsure.
 
 if X86_32
+config X86_BIGSMP
+       bool "Support for big SMP systems with more than 8 CPUs"
+       depends on SMP
+       ---help---
+         This option is needed for the systems that have more than 8 CPUs
+
 config X86_EXTENDED_PLATFORM
        bool "Support for extended (non-PC) x86 platforms"
        default y
@@ -949,25 +949,66 @@ config MAXSMP
          Enable maximum number of CPUS and NUMA Nodes for this architecture.
          If unsure, say N.
 
+#
+# The maximum number of CPUs supported:
+#
+# The main config value is NR_CPUS, which defaults to NR_CPUS_DEFAULT,
+# and which can be configured interactively in the
+# [NR_CPUS_RANGE_BEGIN ... NR_CPUS_RANGE_END] range.
+#
+# The ranges are different on 32-bit and 64-bit kernels, depending on
+# hardware capabilities and scalability features of the kernel.
+#
+# ( If MAXSMP is enabled we just use the highest possible value and disable
+#   interactive configuration. )
+#
+
+config NR_CPUS_RANGE_BEGIN
+       int
+       default NR_CPUS_RANGE_END if MAXSMP
+       default    1 if !SMP
+       default    2
+
+config NR_CPUS_RANGE_END
+       int
+       depends on X86_32
+       default   64 if  SMP &&  X86_BIGSMP
+       default    8 if  SMP && !X86_BIGSMP
+       default    1 if !SMP
+
+config NR_CPUS_RANGE_END
+       int
+       depends on X86_64
+       default 8192 if  SMP && ( MAXSMP ||  CPUMASK_OFFSTACK)
+       default  512 if  SMP && (!MAXSMP && !CPUMASK_OFFSTACK)
+       default    1 if !SMP
+
+config NR_CPUS_DEFAULT
+       int
+       depends on X86_32
+       default   32 if  X86_BIGSMP
+       default    8 if  SMP
+       default    1 if !SMP
+
+config NR_CPUS_DEFAULT
+       int
+       depends on X86_64
+       default 8192 if  MAXSMP
+       default   64 if  SMP
+       default    1 if !SMP
+
 config NR_CPUS
        int "Maximum number of CPUs" if SMP && !MAXSMP
-       range 2 8 if SMP && X86_32 && !X86_BIGSMP
-       range 2 64 if SMP && X86_32 && X86_BIGSMP
-       range 2 512 if SMP && !MAXSMP && !CPUMASK_OFFSTACK && X86_64
-       range 2 8192 if SMP && !MAXSMP && CPUMASK_OFFSTACK && X86_64
-       default "1" if !SMP
-       default "8192" if MAXSMP
-       default "32" if SMP && X86_BIGSMP
-       default "8" if SMP && X86_32
-       default "64" if SMP
+       range NR_CPUS_RANGE_BEGIN NR_CPUS_RANGE_END
+       default NR_CPUS_DEFAULT
        ---help---
          This allows you to specify the maximum number of CPUs which this
          kernel will support.  If CPUMASK_OFFSTACK is enabled, the maximum
          supported value is 8192, otherwise the maximum value is 512.  The
          minimum value which makes sense is 2.
 
-         This is purely to save memory - each supported CPU adds
-         approximately eight kilobytes to the kernel image.
+         This is purely to save memory: each supported CPU adds about 8KB
+         to the kernel image.
 
 config SCHED_SMT
        bool "SMT (Hyperthreading) scheduler support"
@@ -1363,7 +1404,7 @@ config HIGHMEM4G
 
 config HIGHMEM64G
        bool "64GB"
-       depends on !M486
+       depends on !M486 && !M586 && !M586TSC && !M586MMX && !MGEODE_LX && !MGEODEGX1 && !MCYRIXIII && !MELAN && !MWINCHIPC6 && !WINCHIP3D && !MK6
        select X86_PAE
        ---help---
          Select this if you have a 32-bit processor and more than 4
index 65a9a4716e34f55394d057335629d0b32ec55ada..8b8d2297d4867b06acaecf31e3be7124888f4d79 100644 (file)
@@ -374,7 +374,7 @@ config X86_TSC
 
 config X86_CMPXCHG64
        def_bool y
-       depends on X86_PAE || X86_64 || MCORE2 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MATOM
+       depends on X86_PAE || X86_64 || MCORE2 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586TSC || M586MMX || MATOM || MGEODE_LX || MGEODEGX1 || MK6 || MK7 || MK8
 
 # this should be set for all -march=.. options where the compiler
 # generates cmov.
@@ -385,7 +385,7 @@ config X86_CMOV
 config X86_MINIMUM_CPU_FAMILY
        int
        default "64" if X86_64
-       default "6" if X86_32 && X86_P6_NOP
+       default "6" if X86_32 && (MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7 || MEFFICEON || MATOM || MCRUSOE || MCORE2 || MK7 || MK8)
        default "5" if X86_32 && X86_CMPXCHG64
        default "4"
 
index 36870b26067a73655b96d137c04977c8f392237d..d08805032f0193ab96dde00890a0502a9f2b24e6 100644 (file)
@@ -57,10 +57,12 @@ void sha512_mb_mgr_init_avx2(struct sha512_mb_mgr *state)
 {
        unsigned int j;
 
-       state->lens[0] = 0;
-       state->lens[1] = 1;
-       state->lens[2] = 2;
-       state->lens[3] = 3;
+       /* initially all lanes are unused */
+       state->lens[0] = 0xFFFFFFFF00000000;
+       state->lens[1] = 0xFFFFFFFF00000001;
+       state->lens[2] = 0xFFFFFFFF00000002;
+       state->lens[3] = 0xFFFFFFFF00000003;
+
        state->unused_lanes = 0xFF03020100;
        for (j = 0; j < 4; j++)
                state->ldata[j].job_in_lane = NULL;
index 3f48f695d5e6ac6546a009c734fcac517564b24d..dce7092ab24a247c1165f80b17c687d255023a05 100644 (file)
@@ -97,80 +97,69 @@ For 32-bit we have the following conventions - kernel is built with
 
 #define SIZEOF_PTREGS  21*8
 
-       .macro ALLOC_PT_GPREGS_ON_STACK
-       addq    $-(15*8), %rsp
-       .endm
+.macro PUSH_AND_CLEAR_REGS rdx=%rdx rax=%rax
+       /*
+        * Push registers and sanitize registers of values that a
+        * speculation attack might otherwise want to exploit. The
+        * lower registers are likely clobbered well before they
+        * could be put to use in a speculative execution gadget.
+        * Interleave XOR with PUSH for better uop scheduling:
+        */
+       pushq   %rdi            /* pt_regs->di */
+       pushq   %rsi            /* pt_regs->si */
+       pushq   \rdx            /* pt_regs->dx */
+       pushq   %rcx            /* pt_regs->cx */
+       pushq   \rax            /* pt_regs->ax */
+       pushq   %r8             /* pt_regs->r8 */
+       xorq    %r8, %r8        /* nospec   r8 */
+       pushq   %r9             /* pt_regs->r9 */
+       xorq    %r9, %r9        /* nospec   r9 */
+       pushq   %r10            /* pt_regs->r10 */
+       xorq    %r10, %r10      /* nospec   r10 */
+       pushq   %r11            /* pt_regs->r11 */
+       xorq    %r11, %r11      /* nospec   r11*/
+       pushq   %rbx            /* pt_regs->rbx */
+       xorl    %ebx, %ebx      /* nospec   rbx*/
+       pushq   %rbp            /* pt_regs->rbp */
+       xorl    %ebp, %ebp      /* nospec   rbp*/
+       pushq   %r12            /* pt_regs->r12 */
+       xorq    %r12, %r12      /* nospec   r12*/
+       pushq   %r13            /* pt_regs->r13 */
+       xorq    %r13, %r13      /* nospec   r13*/
+       pushq   %r14            /* pt_regs->r14 */
+       xorq    %r14, %r14      /* nospec   r14*/
+       pushq   %r15            /* pt_regs->r15 */
+       xorq    %r15, %r15      /* nospec   r15*/
+       UNWIND_HINT_REGS
+.endm
 
-       .macro SAVE_C_REGS_HELPER offset=0 rax=1 rcx=1 r8910=1 r11=1
-       .if \r11
-       movq %r11, 6*8+\offset(%rsp)
-       .endif
-       .if \r8910
-       movq %r10, 7*8+\offset(%rsp)
-       movq %r9,  8*8+\offset(%rsp)
-       movq %r8,  9*8+\offset(%rsp)
-       .endif
-       .if \rax
-       movq %rax, 10*8+\offset(%rsp)
-       .endif
-       .if \rcx
-       movq %rcx, 11*8+\offset(%rsp)
-       .endif
-       movq %rdx, 12*8+\offset(%rsp)
-       movq %rsi, 13*8+\offset(%rsp)
-       movq %rdi, 14*8+\offset(%rsp)
-       UNWIND_HINT_REGS offset=\offset extra=0
-       .endm
-       .macro SAVE_C_REGS offset=0
-       SAVE_C_REGS_HELPER \offset, 1, 1, 1, 1
-       .endm
-       .macro SAVE_C_REGS_EXCEPT_RAX_RCX offset=0
-       SAVE_C_REGS_HELPER \offset, 0, 0, 1, 1
-       .endm
-       .macro SAVE_C_REGS_EXCEPT_R891011
-       SAVE_C_REGS_HELPER 0, 1, 1, 0, 0
-       .endm
-       .macro SAVE_C_REGS_EXCEPT_RCX_R891011
-       SAVE_C_REGS_HELPER 0, 1, 0, 0, 0
-       .endm
-       .macro SAVE_C_REGS_EXCEPT_RAX_RCX_R11
-       SAVE_C_REGS_HELPER 0, 0, 0, 1, 0
-       .endm
-
-       .macro SAVE_EXTRA_REGS offset=0
-       movq %r15, 0*8+\offset(%rsp)
-       movq %r14, 1*8+\offset(%rsp)
-       movq %r13, 2*8+\offset(%rsp)
-       movq %r12, 3*8+\offset(%rsp)
-       movq %rbp, 4*8+\offset(%rsp)
-       movq %rbx, 5*8+\offset(%rsp)
-       UNWIND_HINT_REGS offset=\offset
-       .endm
-
-       .macro POP_EXTRA_REGS
+.macro POP_REGS pop_rdi=1 skip_r11rcx=0
        popq %r15
        popq %r14
        popq %r13
        popq %r12
        popq %rbp
        popq %rbx
-       .endm
-
-       .macro POP_C_REGS
+       .if \skip_r11rcx
+       popq %rsi
+       .else
        popq %r11
+       .endif
        popq %r10
        popq %r9
        popq %r8
        popq %rax
+       .if \skip_r11rcx
+       popq %rsi
+       .else
        popq %rcx
+       .endif
        popq %rdx
        popq %rsi
+       .if \pop_rdi
        popq %rdi
-       .endm
-
-       .macro icebp
-       .byte 0xf1
-       .endm
+       .endif
+.endm
 
 /*
  * This is a sneaky trick to help the unwinder find pt_regs on the stack.  The
@@ -178,7 +167,7 @@ For 32-bit we have the following conventions - kernel is built with
  * is just setting the LSB, which makes it an invalid stack address and is also
  * a signal to the unwinder that it's a pt_regs pointer in disguise.
  *
- * NOTE: This macro must be used *after* SAVE_EXTRA_REGS because it corrupts
+ * NOTE: This macro must be used *after* PUSH_AND_CLEAR_REGS because it corrupts
  * the original rbp.
  */
 .macro ENCODE_FRAME_POINTER ptregs_offset=0
index 30c8c5344c4a5dcfeb96d0711a322e50de33d324..8971bd64d515c5bb4a9b95108fd802b8418764f2 100644 (file)
@@ -213,7 +213,7 @@ ENTRY(entry_SYSCALL_64)
 
        swapgs
        /*
-        * This path is not taken when PAGE_TABLE_ISOLATION is disabled so it
+        * This path is only taken when PAGE_TABLE_ISOLATION is disabled so it
         * is not required to switch CR3.
         */
        movq    %rsp, PER_CPU_VAR(rsp_scratch)
@@ -227,22 +227,8 @@ ENTRY(entry_SYSCALL_64)
        pushq   %rcx                            /* pt_regs->ip */
 GLOBAL(entry_SYSCALL_64_after_hwframe)
        pushq   %rax                            /* pt_regs->orig_ax */
-       pushq   %rdi                            /* pt_regs->di */
-       pushq   %rsi                            /* pt_regs->si */
-       pushq   %rdx                            /* pt_regs->dx */
-       pushq   %rcx                            /* pt_regs->cx */
-       pushq   $-ENOSYS                        /* pt_regs->ax */
-       pushq   %r8                             /* pt_regs->r8 */
-       pushq   %r9                             /* pt_regs->r9 */
-       pushq   %r10                            /* pt_regs->r10 */
-       pushq   %r11                            /* pt_regs->r11 */
-       pushq   %rbx                            /* pt_regs->rbx */
-       pushq   %rbp                            /* pt_regs->rbp */
-       pushq   %r12                            /* pt_regs->r12 */
-       pushq   %r13                            /* pt_regs->r13 */
-       pushq   %r14                            /* pt_regs->r14 */
-       pushq   %r15                            /* pt_regs->r15 */
-       UNWIND_HINT_REGS
+
+       PUSH_AND_CLEAR_REGS rax=$-ENOSYS
 
        TRACE_IRQS_OFF
 
@@ -321,15 +307,7 @@ GLOBAL(entry_SYSCALL_64_after_hwframe)
 syscall_return_via_sysret:
        /* rcx and r11 are already restored (see code above) */
        UNWIND_HINT_EMPTY
-       POP_EXTRA_REGS
-       popq    %rsi    /* skip r11 */
-       popq    %r10
-       popq    %r9
-       popq    %r8
-       popq    %rax
-       popq    %rsi    /* skip rcx */
-       popq    %rdx
-       popq    %rsi
+       POP_REGS pop_rdi=0 skip_r11rcx=1
 
        /*
         * Now all regs are restored except RSP and RDI.
@@ -559,9 +537,7 @@ END(irq_entries_start)
        call    switch_to_thread_stack
 1:
 
-       ALLOC_PT_GPREGS_ON_STACK
-       SAVE_C_REGS
-       SAVE_EXTRA_REGS
+       PUSH_AND_CLEAR_REGS
        ENCODE_FRAME_POINTER
 
        testb   $3, CS(%rsp)
@@ -622,15 +598,7 @@ GLOBAL(swapgs_restore_regs_and_return_to_usermode)
        ud2
 1:
 #endif
-       POP_EXTRA_REGS
-       popq    %r11
-       popq    %r10
-       popq    %r9
-       popq    %r8
-       popq    %rax
-       popq    %rcx
-       popq    %rdx
-       popq    %rsi
+       POP_REGS pop_rdi=0
 
        /*
         * The stack is now user RDI, orig_ax, RIP, CS, EFLAGS, RSP, SS.
@@ -688,8 +656,7 @@ GLOBAL(restore_regs_and_return_to_kernel)
        ud2
 1:
 #endif
-       POP_EXTRA_REGS
-       POP_C_REGS
+       POP_REGS
        addq    $8, %rsp        /* skip regs->orig_ax */
        /*
         * ARCH_HAS_MEMBARRIER_SYNC_CORE rely on IRET core serialization
@@ -908,7 +875,9 @@ ENTRY(\sym)
        pushq   $-1                             /* ORIG_RAX: no syscall to restart */
        .endif
 
-       ALLOC_PT_GPREGS_ON_STACK
+       /* Save all registers in pt_regs */
+       PUSH_AND_CLEAR_REGS
+       ENCODE_FRAME_POINTER
 
        .if \paranoid < 2
        testb   $3, CS(%rsp)                    /* If coming from userspace, switch stacks */
@@ -1121,9 +1090,7 @@ ENTRY(xen_failsafe_callback)
        addq    $0x30, %rsp
        UNWIND_HINT_IRET_REGS
        pushq   $-1 /* orig_ax = -1 => not a system call */
-       ALLOC_PT_GPREGS_ON_STACK
-       SAVE_C_REGS
-       SAVE_EXTRA_REGS
+       PUSH_AND_CLEAR_REGS
        ENCODE_FRAME_POINTER
        jmp     error_exit
 END(xen_failsafe_callback)
@@ -1163,16 +1130,13 @@ idtentry machine_check          do_mce                  has_error_code=0        paranoid=1
 #endif
 
 /*
- * Save all registers in pt_regs, and switch gs if needed.
+ * Switch gs if needed.
  * Use slow, but surefire "are we in kernel?" check.
  * Return: ebx=0: need swapgs on exit, ebx=1: otherwise
  */
 ENTRY(paranoid_entry)
        UNWIND_HINT_FUNC
        cld
-       SAVE_C_REGS 8
-       SAVE_EXTRA_REGS 8
-       ENCODE_FRAME_POINTER 8
        movl    $1, %ebx
        movl    $MSR_GS_BASE, %ecx
        rdmsr
@@ -1211,21 +1175,18 @@ ENTRY(paranoid_exit)
        jmp     .Lparanoid_exit_restore
 .Lparanoid_exit_no_swapgs:
        TRACE_IRQS_IRETQ_DEBUG
+       RESTORE_CR3     scratch_reg=%rbx save_reg=%r14
 .Lparanoid_exit_restore:
        jmp restore_regs_and_return_to_kernel
 END(paranoid_exit)
 
 /*
- * Save all registers in pt_regs, and switch gs if needed.
+ * Switch gs if needed.
  * Return: EBX=0: came from user mode; EBX=1: otherwise
  */
 ENTRY(error_entry)
-       UNWIND_HINT_FUNC
+       UNWIND_HINT_REGS offset=8
        cld
-       SAVE_C_REGS 8
-       SAVE_EXTRA_REGS 8
-       ENCODE_FRAME_POINTER 8
-       xorl    %ebx, %ebx
        testb   $3, CS+8(%rsp)
        jz      .Lerror_kernelspace
 
@@ -1406,22 +1367,7 @@ ENTRY(nmi)
        pushq   1*8(%rdx)       /* pt_regs->rip */
        UNWIND_HINT_IRET_REGS
        pushq   $-1             /* pt_regs->orig_ax */
-       pushq   %rdi            /* pt_regs->di */
-       pushq   %rsi            /* pt_regs->si */
-       pushq   (%rdx)          /* pt_regs->dx */
-       pushq   %rcx            /* pt_regs->cx */
-       pushq   %rax            /* pt_regs->ax */
-       pushq   %r8             /* pt_regs->r8 */
-       pushq   %r9             /* pt_regs->r9 */
-       pushq   %r10            /* pt_regs->r10 */
-       pushq   %r11            /* pt_regs->r11 */
-       pushq   %rbx            /* pt_regs->rbx */
-       pushq   %rbp            /* pt_regs->rbp */
-       pushq   %r12            /* pt_regs->r12 */
-       pushq   %r13            /* pt_regs->r13 */
-       pushq   %r14            /* pt_regs->r14 */
-       pushq   %r15            /* pt_regs->r15 */
-       UNWIND_HINT_REGS
+       PUSH_AND_CLEAR_REGS rdx=(%rdx)
        ENCODE_FRAME_POINTER
 
        /*
@@ -1631,7 +1577,8 @@ end_repeat_nmi:
         * frame to point back to repeat_nmi.
         */
        pushq   $-1                             /* ORIG_RAX: no syscall to restart */
-       ALLOC_PT_GPREGS_ON_STACK
+       PUSH_AND_CLEAR_REGS
+       ENCODE_FRAME_POINTER
 
        /*
         * Use paranoid_entry to handle SWAPGS, but no need to use paranoid_exit
@@ -1655,8 +1602,7 @@ end_repeat_nmi:
 nmi_swapgs:
        SWAPGS_UNSAFE_STACK
 nmi_restore:
-       POP_EXTRA_REGS
-       POP_C_REGS
+       POP_REGS
 
        /*
         * Skip orig_ax and the "outermost" frame to point RSP at the "iret"
index 98d5358e4041a7e144ec566f7db19ff054cedbcc..fd65e016e4133f5634545062d098fc25fb5c0b1d 100644 (file)
@@ -85,15 +85,25 @@ ENTRY(entry_SYSENTER_compat)
        pushq   %rcx                    /* pt_regs->cx */
        pushq   $-ENOSYS                /* pt_regs->ax */
        pushq   $0                      /* pt_regs->r8  = 0 */
+       xorq    %r8, %r8                /* nospec   r8 */
        pushq   $0                      /* pt_regs->r9  = 0 */
+       xorq    %r9, %r9                /* nospec   r9 */
        pushq   $0                      /* pt_regs->r10 = 0 */
+       xorq    %r10, %r10              /* nospec   r10 */
        pushq   $0                      /* pt_regs->r11 = 0 */
+       xorq    %r11, %r11              /* nospec   r11 */
        pushq   %rbx                    /* pt_regs->rbx */
+       xorl    %ebx, %ebx              /* nospec   rbx */
        pushq   %rbp                    /* pt_regs->rbp (will be overwritten) */
+       xorl    %ebp, %ebp              /* nospec   rbp */
        pushq   $0                      /* pt_regs->r12 = 0 */
+       xorq    %r12, %r12              /* nospec   r12 */
        pushq   $0                      /* pt_regs->r13 = 0 */
+       xorq    %r13, %r13              /* nospec   r13 */
        pushq   $0                      /* pt_regs->r14 = 0 */
+       xorq    %r14, %r14              /* nospec   r14 */
        pushq   $0                      /* pt_regs->r15 = 0 */
+       xorq    %r15, %r15              /* nospec   r15 */
        cld
 
        /*
@@ -214,15 +224,25 @@ GLOBAL(entry_SYSCALL_compat_after_hwframe)
        pushq   %rbp                    /* pt_regs->cx (stashed in bp) */
        pushq   $-ENOSYS                /* pt_regs->ax */
        pushq   $0                      /* pt_regs->r8  = 0 */
+       xorq    %r8, %r8                /* nospec   r8 */
        pushq   $0                      /* pt_regs->r9  = 0 */
+       xorq    %r9, %r9                /* nospec   r9 */
        pushq   $0                      /* pt_regs->r10 = 0 */
+       xorq    %r10, %r10              /* nospec   r10 */
        pushq   $0                      /* pt_regs->r11 = 0 */
+       xorq    %r11, %r11              /* nospec   r11 */
        pushq   %rbx                    /* pt_regs->rbx */
+       xorl    %ebx, %ebx              /* nospec   rbx */
        pushq   %rbp                    /* pt_regs->rbp (will be overwritten) */
+       xorl    %ebp, %ebp              /* nospec   rbp */
        pushq   $0                      /* pt_regs->r12 = 0 */
+       xorq    %r12, %r12              /* nospec   r12 */
        pushq   $0                      /* pt_regs->r13 = 0 */
+       xorq    %r13, %r13              /* nospec   r13 */
        pushq   $0                      /* pt_regs->r14 = 0 */
+       xorq    %r14, %r14              /* nospec   r14 */
        pushq   $0                      /* pt_regs->r15 = 0 */
+       xorq    %r15, %r15              /* nospec   r15 */
 
        /*
         * User mode is traced as though IRQs are on, and SYSENTER
@@ -338,15 +358,25 @@ ENTRY(entry_INT80_compat)
        pushq   %rcx                    /* pt_regs->cx */
        pushq   $-ENOSYS                /* pt_regs->ax */
        pushq   $0                      /* pt_regs->r8  = 0 */
+       xorq    %r8, %r8                /* nospec   r8 */
        pushq   $0                      /* pt_regs->r9  = 0 */
+       xorq    %r9, %r9                /* nospec   r9 */
        pushq   $0                      /* pt_regs->r10 = 0 */
+       xorq    %r10, %r10              /* nospec   r10 */
        pushq   $0                      /* pt_regs->r11 = 0 */
+       xorq    %r11, %r11              /* nospec   r11 */
        pushq   %rbx                    /* pt_regs->rbx */
+       xorl    %ebx, %ebx              /* nospec   rbx */
        pushq   %rbp                    /* pt_regs->rbp */
+       xorl    %ebp, %ebp              /* nospec   rbp */
        pushq   %r12                    /* pt_regs->r12 */
+       xorq    %r12, %r12              /* nospec   r12 */
        pushq   %r13                    /* pt_regs->r13 */
+       xorq    %r13, %r13              /* nospec   r13 */
        pushq   %r14                    /* pt_regs->r14 */
+       xorq    %r14, %r14              /* nospec   r14 */
        pushq   %r15                    /* pt_regs->r15 */
+       xorq    %r15, %r15              /* nospec   r15 */
        cld
 
        /*
index 731153a4681e73f761dea8c0c15ce6757b89860e..56457cb73448b494b1aed2138fcee8e13d50d81b 100644 (file)
@@ -3559,7 +3559,7 @@ static int intel_snb_pebs_broken(int cpu)
                break;
 
        case INTEL_FAM6_SANDYBRIDGE_X:
-               switch (cpu_data(cpu).x86_mask) {
+               switch (cpu_data(cpu).x86_stepping) {
                case 6: rev = 0x618; break;
                case 7: rev = 0x70c; break;
                }
index ae64d0b69729dbb23c436d6c3bbf360a3cafb53b..cf372b90557ed4e8a788c8f97b515ac956d2512e 100644 (file)
@@ -1186,7 +1186,7 @@ void __init intel_pmu_lbr_init_atom(void)
         * on PMU interrupt
         */
        if (boot_cpu_data.x86_model == 28
-           && boot_cpu_data.x86_mask < 10) {
+           && boot_cpu_data.x86_stepping < 10) {
                pr_cont("LBR disabled due to erratum");
                return;
        }
index a5604c3529308b7cfc1dc336a8d53496d531659b..408879b0c0d4e41c56906d2464734279c9360f7e 100644 (file)
@@ -234,7 +234,7 @@ static __initconst const struct x86_pmu p6_pmu = {
 
 static __init void p6_pmu_rdpmc_quirk(void)
 {
-       if (boot_cpu_data.x86_mask < 9) {
+       if (boot_cpu_data.x86_stepping < 9) {
                /*
                 * PPro erratum 26; fixed in stepping 9 and above.
                 */
index 44f5d79d51056b036e7ef4536d7bfe340dae9747..11881726ed37290128f9bca07b9944aa2d818677 100644 (file)
@@ -94,7 +94,7 @@ static inline unsigned int acpi_processor_cstate_check(unsigned int max_cstate)
        if (boot_cpu_data.x86 == 0x0F &&
            boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
            boot_cpu_data.x86_model <= 0x05 &&
-           boot_cpu_data.x86_mask < 0x0A)
+           boot_cpu_data.x86_stepping < 0x0A)
                return 1;
        else if (boot_cpu_has(X86_BUG_AMD_APIC_C1E))
                return 1;
index 30d40614601641b9fc2dbd5ce734ff86d0cdd731..e1259f043ae999fa21e1f998431ab12cd73a11ea 100644 (file)
@@ -40,7 +40,7 @@ static inline unsigned long array_index_mask_nospec(unsigned long index,
 
        asm ("cmp %1,%2; sbb %0,%0;"
                        :"=r" (mask)
-                       :"r"(size),"r" (index)
+                       :"g"(size),"r" (index)
                        :"cc");
        return mask;
 }
index 34d99af43994453e1cec89aa202cc17b052ee690..6804d66427673ec314659944e65052b5dfba273e 100644 (file)
@@ -5,23 +5,20 @@
 #include <linux/stringify.h>
 
 /*
- * Since some emulators terminate on UD2, we cannot use it for WARN.
- * Since various instruction decoders disagree on the length of UD1,
- * we cannot use it either. So use UD0 for WARN.
+ * Despite that some emulators terminate on UD2, we use it for WARN().
  *
- * (binutils knows about "ud1" but {en,de}codes it as 2 bytes, whereas
- *  our kernel decoder thinks it takes a ModRM byte, which seems consistent
- *  with various things like the Intel SDM instruction encoding rules)
+ * Since various instruction decoders/specs disagree on the encoding of
+ * UD0/UD1.
  */
 
-#define ASM_UD0                ".byte 0x0f, 0xff"
+#define ASM_UD0                ".byte 0x0f, 0xff" /* + ModRM (for Intel) */
 #define ASM_UD1                ".byte 0x0f, 0xb9" /* + ModRM */
 #define ASM_UD2                ".byte 0x0f, 0x0b"
 
 #define INSN_UD0       0xff0f
 #define INSN_UD2       0x0b0f
 
-#define LEN_UD0                2
+#define LEN_UD2                2
 
 #ifdef CONFIG_GENERIC_BUG
 
@@ -77,7 +74,11 @@ do {                                                         \
        unreachable();                                          \
 } while (0)
 
-#define __WARN_FLAGS(flags)    _BUG_FLAGS(ASM_UD0, BUGFLAG_WARNING|(flags))
+#define __WARN_FLAGS(flags)                                    \
+do {                                                           \
+       _BUG_FLAGS(ASM_UD2, BUGFLAG_WARNING|(flags));           \
+       annotate_reachable();                                   \
+} while (0)
 
 #include <asm-generic/bug.h>
 
index 70eddb3922ff7b3e44fc27b9426da39aa4e2b6fa..736771c9822ef965233b7114fd0b1a025e8c3a46 100644 (file)
@@ -148,45 +148,46 @@ extern void clear_cpu_cap(struct cpuinfo_x86 *c, unsigned int bit);
  */
 static __always_inline __pure bool _static_cpu_has(u16 bit)
 {
-               asm_volatile_goto("1: jmp 6f\n"
-                        "2:\n"
-                        ".skip -(((5f-4f) - (2b-1b)) > 0) * "
-                                "((5f-4f) - (2b-1b)),0x90\n"
-                        "3:\n"
-                        ".section .altinstructions,\"a\"\n"
-                        " .long 1b - .\n"              /* src offset */
-                        " .long 4f - .\n"              /* repl offset */
-                        " .word %P1\n"                 /* always replace */
-                        " .byte 3b - 1b\n"             /* src len */
-                        " .byte 5f - 4f\n"             /* repl len */
-                        " .byte 3b - 2b\n"             /* pad len */
-                        ".previous\n"
-                        ".section .altinstr_replacement,\"ax\"\n"
-                        "4: jmp %l[t_no]\n"
-                        "5:\n"
-                        ".previous\n"
-                        ".section .altinstructions,\"a\"\n"
-                        " .long 1b - .\n"              /* src offset */
-                        " .long 0\n"                   /* no replacement */
-                        " .word %P0\n"                 /* feature bit */
-                        " .byte 3b - 1b\n"             /* src len */
-                        " .byte 0\n"                   /* repl len */
-                        " .byte 0\n"                   /* pad len */
-                        ".previous\n"
-                        ".section .altinstr_aux,\"ax\"\n"
-                        "6:\n"
-                        " testb %[bitnum],%[cap_byte]\n"
-                        " jnz %l[t_yes]\n"
-                        " jmp %l[t_no]\n"
-                        ".previous\n"
-                        : : "i" (bit), "i" (X86_FEATURE_ALWAYS),
-                            [bitnum] "i" (1 << (bit & 7)),
-                            [cap_byte] "m" (((const char *)boot_cpu_data.x86_capability)[bit >> 3])
-                        : : t_yes, t_no);
-       t_yes:
-               return true;
-       t_no:
-               return false;
+       asm_volatile_goto("1: jmp 6f\n"
+                "2:\n"
+                ".skip -(((5f-4f) - (2b-1b)) > 0) * "
+                        "((5f-4f) - (2b-1b)),0x90\n"
+                "3:\n"
+                ".section .altinstructions,\"a\"\n"
+                " .long 1b - .\n"              /* src offset */
+                " .long 4f - .\n"              /* repl offset */
+                " .word %P[always]\n"          /* always replace */
+                " .byte 3b - 1b\n"             /* src len */
+                " .byte 5f - 4f\n"             /* repl len */
+                " .byte 3b - 2b\n"             /* pad len */
+                ".previous\n"
+                ".section .altinstr_replacement,\"ax\"\n"
+                "4: jmp %l[t_no]\n"
+                "5:\n"
+                ".previous\n"
+                ".section .altinstructions,\"a\"\n"
+                " .long 1b - .\n"              /* src offset */
+                " .long 0\n"                   /* no replacement */
+                " .word %P[feature]\n"         /* feature bit */
+                " .byte 3b - 1b\n"             /* src len */
+                " .byte 0\n"                   /* repl len */
+                " .byte 0\n"                   /* pad len */
+                ".previous\n"
+                ".section .altinstr_aux,\"ax\"\n"
+                "6:\n"
+                " testb %[bitnum],%[cap_byte]\n"
+                " jnz %l[t_yes]\n"
+                " jmp %l[t_no]\n"
+                ".previous\n"
+                : : [feature]  "i" (bit),
+                    [always]   "i" (X86_FEATURE_ALWAYS),
+                    [bitnum]   "i" (1 << (bit & 7)),
+                    [cap_byte] "m" (((const char *)boot_cpu_data.x86_capability)[bit >> 3])
+                : : t_yes, t_no);
+t_yes:
+       return true;
+t_no:
+       return false;
 }
 
 #define static_cpu_has(bit)                                    \
index 4d57894635f242da061e6a10acccaec70ae1dbc0..76b058533e473b10d99e7a1ee4b661b5b2b2b14d 100644 (file)
@@ -6,6 +6,7 @@
 #include <asm/alternative.h>
 #include <asm/alternative-asm.h>
 #include <asm/cpufeatures.h>
+#include <asm/msr-index.h>
 
 #ifdef __ASSEMBLY__
 
@@ -164,10 +165,15 @@ static inline void vmexit_fill_RSB(void)
 
 static inline void indirect_branch_prediction_barrier(void)
 {
-       alternative_input("",
-                         "call __ibp_barrier",
-                         X86_FEATURE_USE_IBPB,
-                         ASM_NO_INPUT_CLOBBER("eax", "ecx", "edx", "memory"));
+       asm volatile(ALTERNATIVE("",
+                                "movl %[msr], %%ecx\n\t"
+                                "movl %[val], %%eax\n\t"
+                                "movl $0, %%edx\n\t"
+                                "wrmsr",
+                                X86_FEATURE_USE_IBPB)
+                    : : [msr] "i" (MSR_IA32_PRED_CMD),
+                        [val] "i" (PRED_CMD_IBPB)
+                    : "eax", "ecx", "edx", "memory");
 }
 
 #endif /* __ASSEMBLY__ */
index 4baa6bceb2325e6dd056ca682e1eeeb435693a26..d652a38080659775ef145d089291bde353ffe97a 100644 (file)
@@ -52,10 +52,6 @@ static inline void clear_page(void *page)
 
 void copy_page(void *to, void *from);
 
-#ifdef CONFIG_X86_MCE
-#define arch_unmap_kpfn arch_unmap_kpfn
-#endif
-
 #endif /* !__ASSEMBLY__ */
 
 #ifdef CONFIG_X86_VSYSCALL_EMULATION
index 892df375b6155a51f584760efb9f9e77c3f732e8..554841fab717aef09d2b5cc57410a6eed8c2df0c 100644 (file)
@@ -297,9 +297,9 @@ static inline void __flush_tlb_global(void)
 {
        PVOP_VCALL0(pv_mmu_ops.flush_tlb_kernel);
 }
-static inline void __flush_tlb_single(unsigned long addr)
+static inline void __flush_tlb_one_user(unsigned long addr)
 {
-       PVOP_VCALL1(pv_mmu_ops.flush_tlb_single, addr);
+       PVOP_VCALL1(pv_mmu_ops.flush_tlb_one_user, addr);
 }
 
 static inline void flush_tlb_others(const struct cpumask *cpumask,
index 6ec54d01972dcf7d79e75bcfbecc84a1f2da80ea..f624f1f10316c248911585f757ea5bd257e98434 100644 (file)
@@ -217,7 +217,7 @@ struct pv_mmu_ops {
        /* TLB operations */
        void (*flush_tlb_user)(void);
        void (*flush_tlb_kernel)(void);
-       void (*flush_tlb_single)(unsigned long addr);
+       void (*flush_tlb_one_user)(unsigned long addr);
        void (*flush_tlb_others)(const struct cpumask *cpus,
                                 const struct flush_tlb_info *info);
 
index e67c0620aec2a268537b46d1da80ce6d0ef174a5..e55466760ff8e031433132eab676535e614e224f 100644 (file)
@@ -61,7 +61,7 @@ void paging_init(void);
 #define kpte_clear_flush(ptep, vaddr)          \
 do {                                           \
        pte_clear(&init_mm, (vaddr), (ptep));   \
-       __flush_tlb_one((vaddr));               \
+       __flush_tlb_one_kernel((vaddr));                \
 } while (0)
 
 #endif /* !__ASSEMBLY__ */
index 793bae7e7ce36bd36e728a8a6fe7f8e17b9920db..1bd9ed87606f45f5a22f2510bde9ba34029a0551 100644 (file)
@@ -91,7 +91,7 @@ struct cpuinfo_x86 {
        __u8                    x86;            /* CPU family */
        __u8                    x86_vendor;     /* CPU vendor */
        __u8                    x86_model;
-       __u8                    x86_mask;
+       __u8                    x86_stepping;
 #ifdef CONFIG_X86_64
        /* Number of 4K pages in DTLB/ITLB combined(in pages): */
        int                     x86_tlbsize;
@@ -109,7 +109,7 @@ struct cpuinfo_x86 {
        char                    x86_vendor_id[16];
        char                    x86_model_id[64];
        /* in KB - valid for CPUS which support this call: */
-       int                     x86_cache_size;
+       unsigned int            x86_cache_size;
        int                     x86_cache_alignment;    /* In bytes */
        /* Cache QoS architectural values: */
        int                     x86_cache_max_rmid;     /* max index */
@@ -977,7 +977,4 @@ bool xen_set_default_idle(void);
 
 void stop_this_cpu(void *dummy);
 void df_debug(struct pt_regs *regs, long error_code);
-
-void __ibp_barrier(void);
-
 #endif /* _ASM_X86_PROCESSOR_H */
index 461f53d27708ae8b80622753122c1a4927537ee2..a4189762b2667016e1e07a0942ff9eaecab9ba51 100644 (file)
@@ -129,6 +129,7 @@ static inline void arch_send_call_function_ipi_mask(const struct cpumask *mask)
 void cpu_disable_common(void);
 void native_smp_prepare_boot_cpu(void);
 void native_smp_prepare_cpus(unsigned int max_cpus);
+void calculate_max_logical_packages(void);
 void native_smp_cpus_done(unsigned int max_cpus);
 void common_cpu_up(unsigned int cpunum, struct task_struct *tidle);
 int native_cpu_up(unsigned int cpunum, struct task_struct *tidle);
index 2b8f18ca58747ae40b515c2bf674f8faced147e8..84137c22fdfade9bc8224c317808b311ec6f3007 100644 (file)
@@ -140,7 +140,7 @@ static inline unsigned long build_cr3_noflush(pgd_t *pgd, u16 asid)
 #else
 #define __flush_tlb() __native_flush_tlb()
 #define __flush_tlb_global() __native_flush_tlb_global()
-#define __flush_tlb_single(addr) __native_flush_tlb_single(addr)
+#define __flush_tlb_one_user(addr) __native_flush_tlb_one_user(addr)
 #endif
 
 static inline bool tlb_defer_switch_to_init_mm(void)
@@ -400,7 +400,7 @@ static inline void __native_flush_tlb_global(void)
 /*
  * flush one page in the user mapping
  */
-static inline void __native_flush_tlb_single(unsigned long addr)
+static inline void __native_flush_tlb_one_user(unsigned long addr)
 {
        u32 loaded_mm_asid = this_cpu_read(cpu_tlbstate.loaded_mm_asid);
 
@@ -437,18 +437,31 @@ static inline void __flush_tlb_all(void)
 /*
  * flush one page in the kernel mapping
  */
-static inline void __flush_tlb_one(unsigned long addr)
+static inline void __flush_tlb_one_kernel(unsigned long addr)
 {
        count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ONE);
-       __flush_tlb_single(addr);
+
+       /*
+        * If PTI is off, then __flush_tlb_one_user() is just INVLPG or its
+        * paravirt equivalent.  Even with PCID, this is sufficient: we only
+        * use PCID if we also use global PTEs for the kernel mapping, and
+        * INVLPG flushes global translations across all address spaces.
+        *
+        * If PTI is on, then the kernel is mapped with non-global PTEs, and
+        * __flush_tlb_one_user() will flush the given address for the current
+        * kernel address space and for its usermode counterpart, but it does
+        * not flush it for other address spaces.
+        */
+       __flush_tlb_one_user(addr);
 
        if (!static_cpu_has(X86_FEATURE_PTI))
                return;
 
        /*
-        * __flush_tlb_single() will have cleared the TLB entry for this ASID,
-        * but since kernel space is replicated across all, we must also
-        * invalidate all others.
+        * See above.  We need to propagate the flush to all other address
+        * spaces.  In principle, we only need to propagate it to kernelmode
+        * address spaces, but the extra bookkeeping we would need is not
+        * worth it.
         */
        invalidate_other_asid();
 }
index 6db28f17ff2884e01122f2689b117e8ae63f9ec4..c88e0b127810f22b15b53eb150d11e9584201885 100644 (file)
@@ -235,7 +235,7 @@ int amd_cache_northbridges(void)
        if (boot_cpu_data.x86 == 0x10 &&
            boot_cpu_data.x86_model >= 0x8 &&
            (boot_cpu_data.x86_model > 0x9 ||
-            boot_cpu_data.x86_mask >= 0x1))
+            boot_cpu_data.x86_stepping >= 0x1))
                amd_northbridges.flags |= AMD_NB_L3_INDEX_DISABLE;
 
        if (boot_cpu_data.x86 == 0x15)
index 25ddf02598d20a89cb1da2243aba687e6eee7657..b203af0855b57618fc398e29425ef96755c95552 100644 (file)
@@ -546,7 +546,7 @@ static DEFINE_PER_CPU(struct clock_event_device, lapic_events);
 
 static u32 hsx_deadline_rev(void)
 {
-       switch (boot_cpu_data.x86_mask) {
+       switch (boot_cpu_data.x86_stepping) {
        case 0x02: return 0x3a; /* EP */
        case 0x04: return 0x0f; /* EX */
        }
@@ -556,7 +556,7 @@ static u32 hsx_deadline_rev(void)
 
 static u32 bdx_deadline_rev(void)
 {
-       switch (boot_cpu_data.x86_mask) {
+       switch (boot_cpu_data.x86_stepping) {
        case 0x02: return 0x00000011;
        case 0x03: return 0x0700000e;
        case 0x04: return 0x0f00000c;
@@ -568,7 +568,7 @@ static u32 bdx_deadline_rev(void)
 
 static u32 skx_deadline_rev(void)
 {
-       switch (boot_cpu_data.x86_mask) {
+       switch (boot_cpu_data.x86_stepping) {
        case 0x03: return 0x01000136;
        case 0x04: return 0x02000014;
        }
index 46b675aaf20b8a1f30f1eee2cf63717e299c5367..f11910b44638c84995848a5acdecbb296ddb5636 100644 (file)
@@ -1176,16 +1176,25 @@ static void __init decode_gam_rng_tbl(unsigned long ptr)
 
        uv_gre_table = gre;
        for (; gre->type != UV_GAM_RANGE_TYPE_UNUSED; gre++) {
+               unsigned long size = ((unsigned long)(gre->limit - lgre)
+                                       << UV_GAM_RANGE_SHFT);
+               int order = 0;
+               char suffix[] = " KMGTPE";
+
+               while (size > 9999 && order < sizeof(suffix)) {
+                       size /= 1024;
+                       order++;
+               }
+
                if (!index) {
                        pr_info("UV: GAM Range Table...\n");
                        pr_info("UV:  # %20s %14s %5s %4s %5s %3s %2s\n", "Range", "", "Size", "Type", "NASID", "SID", "PN");
                }
-               pr_info("UV: %2d: 0x%014lx-0x%014lx %5luG %3d   %04x  %02x %02x\n",
+               pr_info("UV: %2d: 0x%014lx-0x%014lx %5lu%c %3d   %04x  %02x %02x\n",
                        index++,
                        (unsigned long)lgre << UV_GAM_RANGE_SHFT,
                        (unsigned long)gre->limit << UV_GAM_RANGE_SHFT,
-                       ((unsigned long)(gre->limit - lgre)) >>
-                               (30 - UV_GAM_RANGE_SHFT), /* 64M -> 1G */
+                       size, suffix[order],
                        gre->type, gre->nasid, gre->sockid, gre->pnode);
 
                lgre = gre->limit;
index fa1261eefa16e73cedf27aadb878753be693f919..f91ba53e06c8b90f9d5557a2713896a1a50100f0 100644 (file)
@@ -18,7 +18,7 @@ void foo(void)
        OFFSET(CPUINFO_x86, cpuinfo_x86, x86);
        OFFSET(CPUINFO_x86_vendor, cpuinfo_x86, x86_vendor);
        OFFSET(CPUINFO_x86_model, cpuinfo_x86, x86_model);
-       OFFSET(CPUINFO_x86_mask, cpuinfo_x86, x86_mask);
+       OFFSET(CPUINFO_x86_stepping, cpuinfo_x86, x86_stepping);
        OFFSET(CPUINFO_cpuid_level, cpuinfo_x86, cpuid_level);
        OFFSET(CPUINFO_x86_capability, cpuinfo_x86, x86_capability);
        OFFSET(CPUINFO_x86_vendor_id, cpuinfo_x86, x86_vendor_id);
index 5bddbdcbc4a3cf722cd960c032e8ec400369e17a..f0e6456ca7d3cd482893a7d1953aec5d79c4caad 100644 (file)
@@ -119,7 +119,7 @@ static void init_amd_k6(struct cpuinfo_x86 *c)
                return;
        }
 
-       if (c->x86_model == 6 && c->x86_mask == 1) {
+       if (c->x86_model == 6 && c->x86_stepping == 1) {
                const int K6_BUG_LOOP = 1000000;
                int n;
                void (*f_vide)(void);
@@ -149,7 +149,7 @@ static void init_amd_k6(struct cpuinfo_x86 *c)
 
        /* K6 with old style WHCR */
        if (c->x86_model < 8 ||
-          (c->x86_model == 8 && c->x86_mask < 8)) {
+          (c->x86_model == 8 && c->x86_stepping < 8)) {
                /* We can only write allocate on the low 508Mb */
                if (mbytes > 508)
                        mbytes = 508;
@@ -168,7 +168,7 @@ static void init_amd_k6(struct cpuinfo_x86 *c)
                return;
        }
 
-       if ((c->x86_model == 8 && c->x86_mask > 7) ||
+       if ((c->x86_model == 8 && c->x86_stepping > 7) ||
             c->x86_model == 9 || c->x86_model == 13) {
                /* The more serious chips .. */
 
@@ -221,7 +221,7 @@ static void init_amd_k7(struct cpuinfo_x86 *c)
         * are more robust with CLK_CTL set to 200xxxxx instead of 600xxxxx
         * As per AMD technical note 27212 0.2
         */
-       if ((c->x86_model == 8 && c->x86_mask >= 1) || (c->x86_model > 8)) {
+       if ((c->x86_model == 8 && c->x86_stepping >= 1) || (c->x86_model > 8)) {
                rdmsr(MSR_K7_CLK_CTL, l, h);
                if ((l & 0xfff00000) != 0x20000000) {
                        pr_info("CPU: CLK_CTL MSR was %x. Reprogramming to %x\n",
@@ -241,12 +241,12 @@ static void init_amd_k7(struct cpuinfo_x86 *c)
         * but they are not certified as MP capable.
         */
        /* Athlon 660/661 is valid. */
-       if ((c->x86_model == 6) && ((c->x86_mask == 0) ||
-           (c->x86_mask == 1)))
+       if ((c->x86_model == 6) && ((c->x86_stepping == 0) ||
+           (c->x86_stepping == 1)))
                return;
 
        /* Duron 670 is valid */
-       if ((c->x86_model == 7) && (c->x86_mask == 0))
+       if ((c->x86_model == 7) && (c->x86_stepping == 0))
                return;
 
        /*
@@ -256,8 +256,8 @@ static void init_amd_k7(struct cpuinfo_x86 *c)
         * See http://www.heise.de/newsticker/data/jow-18.10.01-000 for
         * more.
         */
-       if (((c->x86_model == 6) && (c->x86_mask >= 2)) ||
-           ((c->x86_model == 7) && (c->x86_mask >= 1)) ||
+       if (((c->x86_model == 6) && (c->x86_stepping >= 2)) ||
+           ((c->x86_model == 7) && (c->x86_stepping >= 1)) ||
             (c->x86_model > 7))
                if (cpu_has(c, X86_FEATURE_MP))
                        return;
@@ -628,7 +628,7 @@ static void early_init_amd(struct cpuinfo_x86 *c)
        /*  Set MTRR capability flag if appropriate */
        if (c->x86 == 5)
                if (c->x86_model == 13 || c->x86_model == 9 ||
-                   (c->x86_model == 8 && c->x86_mask >= 8))
+                   (c->x86_model == 8 && c->x86_stepping >= 8))
                        set_cpu_cap(c, X86_FEATURE_K6_MTRR);
 #endif
 #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_PCI)
@@ -795,7 +795,7 @@ static void init_amd_zn(struct cpuinfo_x86 *c)
         * Fix erratum 1076: CPB feature bit not being set in CPUID. It affects
         * all up to and including B1.
         */
-       if (c->x86_model <= 1 && c->x86_mask <= 1)
+       if (c->x86_model <= 1 && c->x86_stepping <= 1)
                set_cpu_cap(c, X86_FEATURE_CPB);
 }
 
@@ -906,11 +906,11 @@ static unsigned int amd_size_cache(struct cpuinfo_x86 *c, unsigned int size)
        /* AMD errata T13 (order #21922) */
        if ((c->x86 == 6)) {
                /* Duron Rev A0 */
-               if (c->x86_model == 3 && c->x86_mask == 0)
+               if (c->x86_model == 3 && c->x86_stepping == 0)
                        size = 64;
                /* Tbird rev A1/A2 */
                if (c->x86_model == 4 &&
-                       (c->x86_mask == 0 || c->x86_mask == 1))
+                       (c->x86_stepping == 0 || c->x86_stepping == 1))
                        size = 256;
        }
        return size;
@@ -1047,7 +1047,7 @@ static bool cpu_has_amd_erratum(struct cpuinfo_x86 *cpu, const int *erratum)
        }
 
        /* OSVW unavailable or ID unknown, match family-model-stepping range */
-       ms = (cpu->x86_model << 4) | cpu->x86_mask;
+       ms = (cpu->x86_model << 4) | cpu->x86_stepping;
        while ((range = *erratum++))
                if ((cpu->x86 == AMD_MODEL_RANGE_FAMILY(range)) &&
                    (ms >= AMD_MODEL_RANGE_START(range)) &&
index 71949bf2de5ad378e8184566010e4aaa6aa2307d..d71c8b54b696d4593ffb15ff894468ad3e524a50 100644 (file)
@@ -162,8 +162,7 @@ static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void)
        if (cmdline_find_option_bool(boot_command_line, "nospectre_v2"))
                return SPECTRE_V2_CMD_NONE;
        else {
-               ret = cmdline_find_option(boot_command_line, "spectre_v2", arg,
-                                         sizeof(arg));
+               ret = cmdline_find_option(boot_command_line, "spectre_v2", arg, sizeof(arg));
                if (ret < 0)
                        return SPECTRE_V2_CMD_AUTO;
 
@@ -175,8 +174,7 @@ static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void)
                }
 
                if (i >= ARRAY_SIZE(mitigation_options)) {
-                       pr_err("unknown option (%s). Switching to AUTO select\n",
-                              mitigation_options[i].option);
+                       pr_err("unknown option (%s). Switching to AUTO select\n", arg);
                        return SPECTRE_V2_CMD_AUTO;
                }
        }
@@ -185,8 +183,7 @@ static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void)
             cmd == SPECTRE_V2_CMD_RETPOLINE_AMD ||
             cmd == SPECTRE_V2_CMD_RETPOLINE_GENERIC) &&
            !IS_ENABLED(CONFIG_RETPOLINE)) {
-               pr_err("%s selected but not compiled in. Switching to AUTO select\n",
-                      mitigation_options[i].option);
+               pr_err("%s selected but not compiled in. Switching to AUTO select\n", mitigation_options[i].option);
                return SPECTRE_V2_CMD_AUTO;
        }
 
@@ -256,14 +253,14 @@ static void __init spectre_v2_select_mitigation(void)
                        goto retpoline_auto;
                break;
        }
-       pr_err("kernel not compiled with retpoline; no mitigation available!");
+       pr_err("Spectre mitigation: kernel not compiled with retpoline; no mitigation available!");
        return;
 
 retpoline_auto:
        if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
        retpoline_amd:
                if (!boot_cpu_has(X86_FEATURE_LFENCE_RDTSC)) {
-                       pr_err("LFENCE not serializing. Switching to generic retpoline\n");
+                       pr_err("Spectre mitigation: LFENCE not serializing, switching to generic retpoline\n");
                        goto retpoline_generic;
                }
                mode = retp_compiler() ? SPECTRE_V2_RETPOLINE_AMD :
@@ -281,7 +278,7 @@ retpoline_auto:
        pr_info("%s\n", spectre_v2_strings[mode]);
 
        /*
-        * If neither SMEP or KPTI are available, there is a risk of
+        * If neither SMEP nor PTI are available, there is a risk of
         * hitting userspace addresses in the RSB after a context switch
         * from a shallow call stack to a deeper one. To prevent this fill
         * the entire RSB, even when using IBRS.
@@ -295,21 +292,20 @@ retpoline_auto:
        if ((!boot_cpu_has(X86_FEATURE_PTI) &&
             !boot_cpu_has(X86_FEATURE_SMEP)) || is_skylake_era()) {
                setup_force_cpu_cap(X86_FEATURE_RSB_CTXSW);
-               pr_info("Filling RSB on context switch\n");
+               pr_info("Spectre v2 mitigation: Filling RSB on context switch\n");
        }
 
        /* Initialize Indirect Branch Prediction Barrier if supported */
        if (boot_cpu_has(X86_FEATURE_IBPB)) {
                setup_force_cpu_cap(X86_FEATURE_USE_IBPB);
-               pr_info("Enabling Indirect Branch Prediction Barrier\n");
+               pr_info("Spectre v2 mitigation: Enabling Indirect Branch Prediction Barrier\n");
        }
 }
 
 #undef pr_fmt
 
 #ifdef CONFIG_SYSFS
-ssize_t cpu_show_meltdown(struct device *dev,
-                         struct device_attribute *attr, char *buf)
+ssize_t cpu_show_meltdown(struct device *dev, struct device_attribute *attr, char *buf)
 {
        if (!boot_cpu_has_bug(X86_BUG_CPU_MELTDOWN))
                return sprintf(buf, "Not affected\n");
@@ -318,16 +314,14 @@ ssize_t cpu_show_meltdown(struct device *dev,
        return sprintf(buf, "Vulnerable\n");
 }
 
-ssize_t cpu_show_spectre_v1(struct device *dev,
-                           struct device_attribute *attr, char *buf)
+ssize_t cpu_show_spectre_v1(struct device *dev, struct device_attribute *attr, char *buf)
 {
        if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V1))
                return sprintf(buf, "Not affected\n");
        return sprintf(buf, "Mitigation: __user pointer sanitization\n");
 }
 
-ssize_t cpu_show_spectre_v2(struct device *dev,
-                           struct device_attribute *attr, char *buf)
+ssize_t cpu_show_spectre_v2(struct device *dev, struct device_attribute *attr, char *buf)
 {
        if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2))
                return sprintf(buf, "Not affected\n");
@@ -337,9 +331,3 @@ ssize_t cpu_show_spectre_v2(struct device *dev,
                       spectre_v2_module_string());
 }
 #endif
-
-void __ibp_barrier(void)
-{
-       __wrmsr(MSR_IA32_PRED_CMD, PRED_CMD_IBPB, 0);
-}
-EXPORT_SYMBOL_GPL(__ibp_barrier);
index c578cd29c2d2c47bd8c03268bc8809c28785d43c..e5ec0f11c0de7c06a0975d852822b2033d914185 100644 (file)
@@ -140,7 +140,7 @@ static void init_centaur(struct cpuinfo_x86 *c)
                        clear_cpu_cap(c, X86_FEATURE_TSC);
                        break;
                case 8:
-                       switch (c->x86_mask) {
+                       switch (c->x86_stepping) {
                        default:
                        name = "2";
                                break;
@@ -215,7 +215,7 @@ centaur_size_cache(struct cpuinfo_x86 *c, unsigned int size)
         *  - Note, it seems this may only be in engineering samples.
         */
        if ((c->x86 == 6) && (c->x86_model == 9) &&
-                               (c->x86_mask == 1) && (size == 65))
+                               (c->x86_stepping == 1) && (size == 65))
                size -= 1;
        return size;
 }
index d63f4b5706e4d76271da40fe14c6ef4ba41fe999..824aee0117bb5402d52fb5c8958e98b0bebfdf0c 100644 (file)
@@ -731,7 +731,7 @@ void cpu_detect(struct cpuinfo_x86 *c)
                cpuid(0x00000001, &tfms, &misc, &junk, &cap0);
                c->x86          = x86_family(tfms);
                c->x86_model    = x86_model(tfms);
-               c->x86_mask     = x86_stepping(tfms);
+               c->x86_stepping = x86_stepping(tfms);
 
                if (cap0 & (1<<19)) {
                        c->x86_clflush_size = ((misc >> 8) & 0xff) * 8;
@@ -1184,9 +1184,9 @@ static void identify_cpu(struct cpuinfo_x86 *c)
        int i;
 
        c->loops_per_jiffy = loops_per_jiffy;
-       c->x86_cache_size = -1;
+       c->x86_cache_size = 0;
        c->x86_vendor = X86_VENDOR_UNKNOWN;
-       c->x86_model = c->x86_mask = 0; /* So far unknown... */
+       c->x86_model = c->x86_stepping = 0;     /* So far unknown... */
        c->x86_vendor_id[0] = '\0'; /* Unset */
        c->x86_model_id[0] = '\0';  /* Unset */
        c->x86_max_cores = 1;
@@ -1378,8 +1378,8 @@ void print_cpu_info(struct cpuinfo_x86 *c)
 
        pr_cont(" (family: 0x%x, model: 0x%x", c->x86, c->x86_model);
 
-       if (c->x86_mask || c->cpuid_level >= 0)
-               pr_cont(", stepping: 0x%x)\n", c->x86_mask);
+       if (c->x86_stepping || c->cpuid_level >= 0)
+               pr_cont(", stepping: 0x%x)\n", c->x86_stepping);
        else
                pr_cont(")\n");
 }
index 6b4bb335641f3f039c7cff50190ad38b5cf9de66..8949b7ae6d92536c1bbff659d7463588d2bdfb06 100644 (file)
@@ -215,7 +215,7 @@ static void init_cyrix(struct cpuinfo_x86 *c)
 
        /* common case step number/rev -- exceptions handled below */
        c->x86_model = (dir1 >> 4) + 1;
-       c->x86_mask = dir1 & 0xf;
+       c->x86_stepping = dir1 & 0xf;
 
        /* Now cook; the original recipe is by Channing Corn, from Cyrix.
         * We do the same thing for each generation: we work out
index 319bf989fad1e1f3d7ed2234090a51c55a0067c6..d19e903214b403289aaf304eba85cc585c100c5e 100644 (file)
@@ -116,14 +116,13 @@ struct sku_microcode {
        u32 microcode;
 };
 static const struct sku_microcode spectre_bad_microcodes[] = {
-       { INTEL_FAM6_KABYLAKE_DESKTOP,  0x0B,   0x84 },
-       { INTEL_FAM6_KABYLAKE_DESKTOP,  0x0A,   0x84 },
-       { INTEL_FAM6_KABYLAKE_DESKTOP,  0x09,   0x84 },
-       { INTEL_FAM6_KABYLAKE_MOBILE,   0x0A,   0x84 },
-       { INTEL_FAM6_KABYLAKE_MOBILE,   0x09,   0x84 },
+       { INTEL_FAM6_KABYLAKE_DESKTOP,  0x0B,   0x80 },
+       { INTEL_FAM6_KABYLAKE_DESKTOP,  0x0A,   0x80 },
+       { INTEL_FAM6_KABYLAKE_DESKTOP,  0x09,   0x80 },
+       { INTEL_FAM6_KABYLAKE_MOBILE,   0x0A,   0x80 },
+       { INTEL_FAM6_KABYLAKE_MOBILE,   0x09,   0x80 },
        { INTEL_FAM6_SKYLAKE_X,         0x03,   0x0100013e },
        { INTEL_FAM6_SKYLAKE_X,         0x04,   0x0200003c },
-       { INTEL_FAM6_SKYLAKE_MOBILE,    0x03,   0xc2 },
        { INTEL_FAM6_SKYLAKE_DESKTOP,   0x03,   0xc2 },
        { INTEL_FAM6_BROADWELL_CORE,    0x04,   0x28 },
        { INTEL_FAM6_BROADWELL_GT3E,    0x01,   0x1b },
@@ -136,8 +135,6 @@ static const struct sku_microcode spectre_bad_microcodes[] = {
        { INTEL_FAM6_HASWELL_X,         0x02,   0x3b },
        { INTEL_FAM6_HASWELL_X,         0x04,   0x10 },
        { INTEL_FAM6_IVYBRIDGE_X,       0x04,   0x42a },
-       /* Updated in the 20180108 release; blacklist until we know otherwise */
-       { INTEL_FAM6_ATOM_GEMINI_LAKE,  0x01,   0x22 },
        /* Observed in the wild */
        { INTEL_FAM6_SANDYBRIDGE_X,     0x06,   0x61b },
        { INTEL_FAM6_SANDYBRIDGE_X,     0x07,   0x712 },
@@ -149,7 +146,7 @@ static bool bad_spectre_microcode(struct cpuinfo_x86 *c)
 
        for (i = 0; i < ARRAY_SIZE(spectre_bad_microcodes); i++) {
                if (c->x86_model == spectre_bad_microcodes[i].model &&
-                   c->x86_mask == spectre_bad_microcodes[i].stepping)
+                   c->x86_stepping == spectre_bad_microcodes[i].stepping)
                        return (c->microcode <= spectre_bad_microcodes[i].microcode);
        }
        return false;
@@ -196,7 +193,7 @@ static void early_init_intel(struct cpuinfo_x86 *c)
         * need the microcode to have already been loaded... so if it is
         * not, recommend a BIOS update and disable large pages.
         */
-       if (c->x86 == 6 && c->x86_model == 0x1c && c->x86_mask <= 2 &&
+       if (c->x86 == 6 && c->x86_model == 0x1c && c->x86_stepping <= 2 &&
            c->microcode < 0x20e) {
                pr_warn("Atom PSE erratum detected, BIOS microcode update recommended\n");
                clear_cpu_cap(c, X86_FEATURE_PSE);
@@ -212,7 +209,7 @@ static void early_init_intel(struct cpuinfo_x86 *c)
 
        /* CPUID workaround for 0F33/0F34 CPU */
        if (c->x86 == 0xF && c->x86_model == 0x3
-           && (c->x86_mask == 0x3 || c->x86_mask == 0x4))
+           && (c->x86_stepping == 0x3 || c->x86_stepping == 0x4))
                c->x86_phys_bits = 36;
 
        /*
@@ -310,7 +307,7 @@ int ppro_with_ram_bug(void)
        if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL &&
            boot_cpu_data.x86 == 6 &&
            boot_cpu_data.x86_model == 1 &&
-           boot_cpu_data.x86_mask < 8) {
+           boot_cpu_data.x86_stepping < 8) {
                pr_info("Pentium Pro with Errata#50 detected. Taking evasive action.\n");
                return 1;
        }
@@ -327,7 +324,7 @@ static void intel_smp_check(struct cpuinfo_x86 *c)
         * Mask B, Pentium, but not Pentium MMX
         */
        if (c->x86 == 5 &&
-           c->x86_mask >= 1 && c->x86_mask <= 4 &&
+           c->x86_stepping >= 1 && c->x86_stepping <= 4 &&
            c->x86_model <= 3) {
                /*
                 * Remember we have B step Pentia with bugs
@@ -370,7 +367,7 @@ static void intel_workarounds(struct cpuinfo_x86 *c)
         * SEP CPUID bug: Pentium Pro reports SEP but doesn't have it until
         * model 3 mask 3
         */
-       if ((c->x86<<8 | c->x86_model<<4 | c->x86_mask) < 0x633)
+       if ((c->x86<<8 | c->x86_model<<4 | c->x86_stepping) < 0x633)
                clear_cpu_cap(c, X86_FEATURE_SEP);
 
        /*
@@ -388,7 +385,7 @@ static void intel_workarounds(struct cpuinfo_x86 *c)
         * P4 Xeon erratum 037 workaround.
         * Hardware prefetcher may cause stale data to be loaded into the cache.
         */
-       if ((c->x86 == 15) && (c->x86_model == 1) && (c->x86_mask == 1)) {
+       if ((c->x86 == 15) && (c->x86_model == 1) && (c->x86_stepping == 1)) {
                if (msr_set_bit(MSR_IA32_MISC_ENABLE,
                                MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE_BIT) > 0) {
                        pr_info("CPU: C0 stepping P4 Xeon detected.\n");
@@ -403,7 +400,7 @@ static void intel_workarounds(struct cpuinfo_x86 *c)
         * Specification Update").
         */
        if (boot_cpu_has(X86_FEATURE_APIC) && (c->x86<<8 | c->x86_model<<4) == 0x520 &&
-           (c->x86_mask < 0x6 || c->x86_mask == 0xb))
+           (c->x86_stepping < 0x6 || c->x86_stepping == 0xb))
                set_cpu_bug(c, X86_BUG_11AP);
 
 
@@ -650,7 +647,7 @@ static void init_intel(struct cpuinfo_x86 *c)
                case 6:
                        if (l2 == 128)
                                p = "Celeron (Mendocino)";
-                       else if (c->x86_mask == 0 || c->x86_mask == 5)
+                       else if (c->x86_stepping == 0 || c->x86_stepping == 5)
                                p = "Celeron-A";
                        break;
 
index 410629f10ad377176787b1b93ddb36625de2be36..589b948e6e01f01d7388cca456fbecd6019dbca4 100644 (file)
@@ -819,7 +819,7 @@ static __init void rdt_quirks(void)
                        cache_alloc_hsw_probe();
                break;
        case INTEL_FAM6_SKYLAKE_X:
-               if (boot_cpu_data.x86_mask <= 4)
+               if (boot_cpu_data.x86_stepping <= 4)
                        set_rdt_options("!cmt,!mbmtotal,!mbmlocal,!l3cat");
        }
 }
index aa0d5df9dc60e710b22ab7172f0e5fd6e05db2c9..e956eb26706191d27447bc9feec2e9fbde5310c7 100644 (file)
@@ -115,4 +115,19 @@ static inline void mce_unregister_injector_chain(struct notifier_block *nb)        { }
 
 extern struct mca_config mca_cfg;
 
+#ifndef CONFIG_X86_64
+/*
+ * On 32-bit systems it would be difficult to safely unmap a poison page
+ * from the kernel 1:1 map because there are no non-canonical addresses that
+ * we can use to refer to the address without risking a speculative access.
+ * However, this isn't much of an issue because:
+ * 1) Few unmappable pages are in the 1:1 map. Most are in HIGHMEM which
+ *    are only mapped into the kernel as needed
+ * 2) Few people would run a 32-bit kernel on a machine that supports
+ *    recoverable errors because they have too much memory to boot 32-bit.
+ */
+static inline void mce_unmap_kpfn(unsigned long pfn) {}
+#define mce_unmap_kpfn mce_unmap_kpfn
+#endif
+
 #endif /* __X86_MCE_INTERNAL_H__ */
diff --git a