+----------------+-----------------+-----------------+-----------------------------+
| Cavium | ThunderX2 SMMUv3| #126 | N/A |
+----------------+-----------------+-----------------+-----------------------------+
+| Cavium | ThunderX2 Core | #219 | CAVIUM_TX2_ERRATUM_219 |
++----------------+-----------------+-----------------+-----------------------------+
+----------------+-----------------+-----------------+-----------------------------+
| Freescale/NXP | LS2080A/LS1043A | A-008585 | FSL_ERRATUM_A008585 |
+----------------+-----------------+-----------------+-----------------------------+
Both KASAN modes work with both SLUB and SLAB memory allocators.
For better bug detection and nicer reporting, enable CONFIG_STACKTRACE.
+To augment reports with the last allocation and freeing stack of the physical
+page, it is recommended to also enable CONFIG_PAGE_OWNER and boot with page_owner=on.
+
To disable instrumentation for specific files or directories, add a line
similar to the following to the respective kernel Makefile:
* Advanced Interrupt Controller (AIC)
Required properties:
-- compatible: Should be "atmel,<chip>-aic"
- <chip> can be "at91rm9200", "sama5d2", "sama5d3" or "sama5d4"
+- compatible: Should be:
+ - "atmel,<chip>-aic" where <chip> can be "at91rm9200", "sama5d2",
+ "sama5d3" or "sama5d4"
+ - "microchip,<chip>-aic" where <chip> can be "sam9x60"
+
- interrupt-controller: Identifies the node as an interrupt controller.
- #interrupt-cells: The number of cells to define the interrupts. It should be 3.
The first cell is the IRQ number (aka "Peripheral IDentifier" on datasheet).
allOf:
- $ref: "/schemas/types.yaml#/definitions/string"
- enum: [ ADC0, ADC1, ADC10, ADC11, ADC12, ADC13, ADC14, ADC15,
- ADC2, ADC3, ADC4, ADC5, ADC6, ADC7, ADC8, ADC9, BMCINT, ESPI,
- ESPIALT, FSI1, FSI2, FWSPIABR, FWSPID, FWSPIWP, GPIT0, GPIT1,
- GPIT2, GPIT3, GPIT4, GPIT5, GPIT6, GPIT7, GPIU0, GPIU1, GPIU2,
- GPIU3, GPIU4, GPIU5, GPIU6, GPIU7, I2C1, I2C10, I2C11, I2C12,
- I2C13, I2C14, I2C15, I2C16, I2C2, I2C3, I2C4, I2C5, I2C6, I2C7,
- I2C8, I2C9, I3C3, I3C4, I3C5, I3C6, JTAGM, LHPD, LHSIRQ, LPC,
- LPCHC, LPCPD, LPCPME, LPCSMI, LSIRQ, MACLINK1, MACLINK2,
+ ADC2, ADC3, ADC4, ADC5, ADC6, ADC7, ADC8, ADC9, BMCINT, EMMC,
+ ESPI, ESPIALT, FSI1, FSI2, FWSPIABR, FWSPID, FWSPIWP, GPIT0,
+ GPIT1, GPIT2, GPIT3, GPIT4, GPIT5, GPIT6, GPIT7, GPIU0, GPIU1,
+ GPIU2, GPIU3, GPIU4, GPIU5, GPIU6, GPIU7, I2C1, I2C10, I2C11,
+ I2C12, I2C13, I2C14, I2C15, I2C16, I2C2, I2C3, I2C4, I2C5, I2C6,
+ I2C7, I2C8, I2C9, I3C3, I3C4, I3C5, I3C6, JTAGM, LHPD, LHSIRQ,
+ LPC, LPCHC, LPCPD, LPCPME, LPCSMI, LSIRQ, MACLINK1, MACLINK2,
MACLINK3, MACLINK4, MDIO1, MDIO2, MDIO3, MDIO4, NCTS1, NCTS2,
NCTS3, NCTS4, NDCD1, NDCD2, NDCD3, NDCD4, NDSR1, NDSR2, NDSR3,
NDSR4, NDTR1, NDTR2, NDTR3, NDTR4, NRI1, NRI2, NRI3, NRI4, NRTS1,
PWM8, PWM9, RGMII1, RGMII2, RGMII3, RGMII4, RMII1, RMII2, RMII3,
RMII4, RXD1, RXD2, RXD3, RXD4, SALT1, SALT10, SALT11, SALT12,
SALT13, SALT14, SALT15, SALT16, SALT2, SALT3, SALT4, SALT5,
- SALT6, SALT7, SALT8, SALT9, SD1, SD2, SD3, SD3DAT4, SD3DAT5,
- SD3DAT6, SD3DAT7, SGPM1, SGPS1, SIOONCTRL, SIOPBI, SIOPBO,
- SIOPWREQ, SIOPWRGD, SIOS3, SIOS5, SIOSCI, SPI1, SPI1ABR, SPI1CS1,
- SPI1WP, SPI2, SPI2CS1, SPI2CS2, TACH0, TACH1, TACH10, TACH11,
- TACH12, TACH13, TACH14, TACH15, TACH2, TACH3, TACH4, TACH5,
- TACH6, TACH7, TACH8, TACH9, THRU0, THRU1, THRU2, THRU3, TXD1,
- TXD2, TXD3, TXD4, UART10, UART11, UART12, UART13, UART6, UART7,
- UART8, UART9, VB, VGAHS, VGAVS, WDTRST1, WDTRST2, WDTRST3,
- WDTRST4, ]
+ SALT6, SALT7, SALT8, SALT9, SD1, SD2, SGPM1, SGPS1, SIOONCTRL,
+ SIOPBI, SIOPBO, SIOPWREQ, SIOPWRGD, SIOS3, SIOS5, SIOSCI, SPI1,
+ SPI1ABR, SPI1CS1, SPI1WP, SPI2, SPI2CS1, SPI2CS2, TACH0, TACH1,
+ TACH10, TACH11, TACH12, TACH13, TACH14, TACH15, TACH2, TACH3,
+ TACH4, TACH5, TACH6, TACH7, TACH8, TACH9, THRU0, THRU1, THRU2,
+ THRU3, TXD1, TXD2, TXD3, TXD4, UART10, UART11, UART12, UART13,
+ UART6, UART7, UART8, UART9, VB, VGAHS, VGAVS, WDTRST1, WDTRST2,
+ WDTRST3, WDTRST4, ]
groups:
allOf:
- $ref: "/schemas/types.yaml#/definitions/string"
- enum: [ ADC0, ADC1, ADC10, ADC11, ADC12, ADC13, ADC14, ADC15,
- ADC2, ADC3, ADC4, ADC5, ADC6, ADC7, ADC8, ADC9, BMCINT, ESPI,
- ESPIALT, FSI1, FSI2, FWSPIABR, FWSPID, FWQSPID, FWSPIWP, GPIT0,
- GPIT1, GPIT2, GPIT3, GPIT4, GPIT5, GPIT6, GPIT7, GPIU0, GPIU1,
- GPIU2, GPIU3, GPIU4, GPIU5, GPIU6, GPIU7, HVI3C3, HVI3C4, I2C1,
- I2C10, I2C11, I2C12, I2C13, I2C14, I2C15, I2C16, I2C2, I2C3,
- I2C4, I2C5, I2C6, I2C7, I2C8, I2C9, I3C3, I3C4, I3C5, I3C6,
- JTAGM, LHPD, LHSIRQ, LPC, LPCHC, LPCPD, LPCPME, LPCSMI, LSIRQ,
- MACLINK1, MACLINK2, MACLINK3, MACLINK4, MDIO1, MDIO2, MDIO3,
- MDIO4, NCTS1, NCTS2, NCTS3, NCTS4, NDCD1, NDCD2, NDCD3, NDCD4,
- NDSR1, NDSR2, NDSR3, NDSR4, NDTR1, NDTR2, NDTR3, NDTR4, NRI1,
- NRI2, NRI3, NRI4, NRTS1, NRTS2, NRTS3, NRTS4, OSCCLK, PEWAKE,
- PWM0, PWM1, PWM10G0, PWM10G1, PWM11G0, PWM11G1, PWM12G0, PWM12G1,
- PWM13G0, PWM13G1, PWM14G0, PWM14G1, PWM15G0, PWM15G1, PWM2, PWM3,
- PWM4, PWM5, PWM6, PWM7, PWM8G0, PWM8G1, PWM9G0, PWM9G1, QSPI1,
- QSPI2, RGMII1, RGMII2, RGMII3, RGMII4, RMII1, RMII2, RMII3,
- RMII4, RXD1, RXD2, RXD3, RXD4, SALT1, SALT10G0, SALT10G1,
- SALT11G0, SALT11G1, SALT12G0, SALT12G1, SALT13G0, SALT13G1,
- SALT14G0, SALT14G1, SALT15G0, SALT15G1, SALT16G0, SALT16G1,
- SALT2, SALT3, SALT4, SALT5, SALT6, SALT7, SALT8, SALT9G0,
- SALT9G1, SD1, SD2, SD3, SD3DAT4, SD3DAT5, SD3DAT6, SD3DAT7,
- SGPM1, SGPS1, SIOONCTRL, SIOPBI, SIOPBO, SIOPWREQ, SIOPWRGD,
- SIOS3, SIOS5, SIOSCI, SPI1, SPI1ABR, SPI1CS1, SPI1WP, SPI2,
- SPI2CS1, SPI2CS2, TACH0, TACH1, TACH10, TACH11, TACH12, TACH13,
- TACH14, TACH15, TACH2, TACH3, TACH4, TACH5, TACH6, TACH7, TACH8,
- TACH9, THRU0, THRU1, THRU2, THRU3, TXD1, TXD2, TXD3, TXD4,
- UART10, UART11, UART12G0, UART12G1, UART13G0, UART13G1, UART6,
- UART7, UART8, UART9, VB, VGAHS, VGAVS, WDTRST1, WDTRST2, WDTRST3,
- WDTRST4, ]
+ ADC2, ADC3, ADC4, ADC5, ADC6, ADC7, ADC8, ADC9, BMCINT, EMMCG1,
+ EMMCG4, EMMCG8, ESPI, ESPIALT, FSI1, FSI2, FWSPIABR, FWSPID,
+ FWQSPID, FWSPIWP, GPIT0, GPIT1, GPIT2, GPIT3, GPIT4, GPIT5,
+ GPIT6, GPIT7, GPIU0, GPIU1, GPIU2, GPIU3, GPIU4, GPIU5, GPIU6,
+ GPIU7, HVI3C3, HVI3C4, I2C1, I2C10, I2C11, I2C12, I2C13, I2C14,
+ I2C15, I2C16, I2C2, I2C3, I2C4, I2C5, I2C6, I2C7, I2C8, I2C9,
+ I3C3, I3C4, I3C5, I3C6, JTAGM, LHPD, LHSIRQ, LPC, LPCHC, LPCPD,
+ LPCPME, LPCSMI, LSIRQ, MACLINK1, MACLINK2, MACLINK3, MACLINK4,
+ MDIO1, MDIO2, MDIO3, MDIO4, NCTS1, NCTS2, NCTS3, NCTS4, NDCD1,
+ NDCD2, NDCD3, NDCD4, NDSR1, NDSR2, NDSR3, NDSR4, NDTR1, NDTR2,
+ NDTR3, NDTR4, NRI1, NRI2, NRI3, NRI4, NRTS1, NRTS2, NRTS3, NRTS4,
+ OSCCLK, PEWAKE, PWM0, PWM1, PWM10G0, PWM10G1, PWM11G0, PWM11G1,
+ PWM12G0, PWM12G1, PWM13G0, PWM13G1, PWM14G0, PWM14G1, PWM15G0,
+ PWM15G1, PWM2, PWM3, PWM4, PWM5, PWM6, PWM7, PWM8G0, PWM8G1,
+ PWM9G0, PWM9G1, QSPI1, QSPI2, RGMII1, RGMII2, RGMII3, RGMII4,
+ RMII1, RMII2, RMII3, RMII4, RXD1, RXD2, RXD3, RXD4, SALT1,
+ SALT10G0, SALT10G1, SALT11G0, SALT11G1, SALT12G0, SALT12G1,
+ SALT13G0, SALT13G1, SALT14G0, SALT14G1, SALT15G0, SALT15G1,
+ SALT16G0, SALT16G1, SALT2, SALT3, SALT4, SALT5, SALT6, SALT7,
+ SALT8, SALT9G0, SALT9G1, SD1, SD2, SD3, SGPM1, SGPS1, SIOONCTRL,
+ SIOPBI, SIOPBO, SIOPWREQ, SIOPWRGD, SIOS3, SIOS5, SIOSCI, SPI1,
+ SPI1ABR, SPI1CS1, SPI1WP, SPI2, SPI2CS1, SPI2CS2, TACH0, TACH1,
+ TACH10, TACH11, TACH12, TACH13, TACH14, TACH15, TACH2, TACH3,
+ TACH4, TACH5, TACH6, TACH7, TACH8, TACH9, THRU0, THRU1, THRU2,
+ THRU3, TXD1, TXD2, TXD3, TXD4, UART10, UART11, UART12G0,
+ UART12G1, UART13G0, UART13G1, UART6, UART7, UART8, UART9, VB,
+ VGAHS, VGAVS, WDTRST1, WDTRST2, WDTRST3, WDTRST4, ]
required:
- compatible
=======
For general Linux networking support, please use the netdev mailing
list, which is monitored by Pensando personnel::
+
netdev@vger.kernel.org
For more specific support needs, please use the Pensando driver support
email::
- drivers@pensando.io
+
+ drivers@pensando.io
Part III: Registering a Network Device to DIM
==============================================
-Net DIM API exposes the main function net_dim(struct net_dim *dim,
-struct net_dim_sample end_sample). This function is the entry point to the Net
+Net DIM API exposes the main function net_dim(struct dim *dim,
+struct dim_sample end_sample). This function is the entry point to the Net
DIM algorithm and has to be called every time the driver would like to check if
it should change interrupt moderation parameters. The driver should provide two
-data structures: struct net_dim and struct net_dim_sample. Struct net_dim
+data structures: struct dim and struct dim_sample. Struct dim
describes the state of DIM for a specific object (RX queue, TX queue,
other queues, etc.). This includes the current selected profile, previous data
samples, the callback function provided by the driver and more.
-Struct net_dim_sample describes a data sample, which will be compared to the
-data sample stored in struct net_dim in order to decide on the algorithm's next
+Struct dim_sample describes a data sample, which will be compared to the
+data sample stored in struct dim in order to decide on the algorithm's next
step. The sample should include bytes, packets and interrupts, measured by
the driver.
interrupt. Since Net DIM has a built-in moderation and it might decide to skip
iterations under certain conditions, there is no need to moderate the net_dim()
calls as well. As mentioned above, the driver needs to provide an object of type
-struct net_dim to the net_dim() function call. It is advised for each entity
-using Net DIM to hold a struct net_dim as part of its data structure and use it
-as the main Net DIM API object. The struct net_dim_sample should hold the latest
+struct dim to the net_dim() function call. It is advised for each entity
+using Net DIM to hold a struct dim as part of its data structure and use it
+as the main Net DIM API object. The struct dim_sample should hold the latest
bytes, packets and interrupts count. No need to perform any calculations, just
include the raw data.
my_driver.c:
-#include <linux/net_dim.h>
+#include <linux/dim.h>
/* Callback for net DIM to schedule on a decision to change moderation */
void my_driver_do_dim_work(struct work_struct *work)
{
- /* Get struct net_dim from struct work_struct */
- struct net_dim *dim = container_of(work, struct net_dim,
- work);
+ /* Get struct dim from struct work_struct */
+ struct dim *dim = container_of(work, struct dim,
+ work);
/* Do interrupt moderation related stuff */
...
/* Signal net DIM work is done and it should move to next iteration */
- dim->state = NET_DIM_START_MEASURE;
+ dim->state = DIM_START_MEASURE;
}
/* My driver's interrupt handler */
{
...
/* A struct to hold current measured data */
- struct net_dim_sample dim_sample;
+ struct dim_sample dim_sample;
...
/* Initiate data sample struct with current data */
- net_dim_sample(my_entity->events,
- my_entity->packets,
- my_entity->bytes,
- &dim_sample);
+ dim_update_sample(my_entity->events,
+ my_entity->packets,
+ my_entity->bytes,
+ &dim_sample);
/* Call net DIM */
net_dim(&my_entity->dim, dim_sample);
...
KVM_CAP_GUEST_DEBUG_HW_WPS capabilities which return a positive number
indicating the number of supported registers.
+For ppc, the KVM_CAP_PPC_GUEST_DEBUG_SSTEP capability indicates whether
+the single-step debug event (KVM_GUESTDBG_SINGLESTEP) is supported.
+
When debug events exit the main run loop with the reason
KVM_EXIT_DEBUG with the kvm_debug_exit_arch part of the kvm_run
structure containing architecture specific debug information.
Device type supported: KVM_DEV_TYPE_XICS
Groups:
- KVM_DEV_XICS_SOURCES
+ 1. KVM_DEV_XICS_GRP_SOURCES
Attributes: One per interrupt source, indexed by the source number.
+ 2. KVM_DEV_XICS_GRP_CTRL
+ Attributes:
+ 2.1 KVM_DEV_XICS_NR_SERVERS (write only)
+ The kvm_device_attr.addr points to a __u32 value which is the number of
+ interrupt server numbers (i.e., highest possible vcpu id plus one).
+ Errors:
+ -EINVAL: Value greater than KVM_MAX_VCPU_ID.
+ -EFAULT: Invalid user pointer for attr->addr.
+ -EBUSY: A vcpu is already connected to the device.
+
This device emulates the XICS (eXternal Interrupt Controller
Specification) defined in PAPR. The XICS has a set of interrupt
sources, each identified by a 20-bit source number, and a set of
Each source has 64 bits of state that can be read and written using
the KVM_GET_DEVICE_ATTR and KVM_SET_DEVICE_ATTR ioctls, specifying the
-KVM_DEV_XICS_SOURCES attribute group, with the attribute number being
+KVM_DEV_XICS_GRP_SOURCES attribute group, with the attribute number being
the interrupt source number. The 64 bit state word has the following
bitfields, starting from the least-significant end of the word:
migrating the VM.
Errors: none
+ 1.3 KVM_DEV_XIVE_NR_SERVERS (write only)
+ The kvm_device_attr.addr points to a __u32 value which is the number of
+ interrupt server numbers (i.e., highest possible vcpu id plus one).
+ Errors:
+ -EINVAL: Value greater than KVM_MAX_VCPU_ID.
+ -EFAULT: Invalid user pointer for attr->addr.
+ -EBUSY: A vCPU is already connected to the device.
+
2. KVM_DEV_XIVE_GRP_SOURCE (write only)
Initializes a new source in the XIVE device and mask it.
Attributes:
F: include/linux/ks0108.h
L3MDEV
-M: David Ahern <dsa@cumulusnetworks.com>
+M: David Ahern <dsahern@kernel.org>
L: netdev@vger.kernel.org
S: Maintained
F: net/l3mdev
M: Felix Fietkau <nbd@openwrt.org>
M: John Crispin <john@phrozen.org>
M: Sean Wang <sean.wang@mediatek.com>
-M: Nelson Chang <nelson.chang@mediatek.com>
+M: Mark Lee <Mark-MC.Lee@mediatek.com>
L: netdev@vger.kernel.org
S: Maintained
F: drivers/net/ethernet/mediatek/
F: Documentation/parisc/
F: drivers/parisc/
F: drivers/char/agp/parisc-agp.c
+F: drivers/input/misc/hp_sdc_rtc.c
F: drivers/input/serio/gscps2.c
+F: drivers/input/serio/hp_sdc*
F: drivers/parport/parport_gsc.*
F: drivers/tty/serial/8250/8250_gsc.c
F: drivers/video/fbdev/sti*
F: drivers/video/console/sti*
F: drivers/video/logo/logo_parisc*
+F: include/linux/hp_sdc.h
PARMAN
M: Jiri Pirko <jiri@mellanox.com>
F: drivers/scsi/qla1280.[ch]
QLOGIC QLA2XXX FC-SCSI DRIVER
-M: qla2xxx-upstream@qlogic.com
+M: hmadhani@marvell.com
L: linux-scsi@vger.kernel.org
S: Supported
F: Documentation/scsi/LICENSE.qla2xxx
K: regulator_get_optional
VRF
-M: David Ahern <dsa@cumulusnetworks.com>
+M: David Ahern <dsahern@kernel.org>
M: Shrijeet Mukherjee <shrijeet@gmail.com>
L: netdev@vger.kernel.org
S: Maintained
VERSION = 5
PATCHLEVEL = 4
SUBLEVEL = 0
-EXTRAVERSION = -rc3
+EXTRAVERSION = -rc4
NAME = Nesting Opossum
# *DOCUMENTATION*
export KBUILD_VMLINUX_LIBS := $(libs-y1)
export KBUILD_LDS := arch/$(SRCARCH)/kernel/vmlinux.lds
export LDFLAGS_vmlinux
-# used by scripts/package/Makefile
+# used by scripts/Makefile.package
export KBUILD_ALLDIRS := $(sort $(filter-out arch/%,$(vmlinux-alldirs)) LICENSES arch include scripts tools)
vmlinux-deps := $(KBUILD_LDS) $(KBUILD_VMLINUX_OBJS) $(KBUILD_VMLINUX_LIBS)
pinctrl-1 = <&ephy_leds_pins>;
status = "okay";
+ gmac0: mac@0 {
+ compatible = "mediatek,eth-mac";
+ reg = <0>;
+ phy-mode = "2500base-x";
+ fixed-link {
+ speed = <2500>;
+ full-duplex;
+ pause;
+ };
+ };
+
gmac1: mac@1 {
compatible = "mediatek,eth-mac";
reg = <1>;
+ phy-mode = "gmii";
phy-handle = <&phy0>;
};
phy0: ethernet-phy@0 {
reg = <0>;
- phy-mode = "gmii";
};
};
};
compatible = "mediatek,mt7629-sgmiisys", "syscon";
reg = <0x1b128000 0x3000>;
#clock-cells = <1>;
- mediatek,physpeed = "2500";
};
sgmiisys1: syscon@1b130000 {
compatible = "mediatek,mt7629-sgmiisys", "syscon";
reg = <0x1b130000 0x3000>;
#clock-cells = <1>;
- mediatek,physpeed = "2500";
};
};
};
If unsure, say Y.
+config CAVIUM_TX2_ERRATUM_219
+ bool "Cavium ThunderX2 erratum 219: PRFM between TTBR change and ISB fails"
+ default y
+ help
+ On Cavium ThunderX2, a load, store or prefetch instruction between a
+ TTBR update and the corresponding context synchronizing operation can
+ cause a spurious Data Abort to be delivered to any hardware thread in
+ the CPU core.
+
+ Work around the issue by avoiding the problematic code sequence and
+ trapping KVM guest TTBRx_EL1 writes to EL2 when SMT is enabled. The
+ trap handler performs the corresponding register access, skips the
+ instruction and ensures context synchronization by virtue of the
+ exception return.
+
+ If unsure, say Y.
+
config QCOM_FALKOR_ERRATUM_1003
bool "Falkor E1003: Incorrect translation due to ASID change"
default y
/*
* Remove the address tag from a virtual address, if present.
*/
- .macro clear_address_tag, dst, addr
- tst \addr, #(1 << 55)
- bic \dst, \addr, #(0xff << 56)
- csel \dst, \dst, \addr, eq
+ .macro untagged_addr, dst, addr
+ sbfx \dst, \addr, #0, #56 // sign-extend \addr from bit 55 into \dst
+ and \dst, \dst, \addr // bit 55 clear: bits 63:56 zeroed; bit 55 set: \addr unchanged
 .endm
#endif
#define ARM64_HAS_IRQ_PRIO_MASKING 42
#define ARM64_HAS_DCPODP 43
#define ARM64_WORKAROUND_1463225 44
+#define ARM64_WORKAROUND_CAVIUM_TX2_219_TVM 45
+#define ARM64_WORKAROUND_CAVIUM_TX2_219_PRFM 46
-#define ARM64_NCAPS 45
+#define ARM64_NCAPS 47
#endif /* __ASM_CPUCAPS_H */
* up with a tagged userland pointer. Clear the tag to get a sane pointer to
* pass on to access_ok(), for instance.
*/
-#define untagged_addr(addr) \
+#define __untagged_addr(addr) \
((__force __typeof__(addr))sign_extend64((__force u64)(addr), 55))
+/*
+ * AND the address with its sign-extension from bit 55: when bit 55 is clear
+ * the top byte is zeroed, when bit 55 is set the address is left unchanged.
+ * The argument is parenthesized so that an expression such as (p + off) is
+ * cast as a whole rather than only its first operand.
+ */
+#define untagged_addr(addr) ({ \
+ u64 __addr = (__force u64)(addr); \
+ __addr &= __untagged_addr(__addr); \
+ (__force __typeof__(addr))__addr; \
+})
+
#ifdef CONFIG_KASAN_SW_TAGS
#define __tag_shifted(tag) ((u64)(tag) << 56)
-#define __tag_reset(addr) untagged_addr(addr)
+#define __tag_reset(addr) __untagged_addr(addr)
#define __tag_get(addr) (__u8)((u64)(addr) >> 56)
#else
#define __tag_shifted(tag) 0UL
#define update_mmu_cache_pmd(vma, address, pmd) do { } while (0)
-#define kc_vaddr_to_offset(v) ((v) & ~PAGE_END)
-#define kc_offset_to_vaddr(o) ((o) | PAGE_END)
-
#ifdef CONFIG_ARM64_PA_BITS_52
#define phys_to_ttbr(addr) (((addr) | ((addr) >> 46)) & TTBR_BADDR_MASK_52)
#else
#define SYS_FAR_EL1 sys_reg(3, 0, 6, 0, 0)
#define SYS_PAR_EL1 sys_reg(3, 0, 7, 4, 0)
-#define SYS_PAR_EL1_F BIT(1)
+#define SYS_PAR_EL1_F BIT(0)
#define SYS_PAR_EL1_FST GENMASK(6, 1)
/*** Statistical Profiling Extension ***/
#include <asm/cpu.h>
#include <asm/cputype.h>
#include <asm/cpufeature.h>
+#include <asm/smp_plat.h>
static bool __maybe_unused
is_affected_midr_range(const struct arm64_cpu_capabilities *entry, int scope)
return (need_wa > 0);
}
+/*
+ * MIDR match list for the ThunderX2 family: all versions of the Broadcom
+ * Vulcan and Cavium ThunderX2 parts, terminated by an empty entry.
+ */
+static const __maybe_unused struct midr_range tx2_family_cpus[] = {
+ MIDR_ALL_VERSIONS(MIDR_BRCM_VULCAN),
+ MIDR_ALL_VERSIONS(MIDR_CAVIUM_THUNDERX2),
+ {},
+};
+
+/*
+ * Capability match callback for the ThunderX2 erratum 219 TVM workaround.
+ *
+ * Returns false when is_affected_midr_range_list() does not match or when
+ * hyp mode is not available. Otherwise returns true as soon as any possible
+ * CPU has a non-zero affinity level 0 in its logical-map MPIDR value, and
+ * false if none does.
+ * NOTE(review): a non-zero Aff0 presumably indicates that SMT is enabled,
+ * matching the Kconfig help text - confirm.
+ */
+static bool __maybe_unused
+needs_tx2_tvm_workaround(const struct arm64_cpu_capabilities *entry,
+ int scope)
+{
+ int i;
+
+ if (!is_affected_midr_range_list(entry, scope) ||
+ !is_hyp_mode_available())
+ return false;
+
+ for_each_possible_cpu(i) {
+ if (MPIDR_AFFINITY_LEVEL(cpu_logical_map(i), 0) != 0)
+ return true;
+ }
+
+ return false;
+}
+
#ifdef CONFIG_HARDEN_EL2_VECTORS
static const struct midr_range arm64_harden_el2_vectors[] = {
.type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM,
.matches = has_cortex_a76_erratum_1463225,
},
+#endif
+#ifdef CONFIG_CAVIUM_TX2_ERRATUM_219
+ {
+ .desc = "Cavium ThunderX2 erratum 219 (KVM guest sysreg trapping)",
+ .capability = ARM64_WORKAROUND_CAVIUM_TX2_219_TVM,
+ ERRATA_MIDR_RANGE_LIST(tx2_family_cpus),
+ .matches = needs_tx2_tvm_workaround,
+ },
+ {
+ .desc = "Cavium ThunderX2 erratum 219 (PRFM removal)",
+ .capability = ARM64_WORKAROUND_CAVIUM_TX2_219_PRFM,
+ ERRATA_MIDR_RANGE_LIST(tx2_family_cpus),
+ },
#endif
{
}
};
static const struct arm64_ftr_bits ftr_id_aa64zfr0[] = {
- ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_SM4_SHIFT, 4, 0),
- ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_SHA3_SHIFT, 4, 0),
- ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_BITPERM_SHIFT, 4, 0),
- ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_AES_SHIFT, 4, 0),
- ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_SVEVER_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE),
+ FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_SM4_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE),
+ FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_SHA3_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE),
+ FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_BITPERM_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE),
+ FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_AES_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE),
+ FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_SVEVER_SHIFT, 4, 0),
ARM64_FTR_END,
};
*/
mrs x3, far_el1
inherit_daif pstate=x23, tmp=x2
- clear_address_tag x0, x3
+ untagged_addr x0, x3
mov x2, sp // struct pt_regs
bl do_mem_abort
orr x24, x24, x0
alternative_else_nop_endif
cbnz x24, 1f // preempt count != 0 || NMI return path
- bl preempt_schedule_irq // irq en/disable is done inside
+ bl arm64_preempt_schedule_irq // irq en/disable is done inside
1:
#endif
mrs x26, far_el1
ct_user_exit_irqoff
enable_daif
- clear_address_tag x0, x26
+ untagged_addr x0, x26
mov x1, x25
mov x2, sp
bl do_mem_abort
#else
ldr x30, =vectors
#endif
+alternative_if_not ARM64_WORKAROUND_CAVIUM_TX2_219_PRFM
prfm plil1strm, [x30, #(1b - tramp_vectors)]
+alternative_else_nop_endif
msr vbar_el1, x30
add x30, x30, #(1b - tramp_vectors)
isb
gfp_t mask)
{
int rc = 0;
+ pgd_t *trans_pgd;
pgd_t *pgdp;
pud_t *pudp;
pmd_t *pmdp;
memcpy((void *)dst, src_start, length);
__flush_icache_range(dst, dst + length);
- pgdp = pgd_offset_raw(allocator(mask), dst_addr);
+ trans_pgd = allocator(mask);
+ if (!trans_pgd) {
+ rc = -ENOMEM;
+ goto out;
+ }
+
+ pgdp = pgd_offset_raw(trans_pgd, dst_addr);
if (pgd_none(READ_ONCE(*pgdp))) {
pudp = allocator(mask);
if (!pudp) {
#include <linux/sched/task.h>
#include <linux/sched/task_stack.h>
#include <linux/kernel.h>
+#include <linux/lockdep.h>
#include <linux/mm.h>
#include <linux/stddef.h>
#include <linux/sysctl.h>
#include <asm/alternative.h>
#include <asm/arch_gicv3.h>
#include <asm/compat.h>
+#include <asm/cpufeature.h>
#include <asm/cacheflush.h>
#include <asm/exec.h>
#include <asm/fpsimd.h>
core_initcall(tagged_addr_init);
#endif /* CONFIG_ARM64_TAGGED_ADDR_ABI */
+
+/*
+ * arm64 wrapper around preempt_schedule_irq(). Asserts (via lockdep) that
+ * IRQs are disabled, then calls preempt_schedule_irq() only when the
+ * arm64_const_caps_ready static branch is enabled; otherwise it returns
+ * without scheduling.
+ */
+asmlinkage void __sched arm64_preempt_schedule_irq(void)
+{
+ lockdep_assert_irqs_disabled();
+
+ /*
+ * Preempting a task from an IRQ means we leave copies of PSTATE
+ * on the stack. cpufeature's enable calls may modify PSTATE, but
+ * resuming one of these preempted tasks would undo those changes.
+ *
+ * Only allow a task to be preempted once cpufeatures have been
+ * enabled.
+ */
+ if (static_branch_likely(&arm64_const_caps_ready))
+ preempt_schedule_irq();
+}
{
u64 hcr = vcpu->arch.hcr_el2;
+ if (cpus_have_const_cap(ARM64_WORKAROUND_CAVIUM_TX2_219_TVM))
+ hcr |= HCR_TVM;
+
write_sysreg(hcr, hcr_el2);
if (cpus_have_const_cap(ARM64_HAS_RAS_EXTN) && (hcr & HCR_VSE))
* the crucial bit is "On taking a vSError interrupt,
* HCR_EL2.VSE is cleared to 0."
*/
- if (vcpu->arch.hcr_el2 & HCR_VSE)
- vcpu->arch.hcr_el2 = read_sysreg(hcr_el2);
+ if (vcpu->arch.hcr_el2 & HCR_VSE) {
+ vcpu->arch.hcr_el2 &= ~HCR_VSE;
+ vcpu->arch.hcr_el2 |= read_sysreg(hcr_el2) & HCR_VSE;
+ }
if (has_vhe())
deactivate_traps_vhe();
return true;
}
+/*
+ * Handle a trapped guest system register access on behalf of the ThunderX2
+ * erratum 219 workaround.
+ *
+ * The target register is decoded from the vcpu's HSR and the value is taken
+ * from the guest register selected by Rt. Returns false, leaving the exit to
+ * be handled elsewhere, when the vcpu's own hcr_el2 already has HCR_TVM set
+ * or when the register is not one of those listed in the switch below.
+ * Otherwise the value is written to the matching EL1 register, the trapped
+ * instruction is skipped and true is returned.
+ */
+static bool __hyp_text handle_tx2_tvm(struct kvm_vcpu *vcpu)
+{
+ u32 sysreg = esr_sys64_to_sysreg(kvm_vcpu_get_hsr(vcpu));
+ int rt = kvm_vcpu_sys_get_rt(vcpu);
+ u64 val = vcpu_get_reg(vcpu, rt);
+
+ /*
+ * The normal sysreg handling code expects to see the traps,
+ * let's not do anything here.
+ */
+ if (vcpu->arch.hcr_el2 & HCR_TVM)
+ return false;
+
+ switch (sysreg) {
+ case SYS_SCTLR_EL1:
+ write_sysreg_el1(val, SYS_SCTLR);
+ break;
+ case SYS_TTBR0_EL1:
+ write_sysreg_el1(val, SYS_TTBR0);
+ break;
+ case SYS_TTBR1_EL1:
+ write_sysreg_el1(val, SYS_TTBR1);
+ break;
+ case SYS_TCR_EL1:
+ write_sysreg_el1(val, SYS_TCR);
+ break;
+ case SYS_ESR_EL1:
+ write_sysreg_el1(val, SYS_ESR);
+ break;
+ case SYS_FAR_EL1:
+ write_sysreg_el1(val, SYS_FAR);
+ break;
+ case SYS_AFSR0_EL1:
+ write_sysreg_el1(val, SYS_AFSR0);
+ break;
+ case SYS_AFSR1_EL1:
+ write_sysreg_el1(val, SYS_AFSR1);
+ break;
+ case SYS_MAIR_EL1:
+ write_sysreg_el1(val, SYS_MAIR);
+ break;
+ case SYS_AMAIR_EL1:
+ write_sysreg_el1(val, SYS_AMAIR);
+ break;
+ case SYS_CONTEXTIDR_EL1:
+ write_sysreg_el1(val, SYS_CONTEXTIDR);
+ break;
+ default:
+ /* Not a register this workaround emulates. */
+ return false;
+ }
+
+ __kvm_skip_instr(vcpu);
+ return true;
+}
+
/*
* Return true when we were able to fixup the guest exit and should return to
* the guest, false when we should restore the host state and return to the
if (*exit_code != ARM_EXCEPTION_TRAP)
goto exit;
+ if (cpus_have_const_cap(ARM64_WORKAROUND_CAVIUM_TX2_219_TVM) &&
+ kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_SYS64 &&
+ handle_tx2_tvm(vcpu))
+ return true;
+
/*
* We trap the first access to the FP/SIMD to save the host context
* and restore the guest context lazily.
*/
val = ((pmcr & ~ARMV8_PMU_PMCR_MASK)
| (ARMV8_PMU_PMCR_MASK & 0xdecafbad)) & (~ARMV8_PMU_PMCR_E);
+ if (!system_supports_32bit_el0())
+ val |= ARMV8_PMU_PMCR_LC;
__vcpu_sys_reg(vcpu, r->reg) = val;
}
val = __vcpu_sys_reg(vcpu, PMCR_EL0);
val &= ~ARMV8_PMU_PMCR_MASK;
val |= p->regval & ARMV8_PMU_PMCR_MASK;
+ if (!system_supports_32bit_el0())
+ val |= ARMV8_PMU_PMCR_LC;
__vcpu_sys_reg(vcpu, PMCR_EL0) = val;
kvm_pmu_handle_pmcr(vcpu, val);
kvm_vcpu_pmu_restore_guest(vcpu);
par = read_sysreg(par_el1);
local_irq_restore(flags);
+ /*
+ * If we now have a valid translation, treat the translation fault as
+ * spurious.
+ */
if (!(par & SYS_PAR_EL1_F))
- return false;
+ return true;
/*
* If we got a different type of fault from the AT instruction,
#define ARCH_DMA_MINALIGN L1_CACHE_BYTES
-#define __read_mostly __attribute__((__section__(".data..read_mostly")))
+#define __read_mostly __section(.data..read_mostly)
void parisc_cache_init(void); /* initializes cache-flushing */
void disable_sr_hashing_asm(int); /* low level support for above */
})
#ifdef CONFIG_SMP
-# define __lock_aligned __attribute__((__section__(".data..lock_aligned")))
+# define __lock_aligned __section(.data..lock_aligned)
#endif
#endif /* __PARISC_LDCW_H */
* arch/parisc/mm/ioremap.c
*
* (C) Copyright 1995 1996 Linus Torvalds
- * (C) Copyright 2001-2006 Helge Deller <deller@gmx.de>
+ * (C) Copyright 2001-2019 Helge Deller <deller@gmx.de>
* (C) Copyright 2005 Kyle McMartin <kyle@parisc-linux.org>
*/
addr = (void __iomem *) area->addr;
if (ioremap_page_range((unsigned long)addr, (unsigned long)addr + size,
phys_addr, pgprot)) {
- vfree(addr);
+ vunmap(addr);
return NULL;
}
}
EXPORT_SYMBOL(__ioremap);
-void iounmap(const volatile void __iomem *addr)
+/*
+ * Undo an I/O mapping. The address is masked with PAGE_MASK and handed to
+ * vunmap() only when is_vmalloc_addr() accepts it; any other address is
+ * ignored.
+ */
+void iounmap(const volatile void __iomem *io_addr)
 {
- if (addr > high_memory)
- return vfree((void *) (PAGE_MASK & (unsigned long __force) addr));
+ unsigned long addr = (unsigned long)io_addr & PAGE_MASK;
+
+ if (is_vmalloc_addr((void *)addr))
+ vunmap((void *)addr);
 }
 EXPORT_SYMBOL(iounmap);
u32 (*mfsrin)(struct kvm_vcpu *vcpu, u32 srnum);
int (*xlate)(struct kvm_vcpu *vcpu, gva_t eaddr,
struct kvmppc_pte *pte, bool data, bool iswrite);
- void (*reset_msr)(struct kvm_vcpu *vcpu);
void (*tlbie)(struct kvm_vcpu *vcpu, ulong addr, bool large);
int (*esid_to_vsid)(struct kvm_vcpu *vcpu, ulong esid, u64 *vsid);
u64 (*ea_to_vp)(struct kvm_vcpu *vcpu, gva_t eaddr, bool data);
union kvmppc_one_reg *val);
void (*vcpu_load)(struct kvm_vcpu *vcpu, int cpu);
void (*vcpu_put)(struct kvm_vcpu *vcpu);
+ void (*inject_interrupt)(struct kvm_vcpu *vcpu, int vec, u64 srr1_flags);
void (*set_msr)(struct kvm_vcpu *vcpu, u64 msr);
int (*vcpu_run)(struct kvm_run *run, struct kvm_vcpu *vcpu);
struct kvm_vcpu *(*vcpu_create)(struct kvm *kvm, unsigned int id);
#define SPRN_USPRG7 0x107 /* SPRG7 userspace read */
#define SPRN_SRR0 0x01A /* Save/Restore Register 0 */
#define SPRN_SRR1 0x01B /* Save/Restore Register 1 */
+
+#ifdef CONFIG_PPC_BOOK3S
+/*
+ * Bits loaded from MSR upon interrupt.
+ * PPC (64-bit) bits 33-36,42-47 are interrupt dependent, the others are
+ * loaded from MSR. The exception is that SRESET and MCE do not always load
+ * bit 62 (RI) from MSR. Don't use PPC_BITMASK for this because 32-bit uses
+ * it.
+ */
+#define SRR1_MSR_BITS (~0x783f0000UL)
+#endif
+
#define SRR1_ISI_NOPT 0x40000000 /* ISI: Not found in hash */
#define SRR1_ISI_N_OR_G 0x10000000 /* ISI: Access is no-exec or G */
#define SRR1_ISI_PROT 0x08000000 /* ISI: Other protection fault */
/* PPC64 eXternal Interrupt Controller Specification */
#define KVM_DEV_XICS_GRP_SOURCES 1 /* 64-bit source attributes */
+#define KVM_DEV_XICS_GRP_CTRL 2
+#define KVM_DEV_XICS_NR_SERVERS 1
/* Layout of 64-bit source attribute values */
#define KVM_XICS_DESTINATION_SHIFT 0
#define KVM_DEV_XIVE_GRP_CTRL 1
#define KVM_DEV_XIVE_RESET 1
#define KVM_DEV_XIVE_EQ_SYNC 2
+#define KVM_DEV_XIVE_NR_SERVERS 3
#define KVM_DEV_XIVE_GRP_SOURCE 2 /* 64-bit source identifier */
#define KVM_DEV_XIVE_GRP_SOURCE_CONFIG 3 /* 64-bit source identifier */
#define KVM_DEV_XIVE_GRP_EQ_CONFIG 4 /* 64-bit EQ identifier */
{ NULL }
};
-void kvmppc_unfixup_split_real(struct kvm_vcpu *vcpu)
-{
- if (vcpu->arch.hflags & BOOK3S_HFLAG_SPLIT_HACK) {
- ulong pc = kvmppc_get_pc(vcpu);
- ulong lr = kvmppc_get_lr(vcpu);
- if ((pc & SPLIT_HACK_MASK) == SPLIT_HACK_OFFS)
- kvmppc_set_pc(vcpu, pc & ~SPLIT_HACK_MASK);
- if ((lr & SPLIT_HACK_MASK) == SPLIT_HACK_OFFS)
- kvmppc_set_lr(vcpu, lr & ~SPLIT_HACK_MASK);
- vcpu->arch.hflags &= ~BOOK3S_HFLAG_SPLIT_HACK;
- }
-}
-EXPORT_SYMBOL_GPL(kvmppc_unfixup_split_real);
-
-static inline unsigned long kvmppc_interrupt_offset(struct kvm_vcpu *vcpu)
-{
- if (!is_kvmppc_hv_enabled(vcpu->kvm))
- return to_book3s(vcpu)->hior;
- return 0;
-}
-
static inline void kvmppc_update_int_pending(struct kvm_vcpu *vcpu,
unsigned long pending_now, unsigned long old_pending)
{
+/*
+ * Inject interrupt vector @vec with the given @flags into @vcpu by
+ * delegating to the inject_interrupt hook of the VM's kvm_ops.
+ */
 void kvmppc_inject_interrupt(struct kvm_vcpu *vcpu, int vec, u64 flags)
 {
- kvmppc_unfixup_split_real(vcpu);
- kvmppc_set_srr0(vcpu, kvmppc_get_pc(vcpu));
- kvmppc_set_srr1(vcpu, (kvmppc_get_msr(vcpu) & ~0x783f0000ul) | flags);
- kvmppc_set_pc(vcpu, kvmppc_interrupt_offset(vcpu) + vec);
- vcpu->arch.mmu.reset_msr(vcpu);
+ vcpu->kvm->arch.kvm_ops->inject_interrupt(vcpu, vec, flags);
 }
static int kvmppc_book3s_vec2irqprio(unsigned int vec)
static inline void kvmppc_emulate_tabort(struct kvm_vcpu *vcpu, int ra_val) {}
#endif
+extern void kvmppc_set_msr_hv(struct kvm_vcpu *vcpu, u64 msr);
+extern void kvmppc_inject_interrupt_hv(struct kvm_vcpu *vcpu, int vec, u64 srr1_flags);
+
#endif
return (((u64)eaddr >> 12) & 0xffff) | (vsid << 16);
}
-static void kvmppc_mmu_book3s_32_reset_msr(struct kvm_vcpu *vcpu)
-{
- kvmppc_set_msr(vcpu, 0);
-}
-
static hva_t kvmppc_mmu_book3s_32_get_pteg(struct kvm_vcpu *vcpu,
u32 sre, gva_t eaddr,
bool primary)
mmu->mtsrin = kvmppc_mmu_book3s_32_mtsrin;
mmu->mfsrin = kvmppc_mmu_book3s_32_mfsrin;
mmu->xlate = kvmppc_mmu_book3s_32_xlate;
- mmu->reset_msr = kvmppc_mmu_book3s_32_reset_msr;
mmu->tlbie = kvmppc_mmu_book3s_32_tlbie;
mmu->esid_to_vsid = kvmppc_mmu_book3s_32_esid_to_vsid;
mmu->ea_to_vp = kvmppc_mmu_book3s_32_ea_to_vp;
#define dprintk(X...) do { } while(0)
#endif
-static void kvmppc_mmu_book3s_64_reset_msr(struct kvm_vcpu *vcpu)
-{
- unsigned long msr = vcpu->arch.intr_msr;
- unsigned long cur_msr = kvmppc_get_msr(vcpu);
-
- /* If transactional, change to suspend mode on IRQ delivery */
- if (MSR_TM_TRANSACTIONAL(cur_msr))
- msr |= MSR_TS_S;
- else
- msr |= cur_msr & MSR_TS_MASK;
-
- kvmppc_set_msr(vcpu, msr);
-}
-
static struct kvmppc_slb *kvmppc_mmu_book3s_64_find_slbe(
struct kvm_vcpu *vcpu,
gva_t eaddr)
mmu->slbie = kvmppc_mmu_book3s_64_slbie;
mmu->slbia = kvmppc_mmu_book3s_64_slbia;
mmu->xlate = kvmppc_mmu_book3s_64_xlate;
- mmu->reset_msr = kvmppc_mmu_book3s_64_reset_msr;
mmu->tlbie = kvmppc_mmu_book3s_64_tlbie;
mmu->esid_to_vsid = kvmppc_mmu_book3s_64_esid_to_vsid;
mmu->ea_to_vp = kvmppc_mmu_book3s_64_ea_to_vp;
return 0;
}
-static void kvmppc_mmu_book3s_64_hv_reset_msr(struct kvm_vcpu *vcpu)
-{
- unsigned long msr = vcpu->arch.intr_msr;
-
- /* If transactional, change to suspend mode on IRQ delivery */
- if (MSR_TM_TRANSACTIONAL(vcpu->arch.shregs.msr))
- msr |= MSR_TS_S;
- else
- msr |= vcpu->arch.shregs.msr & MSR_TS_MASK;
- kvmppc_set_msr(vcpu, msr);
-}
-
static long kvmppc_virtmode_do_h_enter(struct kvm *kvm, unsigned long flags,
long pte_index, unsigned long pteh,
unsigned long ptel, unsigned long *pte_idx_ret)
struct vm_area_struct *vma;
unsigned long rcbits;
long mmio_update;
+ struct mm_struct *mm;
if (kvm_is_radix(kvm))
return kvmppc_book3s_radix_page_fault(run, vcpu, ea, dsisr);
is_ci = false;
pfn = 0;
page = NULL;
+ mm = current->mm;
pte_size = PAGE_SIZE;
writing = (dsisr & DSISR_ISSTORE) != 0;
/* If writing != 0, then the HPTE must allow writing, if we get here */
npages = get_user_pages_fast(hva, 1, writing ? FOLL_WRITE : 0, pages);
if (npages < 1) {
/* Check if it's an I/O mapping */
- down_read(¤t->mm->mmap_sem);
- vma = find_vma(current->mm, hva);
+ down_read(&mm->mmap_sem);
+ vma = find_vma(mm, hva);
if (vma && vma->vm_start <= hva && hva + psize <= vma->vm_end &&
(vma->vm_flags & VM_PFNMAP)) {
pfn = vma->vm_pgoff +
is_ci = pte_ci(__pte((pgprot_val(vma->vm_page_prot))));
write_ok = vma->vm_flags & VM_WRITE;
}
- up_read(¤t->mm->mmap_sem);
+ up_read(&mm->mmap_sem);
if (!pfn)
goto out_put;
} else {
* hugepage split and collapse.
*/
local_irq_save(flags);
- ptep = find_current_mm_pte(current->mm->pgd,
- hva, NULL, NULL);
+ ptep = find_current_mm_pte(mm->pgd, hva, NULL, NULL);
if (ptep) {
pte = kvmppc_read_update_linux_pte(ptep, 1);
if (__pte_write(pte))
ret = anon_inode_getfd("kvm-htab", &kvm_htab_fops, ctx, rwflag | O_CLOEXEC);
if (ret < 0) {
kfree(ctx);
- kvm_put_kvm(kvm);
+ kvm_put_kvm_no_destroy(kvm);
return ret;
}
vcpu->arch.slb_nr = 32; /* POWER7/POWER8 */
mmu->xlate = kvmppc_mmu_book3s_64_hv_xlate;
- mmu->reset_msr = kvmppc_mmu_book3s_64_hv_reset_msr;
vcpu->arch.hflags |= BOOK3S_HFLAG_SLB;
}
if (ret >= 0)
list_add_rcu(&stt->list, &kvm->arch.spapr_tce_tables);
else
- kvm_put_kvm(kvm);
+ kvm_put_kvm_no_destroy(kvm);
mutex_unlock(&kvm->lock);
/* If set, the threads on each CPU core have to be in the same MMU mode */
static bool no_mixing_hpt_and_radix;
-static void kvmppc_end_cede(struct kvm_vcpu *vcpu);
static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu);
/*
spin_unlock_irqrestore(&vcpu->arch.tbacct_lock, flags);
}
-static void kvmppc_set_msr_hv(struct kvm_vcpu *vcpu, u64 msr)
-{
- /*
- * Check for illegal transactional state bit combination
- * and if we find it, force the TS field to a safe state.
- */
- if ((msr & MSR_TS_MASK) == MSR_TS_MASK)
- msr &= ~MSR_TS_MASK;
- vcpu->arch.shregs.msr = msr;
- kvmppc_end_cede(vcpu);
-}
-
static void kvmppc_set_pvr_hv(struct kvm_vcpu *vcpu, u32 pvr)
{
vcpu->arch.pvr = pvr;
vcpu->arch.dawr = value1;
vcpu->arch.dawrx = value2;
return H_SUCCESS;
+ case H_SET_MODE_RESOURCE_ADDR_TRANS_MODE:
+ /* KVM does not support mflags=2 (AIL=2) */
+ if (mflags != 0 && mflags != 3)
+ return H_UNSUPPORTED_FLAG_START;
+ return H_TOO_HARD;
default:
return H_TOO_HARD;
}
vcpu->arch.timer_running = 1;
}
-static void kvmppc_end_cede(struct kvm_vcpu *vcpu)
-{
- vcpu->arch.ceded = 0;
- if (vcpu->arch.timer_running) {
- hrtimer_try_to_cancel(&vcpu->arch.dec_timer);
- vcpu->arch.timer_running = 0;
- }
-}
-
extern int __kvmppc_vcore_entry(void);
static void kvmppc_remove_runnable(struct kvmppc_vcore *vc,
.set_one_reg = kvmppc_set_one_reg_hv,
.vcpu_load = kvmppc_core_vcpu_load_hv,
.vcpu_put = kvmppc_core_vcpu_put_hv,
+ .inject_interrupt = kvmppc_inject_interrupt_hv,
.set_msr = kvmppc_set_msr_hv,
.vcpu_run = kvmppc_vcpu_run_hv,
.vcpu_create = kvmppc_core_vcpu_create_hv,
local_paca->kvm_hstate.kvm_split_mode = NULL;
}
+/*
+ * Take a vcpu out of the ceded state: clear arch.ceded and, if the
+ * dec_timer hrtimer is flagged as running, try to cancel it and clear
+ * the timer_running flag.
+ */
+static void kvmppc_end_cede(struct kvm_vcpu *vcpu)
+{
+	vcpu->arch.ceded = 0;
+
+	/* Nothing more to do unless the timer was armed */
+	if (!vcpu->arch.timer_running)
+		return;
+
+	hrtimer_try_to_cancel(&vcpu->arch.dec_timer);
+	vcpu->arch.timer_running = 0;
+}
+
+/*
+ * Store a new guest MSR value for an HV vcpu.
+ *
+ * A value with all MSR_TS_MASK bits set is an illegal transactional
+ * state bit combination; the TS field is forced to a safe state
+ * (cleared) before the value is written to shregs.msr. The vcpu is
+ * then taken out of the ceded state.
+ */
+void kvmppc_set_msr_hv(struct kvm_vcpu *vcpu, u64 msr)
+{
+	u64 ts_field = msr & MSR_TS_MASK;
+
+	if (ts_field == MSR_TS_MASK)
+		msr &= ~MSR_TS_MASK;
+
+	vcpu->arch.shregs.msr = msr;
+	kvmppc_end_cede(vcpu);
+}
+EXPORT_SYMBOL_GPL(kvmppc_set_msr_hv);
+
+/*
+ * Redirect the vcpu to interrupt vector @vec.
+ *
+ * The current PC is saved in SRR0 and the current MSR, filtered through
+ * SRR1_MSR_BITS and combined with @srr1_flags, in SRR1. The PC is then
+ * set to the vector address and shregs.msr to a value built from
+ * vcpu->arch.intr_msr.
+ */
+static void inject_interrupt(struct kvm_vcpu *vcpu, int vec, u64 srr1_flags)
+{
+	unsigned long old_msr = kvmppc_get_msr(vcpu);
+	unsigned long old_pc = kvmppc_get_pc(vcpu);
+	unsigned long target_msr = vcpu->arch.intr_msr;
+	unsigned long target_pc = vec;
+	bool use_ail;
+
+	/* If transactional, change to suspend mode on IRQ delivery */
+	target_msr |= MSR_TM_TRANSACTIONAL(old_msr) ?
+			MSR_TS_S : (old_msr & MSR_TS_MASK);
+
+	/*
+	 * Perform MSR and PC adjustment for LPCR[AIL]=3 if it is set and
+	 * applicable. AIL=2 is not supported.
+	 *
+	 * AIL does not apply to SRESET, MCE, or HMI (which is never
+	 * delivered to the guest), and does not apply if IR=0 or DR=0.
+	 */
+	use_ail = vec != BOOK3S_INTERRUPT_SYSTEM_RESET &&
+		  vec != BOOK3S_INTERRUPT_MACHINE_CHECK &&
+		  (vcpu->arch.vcore->lpcr & LPCR_AIL) == LPCR_AIL_3 &&
+		  (old_msr & (MSR_IR | MSR_DR)) == (MSR_IR | MSR_DR);
+	if (use_ail) {
+		target_msr |= MSR_IR | MSR_DR;
+		target_pc += 0xC000000000004000ULL;
+	}
+
+	kvmppc_set_srr0(vcpu, old_pc);
+	kvmppc_set_srr1(vcpu, (old_msr & SRR1_MSR_BITS) | srr1_flags);
+	kvmppc_set_pc(vcpu, target_pc);
+	vcpu->arch.shregs.msr = target_msr;
+}
+
+/*
+ * Exported interrupt injection entry point for HV KVM: deliver @vec with
+ * @srr1_flags through inject_interrupt() and then take the vcpu out of
+ * the ceded state.
+ */
+void kvmppc_inject_interrupt_hv(struct kvm_vcpu *vcpu, int vec, u64 srr1_flags)
+{
+	inject_interrupt(vcpu, vec, srr1_flags);
+
+	/* inject_interrupt() writes shregs.msr directly, so end the cede here */
+	kvmppc_end_cede(vcpu);
+}
+EXPORT_SYMBOL_GPL(kvmppc_inject_interrupt_hv);
+
/*
* Is there a PRIV_DOORBELL pending for the guest (on POWER9)?
* Can we inject a Decrementer or an External interrupt?
void kvmppc_guest_entry_inject_int(struct kvm_vcpu *vcpu)
{
int ext;
- unsigned long vec = 0;
unsigned long lpcr;
/* Insert EXTERNAL bit into LPCR at the MER bit position */
if (vcpu->arch.shregs.msr & MSR_EE) {
if (ext) {
- vec = BOOK3S_INTERRUPT_EXTERNAL;
+ inject_interrupt(vcpu, BOOK3S_INTERRUPT_EXTERNAL, 0);
} else {
long int dec = mfspr(SPRN_DEC);
if (!(lpcr & LPCR_LD))
dec = (int) dec;
if (dec < 0)
- vec = BOOK3S_INTERRUPT_DECREMENTER;
+ inject_interrupt(vcpu,
+ BOOK3S_INTERRUPT_DECREMENTER, 0);
}
}
- if (vec) {
- unsigned long msr, old_msr = vcpu->arch.shregs.msr;
-
- kvmppc_set_srr0(vcpu, kvmppc_get_pc(vcpu));
- kvmppc_set_srr1(vcpu, old_msr);
- kvmppc_set_pc(vcpu, vec);
- msr = vcpu->arch.intr_msr;
- if (MSR_TM_ACTIVE(old_msr))
- msr |= MSR_TS_S;
- vcpu->arch.shregs.msr = msr;
- }
if (vcpu->arch.doorbell_request) {
mtspr(SPRN_DPDES, 1);
forward_to_l1:
vcpu->arch.fault_dsisr = flags;
if (vcpu->arch.trap == BOOK3S_INTERRUPT_H_INST_STORAGE) {
- vcpu->arch.shregs.msr &= ~0x783f0000ul;
+ vcpu->arch.shregs.msr &= SRR1_MSR_BITS;
vcpu->arch.shregs.msr |= flags;
}
return RESUME_HOST;
kvmppc_set_pc(vcpu, pc | SPLIT_HACK_OFFS);
}
-void kvmppc_unfixup_split_real(struct kvm_vcpu *vcpu);
+/*
+ * Undo the split-real fixup: when BOOK3S_HFLAG_SPLIT_HACK is set, strip
+ * SPLIT_HACK_OFFS from the PC and from LR (each only if it currently
+ * carries that offset) and clear the flag.
+ */
+static void kvmppc_unfixup_split_real(struct kvm_vcpu *vcpu)
+{
+	ulong pc, lr;
+
+	if (!(vcpu->arch.hflags & BOOK3S_HFLAG_SPLIT_HACK))
+		return;
+
+	pc = kvmppc_get_pc(vcpu);
+	lr = kvmppc_get_lr(vcpu);
+
+	if ((pc & SPLIT_HACK_MASK) == SPLIT_HACK_OFFS)
+		kvmppc_set_pc(vcpu, pc & ~SPLIT_HACK_MASK);
+	if ((lr & SPLIT_HACK_MASK) == SPLIT_HACK_OFFS)
+		kvmppc_set_lr(vcpu, lr & ~SPLIT_HACK_MASK);
+
+	vcpu->arch.hflags &= ~BOOK3S_HFLAG_SPLIT_HACK;
+}
+
+/*
+ * Interrupt injection for PR KVM.
+ *
+ * Any split-real fixup is undone first. The current PC goes to SRR0 and
+ * the current MSR, filtered through SRR1_MSR_BITS and combined with
+ * @srr1_flags, goes to SRR1. The vcpu then continues at hior + @vec with
+ * an MSR built from vcpu->arch.intr_msr.
+ */
+static void kvmppc_inject_interrupt_pr(struct kvm_vcpu *vcpu, int vec, u64 srr1_flags)
+{
+	unsigned long old_msr, old_pc, target_msr, target_pc;
+
+	kvmppc_unfixup_split_real(vcpu);
+
+	old_msr = kvmppc_get_msr(vcpu);
+	old_pc = kvmppc_get_pc(vcpu);
+	target_msr = vcpu->arch.intr_msr;
+	target_pc = to_book3s(vcpu)->hior + vec;
+
+#ifdef CONFIG_PPC_BOOK3S_64
+	/* If transactional, change to suspend mode on IRQ delivery */
+	target_msr |= MSR_TM_TRANSACTIONAL(old_msr) ?
+			MSR_TS_S : (old_msr & MSR_TS_MASK);
+#endif
+
+	kvmppc_set_srr0(vcpu, old_pc);
+	kvmppc_set_srr1(vcpu, (old_msr & SRR1_MSR_BITS) | srr1_flags);
+	kvmppc_set_pc(vcpu, target_pc);
+	kvmppc_set_msr(vcpu, target_msr);
+}
static void kvmppc_core_vcpu_load_pr(struct kvm_vcpu *vcpu, int cpu)
{
#else
/* default to book3s_32 (750) */
vcpu->arch.pvr = 0x84202;
+ vcpu->arch.intr_msr = 0;
#endif
kvmppc_set_pvr_pr(vcpu, vcpu->arch.pvr);
vcpu->arch.slb_nr = 64;
.set_one_reg = kvmppc_set_one_reg_pr,
.vcpu_load = kvmppc_core_vcpu_load_pr,
.vcpu_put = kvmppc_core_vcpu_put_pr,
+ .inject_interrupt = kvmppc_inject_interrupt_pr,
.set_msr = kvmppc_set_msr_pr,
.vcpu_run = kvmppc_vcpu_run_pr,
.vcpu_create = kvmppc_core_vcpu_create_pr,
vcpu->arch.xive_vcpu = NULL;
}
+/*
+ * Check that raw vCPU id @cpu is acceptable for this XIVE device.
+ *
+ * The device owns a block of xive->nr_servers VPs. It is enough to check
+ * that the raw id is below the limit implied by the guest's core stride
+ * (emul_smt_mode): kvmppc_pack_vcpu_id() then packs it down to an index
+ * that can safely be used to compute a VP id inside the VP block.
+ */
+static bool kvmppc_xive_vcpu_id_valid(struct kvmppc_xive *xive, u32 cpu)
+{
+	if (cpu >= xive->nr_servers * xive->kvm->arch.emul_smt_mode)
+		return false;
+
+	return true;
+}
+
+/*
+ * Compute the VP id to use for vCPU id @cpu and return it through @vp.
+ *
+ * The VP block is allocated lazily: if xive->vp_base is still
+ * XIVE_INVALID_VP, a block of xive->nr_servers VPs is allocated here.
+ * Both callers visible in this file invoke this with xive->lock held.
+ *
+ * Returns 0 on success, -EINVAL if @cpu is out of bounds, -ENOSPC if the
+ * VP block cannot be allocated, or -EEXIST if the resulting VP id is
+ * already used by another vCPU of this VM. *@vp is written only on
+ * success.
+ */
+int kvmppc_xive_compute_vp_id(struct kvmppc_xive *xive, u32 cpu, u32 *vp)
+{
+	u32 vp_id;
+
+	if (!kvmppc_xive_vcpu_id_valid(xive, cpu)) {
+		pr_devel("Out of bounds !\n");
+		return -EINVAL;
+	}
+
+	if (xive->vp_base == XIVE_INVALID_VP) {
+		xive->vp_base = xive_native_alloc_vp_block(xive->nr_servers);
+		/* nr_servers is a u32: print it unsigned, as set_nr_servers does */
+		pr_devel("VP_Base=%x nr_servers=%u\n", xive->vp_base, xive->nr_servers);
+
+		if (xive->vp_base == XIVE_INVALID_VP)
+			return -ENOSPC;
+	}
+
+	vp_id = kvmppc_xive_vp(xive, cpu);
+	if (kvmppc_xive_vp_in_use(xive->kvm, vp_id)) {
+		pr_devel("Duplicate !\n");
+		return -EEXIST;
+	}
+
+	*vp = vp_id;
+
+	return 0;
+}
+
int kvmppc_xive_connect_vcpu(struct kvm_device *dev,
struct kvm_vcpu *vcpu, u32 cpu)
{
struct kvmppc_xive *xive = dev->private;
struct kvmppc_xive_vcpu *xc;
int i, r = -EBUSY;
+ u32 vp_id;
pr_devel("connect_vcpu(cpu=%d)\n", cpu);
return -EPERM;
if (vcpu->arch.irq_type != KVMPPC_IRQ_DEFAULT)
return -EBUSY;
- if (kvmppc_xive_find_server(vcpu->kvm, cpu)) {
- pr_devel("Duplicate !\n");
- return -EEXIST;
- }
- if (cpu >= (KVM_MAX_VCPUS * vcpu->kvm->arch.emul_smt_mode)) {
- pr_devel("Out of bounds !\n");
- return -EINVAL;
- }
- xc = kzalloc(sizeof(*xc), GFP_KERNEL);
- if (!xc)
- return -ENOMEM;
/* We need to synchronize with queue provisioning */
mutex_lock(&xive->lock);
+
+ r = kvmppc_xive_compute_vp_id(xive, cpu, &vp_id);
+ if (r)
+ goto bail;
+
+ xc = kzalloc(sizeof(*xc), GFP_KERNEL);
+ if (!xc) {
+ r = -ENOMEM;
+ goto bail;
+ }
+
vcpu->arch.xive_vcpu = xc;
xc->xive = xive;
xc->vcpu = vcpu;
xc->server_num = cpu;
- xc->vp_id = kvmppc_xive_vp(xive, cpu);
+ xc->vp_id = vp_id;
xc->mfrr = 0xff;
xc->valid = true;
return 0;
}
+/*
+ * Set the number of servers (size of the VP block) from the u32 that
+ * userspace stored at @addr.
+ *
+ * Returns 0 on success, -EFAULT if the value cannot be read, -EINVAL if
+ * it is zero or above KVM_MAX_VCPU_ID, or -EBUSY if the VP block has
+ * already been allocated.
+ */
+int kvmppc_xive_set_nr_servers(struct kvmppc_xive *xive, u64 addr)
+{
+	u32 __user *uptr = (u32 __user *) addr;
+	u32 nr_servers;
+	int rc = 0;
+
+	if (get_user(nr_servers, uptr))
+		return -EFAULT;
+
+	pr_devel("%s nr_servers=%u\n", __func__, nr_servers);
+
+	if (!nr_servers || nr_servers > KVM_MAX_VCPU_ID)
+		return -EINVAL;
+
+	mutex_lock(&xive->lock);
+	if (xive->vp_base != XIVE_INVALID_VP) {
+		/* The VP block is allocated once and freed when the device
+		 * is released. Better not to allow changing its size, since
+		 * it is used by connect_vcpu to validate vCPU ids (e.g.
+		 * setting it back to a higher value could allow connect_vcpu
+		 * to come up with a VP id that goes beyond the VP block, which
+		 * is likely to cause a crash in OPAL).
+		 */
+		rc = -EBUSY;
+	} else {
+		/* We don't need more servers than KVM_MAX_VCPUS. Higher vCPU
+		 * ids get packed down below KVM_MAX_VCPUS by
+		 * kvmppc_pack_vcpu_id().
+		 */
+		if (nr_servers > KVM_MAX_VCPUS)
+			nr_servers = KVM_MAX_VCPUS;
+		xive->nr_servers = nr_servers;
+	}
+	mutex_unlock(&xive->lock);
+
+	return rc;
+}
+
static int xive_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
{
struct kvmppc_xive *xive = dev->private;
switch (attr->group) {
case KVM_DEV_XICS_GRP_SOURCES:
return xive_set_source(xive, attr->attr, attr->addr);
+ case KVM_DEV_XICS_GRP_CTRL:
+ switch (attr->attr) {
+ case KVM_DEV_XICS_NR_SERVERS:
+ return kvmppc_xive_set_nr_servers(xive, attr->addr);
+ }
}
return -ENXIO;
}
attr->attr < KVMPPC_XICS_NR_IRQS)
return 0;
break;
+ case KVM_DEV_XICS_GRP_CTRL:
+ switch (attr->attr) {
+ case KVM_DEV_XICS_NR_SERVERS:
+ return 0;
+ }
}
return -ENXIO;
}
{
struct kvmppc_xive *xive;
struct kvm *kvm = dev->kvm;
- int ret = 0;
pr_devel("Creating xive for partition\n");
+ /* Already there ? */
+ if (kvm->arch.xive)
+ return -EEXIST;
+
xive = kvmppc_xive_get_device(kvm, type);
if (!xive)
return -ENOMEM;
xive->kvm = kvm;
mutex_init(&xive->lock);
- /* Already there ? */
- if (kvm->arch.xive)
- ret = -EEXIST;
- else
- kvm->arch.xive = xive;
-
/* We use the default queue size set by the host */
xive->q_order = xive_native_default_eq_shift();
if (xive->q_order < PAGE_SHIFT)
else
xive->q_page_order = xive->q_order - PAGE_SHIFT;
- /* Allocate a bunch of VPs */
- xive->vp_base = xive_native_alloc_vp_block(KVM_MAX_VCPUS);
- pr_devel("VP_Base=%x\n", xive->vp_base);
-
- if (xive->vp_base == XIVE_INVALID_VP)
- ret = -ENOMEM;
+ /* VP allocation is delayed to the first call to connect_vcpu */
+ xive->vp_base = XIVE_INVALID_VP;
+ /* KVM_MAX_VCPUS limits the number of VMs to roughly 64 per socket
+ * on a POWER9 system.
+ */
+ xive->nr_servers = KVM_MAX_VCPUS;
xive->single_escalation = xive_native_has_single_escalation();
- if (ret)
- return ret;
-
+ kvm->arch.xive = xive;
return 0;
}
if (!xc)
continue;
- seq_printf(m, "cpu server %#x CPPR:%#x HWCPPR:%#x"
+ seq_printf(m, "cpu server %#x VP:%#x CPPR:%#x HWCPPR:%#x"
" MFRR:%#x PEND:%#x h_xirr: R=%lld V=%lld\n",
- xc->server_num, xc->cppr, xc->hw_cppr,
+ xc->server_num, xc->vp_id, xc->cppr, xc->hw_cppr,
xc->mfrr, xc->pending,
xc->stat_rm_h_xirr, xc->stat_vm_h_xirr);
/* Flags */
u8 single_escalation;
+ /* Number of entries in the VP block */
+ u32 nr_servers;
+
struct kvmppc_xive_ops *ops;
struct address_space *mapping;
struct mutex mapping_lock;
return xive->vp_base + kvmppc_pack_vcpu_id(xive->kvm, server);
}
+/*
+ * Return true if some vCPU of @kvm already has a XIVE vcpu structure
+ * whose vp_id equals @vp_id.
+ */
+static inline bool kvmppc_xive_vp_in_use(struct kvm *kvm, u32 vp_id)
+{
+	struct kvm_vcpu *vcpu = NULL;
+	int i;
+
+	kvm_for_each_vcpu(i, vcpu, kvm) {
+		/* vCPUs not connected to XIVE have no xive_vcpu to compare */
+		if (!vcpu->arch.xive_vcpu)
+			continue;
+		if (vcpu->arch.xive_vcpu->vp_id == vp_id)
+			return true;
+	}
+
+	return false;
+}
+
/*
* Mapping between guest priorities and host priorities
* is as follow.
struct kvmppc_xive *kvmppc_xive_get_device(struct kvm *kvm, u32 type);
void xive_cleanup_single_escalation(struct kvm_vcpu *vcpu,
struct kvmppc_xive_vcpu *xc, int irq);
+int kvmppc_xive_compute_vp_id(struct kvmppc_xive *xive, u32 cpu, u32 *vp);
+int kvmppc_xive_set_nr_servers(struct kvmppc_xive *xive, u64 addr);
#endif /* CONFIG_KVM_XICS */
#endif /* _KVM_PPC_BOOK3S_XICS_H */
struct kvmppc_xive *xive = dev->private;
struct kvmppc_xive_vcpu *xc = NULL;
int rc;
+ u32 vp_id;
pr_devel("native_connect_vcpu(server=%d)\n", server_num);
return -EPERM;
if (vcpu->arch.irq_type != KVMPPC_IRQ_DEFAULT)
return -EBUSY;
- if (server_num >= (KVM_MAX_VCPUS * vcpu->kvm->arch.emul_smt_mode)) {
- pr_devel("Out of bounds !\n");
- return -EINVAL;
- }
mutex_lock(&xive->lock);
- if (kvmppc_xive_find_server(vcpu->kvm, server_num)) {
- pr_devel("Duplicate !\n");
- rc = -EEXIST;
+ rc = kvmppc_xive_compute_vp_id(xive, server_num, &vp_id);
+ if (rc)
goto bail;
- }
xc = kzalloc(sizeof(*xc), GFP_KERNEL);
if (!xc) {
xc->vcpu = vcpu;
xc->server_num = server_num;
- xc->vp_id = kvmppc_xive_vp(xive, server_num);
+ xc->vp_id = vp_id;
xc->valid = true;
vcpu->arch.irq_type = KVMPPC_IRQ_XIVE;
return kvmppc_xive_reset(xive);
case KVM_DEV_XIVE_EQ_SYNC:
return kvmppc_xive_native_eq_sync(xive);
+ case KVM_DEV_XIVE_NR_SERVERS:
+ return kvmppc_xive_set_nr_servers(xive, attr->addr);
}
break;
case KVM_DEV_XIVE_GRP_SOURCE:
switch (attr->attr) {
case KVM_DEV_XIVE_RESET:
case KVM_DEV_XIVE_EQ_SYNC:
+ case KVM_DEV_XIVE_NR_SERVERS:
return 0;
}
break;
{
struct kvmppc_xive *xive;
struct kvm *kvm = dev->kvm;
- int ret = 0;
pr_devel("Creating xive native device\n");
dev->private = xive;
xive->dev = dev;
xive->kvm = kvm;
- kvm->arch.xive = xive;
mutex_init(&xive->mapping_lock);
mutex_init(&xive->lock);
- /*
- * Allocate a bunch of VPs. KVM_MAX_VCPUS is a large value for
- * a default. Getting the max number of CPUs the VM was
- * configured with would improve our usage of the XIVE VP space.
+ /* VP allocation is delayed to the first call to connect_vcpu */
+ xive->vp_base = XIVE_INVALID_VP;
+ /* KVM_MAX_VCPUS limits the number of VMs to roughly 64 per socket
+ * on a POWER9 system.
*/
- xive->vp_base = xive_native_alloc_vp_block(KVM_MAX_VCPUS);
- pr_devel("VP_Base=%x\n", xive->vp_base);
-
- if (xive->vp_base == XIVE_INVALID_VP)
- ret = -ENXIO;
+ xive->nr_servers = KVM_MAX_VCPUS;
xive->single_escalation = xive_native_has_single_escalation();
xive->ops = &kvmppc_xive_native_ops;
- if (ret)
- return ret;
-
+ kvm->arch.xive = xive;
return 0;
}
if (!xc)
continue;
- seq_printf(m, "cpu server %#x NSR=%02x CPPR=%02x IBP=%02x PIPR=%02x w01=%016llx w2=%08x\n",
- xc->server_num,
+ seq_printf(m, "cpu server %#x VP=%#x NSR=%02x CPPR=%02x IBP=%02x PIPR=%02x w01=%016llx w2=%08x\n",
+ xc->server_num, xc->vp_id,
vcpu->arch.xive_saved_state.nsr,
vcpu->arch.xive_saved_state.cppr,
vcpu->arch.xive_saved_state.ipb,
if (tlbsel == 1) {
struct vm_area_struct *vma;
- down_read(¤t->mm->mmap_sem);
+ down_read(&kvm->mm->mmap_sem);
- vma = find_vma(current->mm, hva);
+ vma = find_vma(kvm->mm, hva);
if (vma && hva >= vma->vm_start &&
(vma->vm_flags & VM_PFNMAP)) {
/*
tsize = max(BOOK3E_PAGESZ_4K, tsize & ~1);
}
- up_read(¤t->mm->mmap_sem);
+ up_read(&kvm->mm->mmap_sem);
}
if (likely(!pfnmap)) {
case KVM_CAP_IMMEDIATE_EXIT:
r = 1;
break;
+ case KVM_CAP_PPC_GUEST_DEBUG_SSTEP:
+ /* fall through */
case KVM_CAP_PPC_PAIRED_SINGLES:
case KVM_CAP_PPC_OSI:
case KVM_CAP_PPC_GET_PVINFO:
compatible = "sifive,hifive-unleashed-a00", "sifive,fu540-c000";
chosen {
+ stdout-path = "serial0";
};
cpus {
#define VMALLOC_END (PAGE_OFFSET - 1)
#define VMALLOC_START (PAGE_OFFSET - VMALLOC_SIZE)
-#define FIXADDR_TOP VMALLOC_START
-#ifdef CONFIG_64BIT
-#define FIXADDR_SIZE PMD_SIZE
-#else
-#define FIXADDR_SIZE PGDIR_SIZE
-#endif
-#define FIXADDR_START (FIXADDR_TOP - FIXADDR_SIZE)
-
/*
* Roughly size the vmemmap space to be large enough to fit enough
* struct pages to map half the virtual address space. Then
#define vmemmap ((struct page *)VMEMMAP_START)
+#define FIXADDR_TOP (VMEMMAP_START)
+#ifdef CONFIG_64BIT
+#define FIXADDR_SIZE PMD_SIZE
+#else
+#define FIXADDR_SIZE PGDIR_SIZE
+#endif
+#define FIXADDR_START (FIXADDR_TOP - FIXADDR_SIZE)
+
/*
* ZERO_PAGE is a global shared page that is always zero,
* used for zero-mapped memory areas, etc.
#include <linux/mm_types.h>
#include <asm/smp.h>
-/*
- * Flush entire local TLB. 'sfence.vma' implicitly fences with the instruction
- * cache as well, so a 'fence.i' is not necessary.
- */
static inline void local_flush_tlb_all(void)
{
__asm__ __volatile__ ("sfence.vma" : : : "memory");
asmlinkage void do_trap_break(struct pt_regs *regs)
{
- if (!user_mode(regs)) {
+ if (user_mode(regs)) {
+ force_sig_fault(SIGTRAP, TRAP_BRKPT,
+ (void __user *)(regs->sepc));
+ return;
+ }
+#ifdef CONFIG_GENERIC_BUG
+ {
enum bug_trap_type type;
type = report_bug(regs->sepc, regs);
- switch (type) {
-#ifdef CONFIG_GENERIC_BUG
- case BUG_TRAP_TYPE_WARN:
+ if (type == BUG_TRAP_TYPE_WARN) {
regs->sepc += get_break_insn_length(regs->sepc);
return;
- case BUG_TRAP_TYPE_BUG:
-#endif /* CONFIG_GENERIC_BUG */
- default:
- die(regs, "Kernel BUG");
}
- } else {
- force_sig_fault(SIGTRAP, TRAP_BRKPT,
- (void __user *)(regs->sepc));
}
+#endif /* CONFIG_GENERIC_BUG */
+
+ die(regs, "Kernel BUG");
}
#ifdef CONFIG_GENERIC_BUG
u64 diagnose_10;
u64 diagnose_44;
u64 diagnose_9c;
+ u64 diagnose_9c_ignored;
u64 diagnose_258;
u64 diagnose_308;
u64 diagnose_500;
tid = vcpu->run->s.regs.gprs[(vcpu->arch.sie_block->ipa & 0xf0) >> 4];
vcpu->stat.diagnose_9c++;
- VCPU_EVENT(vcpu, 5, "diag time slice end directed to %d", tid);
+ /* yield to self */
if (tid == vcpu->vcpu_id)
- return 0;
+ goto no_yield;
+ /* yield to invalid */
tcpu = kvm_get_vcpu_by_id(vcpu->kvm, tid);
- if (tcpu)
- kvm_vcpu_yield_to(tcpu);
+ if (!tcpu)
+ goto no_yield;
+
+ /* target already running */
+ if (READ_ONCE(tcpu->cpu) >= 0)
+ goto no_yield;
+
+ if (kvm_vcpu_yield_to(tcpu) <= 0)
+ goto no_yield;
+
+ VCPU_EVENT(vcpu, 5, "diag time slice end directed to %d: done", tid);
+ return 0;
+no_yield:
+ VCPU_EVENT(vcpu, 5, "diag time slice end directed to %d: ignored", tid);
+ vcpu->stat.diagnose_9c_ignored++;
return 0;
}
return 0;
}
-static int __inject_sigp_restart(struct kvm_vcpu *vcpu,
- struct kvm_s390_irq *irq)
+static int __inject_sigp_restart(struct kvm_vcpu *vcpu)
{
struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
rc = __inject_sigp_stop(vcpu, irq);
break;
case KVM_S390_RESTART:
- rc = __inject_sigp_restart(vcpu, irq);
+ rc = __inject_sigp_restart(vcpu);
break;
case KVM_S390_INT_CLOCK_COMP:
rc = __inject_ckc(vcpu);
{ "instruction_diag_10", VCPU_STAT(diagnose_10) },
{ "instruction_diag_44", VCPU_STAT(diagnose_44) },
{ "instruction_diag_9c", VCPU_STAT(diagnose_9c) },
+ { "diag_9c_ignored", VCPU_STAT(diagnose_9c_ignored) },
{ "instruction_diag_258", VCPU_STAT(diagnose_258) },
{ "instruction_diag_308", VCPU_STAT(diagnose_308) },
{ "instruction_diag_500", VCPU_STAT(diagnose_500) },
int kvm_arch_init(void *opaque)
{
- int rc;
+ int rc = -ENOMEM;
kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
if (!kvm_s390_dbf)
return -ENOMEM;
- if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view)) {
- rc = -ENOMEM;
- goto out_debug_unreg;
- }
+ if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view))
+ goto out;
kvm_s390_cpu_feat_init();
rc = kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
if (rc) {
pr_err("A FLIC registration call failed with rc=%d\n", rc);
- goto out_debug_unreg;
+ goto out;
}
rc = kvm_s390_gib_init(GAL_ISC);
if (rc)
- goto out_gib_destroy;
+ goto out;
return 0;
-out_gib_destroy:
- kvm_s390_gib_destroy();
-out_debug_unreg:
- debug_unregister(kvm_s390_dbf);
+out:
+ kvm_arch_exit();
return rc;
}
select RTC_DRV_M48T59
select RTC_SYSTOHC
select HAVE_ARCH_JUMP_LABEL if SPARC64
- select HAVE_FAST_GUP if SPARC64
select GENERIC_IRQ_SHOW
select ARCH_WANT_IPC_PARSE_VERSION
select GENERIC_PCI_IOMAP
*/
struct mem_vector immovable_mem[MAX_NUMNODES*2];
-/*
- * Max length of 64-bit hex address string is 19, prefix "0x" + 16 hex
- * digits, and '\0' for termination.
- */
-#define MAX_ADDR_LEN 19
-
-static acpi_physical_address get_cmdline_acpi_rsdp(void)
-{
- acpi_physical_address addr = 0;
-
-#ifdef CONFIG_KEXEC
- char val[MAX_ADDR_LEN] = { };
- int ret;
-
- ret = cmdline_find_option("acpi_rsdp", val, MAX_ADDR_LEN);
- if (ret < 0)
- return 0;
-
- if (kstrtoull(val, 16, &addr))
- return 0;
-#endif
- return addr;
-}
-
/*
* Search EFI system tables for RSDP. If both ACPI_20_TABLE_GUID and
* ACPI_TABLE_GUID are found, take the former, which has more features.
}
#if defined(CONFIG_RANDOMIZE_BASE) && defined(CONFIG_MEMORY_HOTREMOVE)
+/*
+ * Max length of 64-bit hex address string is 19, prefix "0x" + 16 hex
+ * digits, and '\0' for termination.
+ */
+#define MAX_ADDR_LEN 19
+
+/*
+ * Return the RSDP address given by the "acpi_rsdp" command line option.
+ *
+ * The option is only looked up when CONFIG_KEXEC is enabled. Returns 0
+ * if it is not looked up, if the lookup fails, or if the value does not
+ * parse as a base-16 number.
+ */
+static acpi_physical_address get_cmdline_acpi_rsdp(void)
+{
+	acpi_physical_address addr = 0;
+
+#ifdef CONFIG_KEXEC
+	char val[MAX_ADDR_LEN] = { };
+
+	/* Either failure means there is no usable address */
+	if (cmdline_find_option("acpi_rsdp", val, sizeof(val)) < 0 ||
+	    kstrtoull(val, 16, &addr))
+		return 0;
+#endif
+	return addr;
+}
+
/* Compute SRAT address from RSDP. */
static unsigned long get_acpi_srat_table(void)
{
{
const unsigned long kernel_total_size = VO__end - VO__text;
unsigned long virt_addr = LOAD_PHYSICAL_ADDR;
+ unsigned long needed_size;
/* Retain x86 boot parameters pointer passed from startup_32/64. */
boot_params = rmode;
free_mem_ptr = heap; /* Heap */
free_mem_end_ptr = heap + BOOT_HEAP_SIZE;
+ /*
+ * The memory hole needed for the kernel is the larger of either
+ * the entire decompressed kernel plus relocation table, or the
+ * entire decompressed kernel plus .bss and .brk sections.
+ *
+ * On X86_64, the memory is mapped with PMD pages. Round the
+ * size up so that the full extent of PMD pages mapped is
+ * included in the check against the valid memory table
+ * entries. This ensures the full mapped area is usable RAM
+ * and doesn't include any reserved areas.
+ */
+ needed_size = max(output_len, kernel_total_size);
+#ifdef CONFIG_X86_64
+ needed_size = ALIGN(needed_size, MIN_KERNEL_ALIGN);
+#endif
+
/* Report initial kernel position details. */
debug_putaddr(input_data);
debug_putaddr(input_len);
debug_putaddr(output);
debug_putaddr(output_len);
debug_putaddr(kernel_total_size);
+ debug_putaddr(needed_size);
#ifdef CONFIG_X86_64
/* Report address of 32-bit trampoline */
debug_putaddr(trampoline_32bit);
#endif
- /*
- * The memory hole needed for the kernel is the larger of either
- * the entire decompressed kernel plus relocation table, or the
- * entire decompressed kernel plus .bss and .brk sections.
- */
choose_random_location((unsigned long)input_data, input_len,
(unsigned long *)&output,
- max(output_len, kernel_total_size),
+ needed_size,
&virt_addr);
/* Validate memory location choices. */
return 0;
}
+#ifdef CONFIG_RETPOLINE
+static struct perf_guest_switch_msr *core_guest_get_msrs(int *nr);
+static struct perf_guest_switch_msr *intel_guest_get_msrs(int *nr);
+#endif
+
struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr)
{
+#ifdef CONFIG_RETPOLINE
+ if (x86_pmu.guest_get_msrs == intel_guest_get_msrs)
+ return intel_guest_get_msrs(nr);
+ else if (x86_pmu.guest_get_msrs == core_guest_get_msrs)
+ return core_guest_get_msrs(nr);
+#endif
if (x86_pmu.guest_get_msrs)
return x86_pmu.guest_get_msrs(nr);
*nr = 0;
}
if (ms_hyperv.hints & HV_X64_APIC_ACCESS_RECOMMENDED) {
- pr_info("Hyper-V: Using MSR based APIC access\n");
+ pr_info("Hyper-V: Using enlightened APIC (%s mode)",
+ x2apic_enabled() ? "x2apic" : "xapic");
+ /*
+ * With x2apic, architectural x2apic MSRs are equivalent to the
+ * respective synthetic MSRs, so there's no need to override
+ * the apic accessors. The only exception is
+ * hv_apic_eoi_write, because it benefits from lazy EOI when
+ * available, but it works for both xapic and x2apic modes.
+ */
apic_set_eoi_write(hv_apic_eoi_write);
- apic->read = hv_apic_read;
- apic->write = hv_apic_write;
- apic->icr_write = hv_apic_icr_write;
- apic->icr_read = hv_apic_icr_read;
+ if (!x2apic_enabled()) {
+ apic->read = hv_apic_read;
+ apic->write = hv_apic_write;
+ apic->icr_write = hv_apic_icr_write;
+ apic->icr_read = hv_apic_icr_read;
+ }
}
}
VCPU_REGS_R15 = __VCPU_REGS_R15,
#endif
VCPU_REGS_RIP,
- NR_VCPU_REGS
-};
+ NR_VCPU_REGS,
-enum kvm_reg_ex {
VCPU_EXREG_PDPTR = NR_VCPU_REGS,
VCPU_EXREG_CR3,
VCPU_EXREG_RFLAGS,
u64 eventsel;
struct perf_event *perf_event;
struct kvm_vcpu *vcpu;
+ /*
+ * eventsel value for general purpose counters,
+ * ctrl value for fixed counters.
+ */
+ u64 current_config;
};
struct kvm_pmu {
struct kvm_pmc gp_counters[INTEL_PMC_MAX_GENERIC];
struct kvm_pmc fixed_counters[INTEL_PMC_MAX_FIXED];
struct irq_work irq_work;
- u64 reprogram_pmi;
+ DECLARE_BITMAP(reprogram_pmi, X86_PMC_IDX_MAX);
+ DECLARE_BITMAP(all_valid_pmc_idx, X86_PMC_IDX_MAX);
+ DECLARE_BITMAP(pmc_in_use, X86_PMC_IDX_MAX);
+
+ /*
+ * The gate to release perf_events not marked in
+ * pmc_in_use only once in a vcpu time slice.
+ */
+ bool need_cleanup;
+
+ /*
+ * The total number of programmed perf_events; it helps to avoid a
+ * redundant check before cleanup if the guest doesn't use vPMU at all.
+ */
+ u8 event_count;
};
struct kvm_pmu_ops;
u64 smbase;
u64 smi_count;
bool tpr_access_reporting;
+ bool xsaves_enabled;
u64 ia32_xss;
u64 microcode_version;
u64 arch_capabilities;
struct kvm_segment *var, int seg);
void (*get_cs_db_l_bits)(struct kvm_vcpu *vcpu, int *db, int *l);
void (*decache_cr0_guest_bits)(struct kvm_vcpu *vcpu);
- void (*decache_cr3)(struct kvm_vcpu *vcpu);
void (*decache_cr4_guest_bits)(struct kvm_vcpu *vcpu);
void (*set_cr0)(struct kvm_vcpu *vcpu, unsigned long cr0);
void (*set_cr3)(struct kvm_vcpu *vcpu, unsigned long cr3);
void (*enable_nmi_window)(struct kvm_vcpu *vcpu);
void (*enable_irq_window)(struct kvm_vcpu *vcpu);
void (*update_cr8_intercept)(struct kvm_vcpu *vcpu, int tpr, int irr);
- bool (*get_enable_apicv)(struct kvm_vcpu *vcpu);
+ bool (*get_enable_apicv)(struct kvm *kvm);
void (*refresh_apicv_exec_ctrl)(struct kvm_vcpu *vcpu);
void (*hwapic_irr_update)(struct kvm_vcpu *vcpu, int max_irr);
void (*hwapic_isr_update)(struct kvm_vcpu *vcpu, int isr);
void kvm_make_mclock_inprogress_request(struct kvm *kvm);
void kvm_make_scan_ioapic_request(struct kvm *kvm);
+void kvm_make_scan_ioapic_request_mask(struct kvm *kvm,
+ unsigned long *vcpu_bitmap);
void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
struct kvm_async_pf *work);
{
struct cluster_mask *cmsk = per_cpu(cluster_masks, dead_cpu);
- cpumask_clear_cpu(dead_cpu, &cmsk->mask);
+ if (cmsk)
+ cpumask_clear_cpu(dead_cpu, &cmsk->mask);
free_cpumask_var(per_cpu(ipi_mask, dead_cpu));
return 0;
}
int hv_host_info_ecx;
int hv_host_info_edx;
+#ifdef CONFIG_PARAVIRT
+ pv_info.name = "Hyper-V";
+#endif
+
/*
* Extract the features and hints
*/
* we might write invalid pmds, when the kernel is relocated
* cleanup_highmap() fixes this up along with the mappings
* beyond _end.
+ *
+ * Only the region occupied by the kernel image has so far
+ * been checked against the table of usable memory regions
+ * provided by the firmware, so invalidate pages outside that
+ * region. A page table entry that maps to a reserved area of
+ * memory would allow processor speculation into that area,
+ * and on some hardware (particularly the UV platform) even
+ * speculative access to some reserved areas is caught as an
+ * error, causing the BIOS to halt the system.
*/
pmd = fixup_pointer(level2_kernel_pgt, physaddr);
- for (i = 0; i < PTRS_PER_PMD; i++) {
+
+ /* invalidate pages before the kernel image */
+ for (i = 0; i < pmd_index((unsigned long)_text); i++)
+ pmd[i] &= ~_PAGE_PRESENT;
+
+ /* fixup pages that are part of the kernel image */
+ for (; i <= pmd_index((unsigned long)_end); i++)
if (pmd[i] & _PAGE_PRESENT)
pmd[i] += load_delta;
- }
+
+ /* invalidate pages after the kernel image */
+ for (; i < PTRS_PER_PMD; i++)
+ pmd[i] &= ~_PAGE_PRESENT;
/*
* Fixup phys_base - remove the memory encryption mask to obtain
#include <asm/apicdef.h>
#include <asm/hypervisor.h>
#include <asm/tlb.h>
+#include <asm/cpuidle_haltpoll.h>
static int kvmapf = 1;
/* cpuid 7.0.ecx*/
const u32 kvm_cpuid_7_0_ecx_x86_features =
- F(AVX512VBMI) | F(LA57) | F(PKU) | 0 /*OSPKE*/ |
+ F(AVX512VBMI) | F(LA57) | F(PKU) | 0 /*OSPKE*/ | F(RDPID) |
F(AVX512_VPOPCNTDQ) | F(UMIP) | F(AVX512_VBMI2) | F(GFNI) |
F(VAES) | F(VPCLMULQDQ) | F(AVX512_VNNI) | F(AVX512_BITALG) |
F(CLDEMOTE) | F(MOVDIRI) | F(MOVDIR64B) | 0 /*WAITPKG*/;
return emulate_ud(ctxt);
ops->get_msr(ctxt, MSR_EFER, &efer);
- setup_syscalls_segments(ctxt, &cs, &ss);
-
if (!(efer & EFER_SCE))
return emulate_ud(ctxt);
+ setup_syscalls_segments(ctxt, &cs, &ss);
ops->get_msr(ctxt, MSR_STAR, &msr_data);
msr_data >>= 32;
cs_sel = (u16)(msr_data & 0xfffc);
if (ctxt->mode == X86EMUL_MODE_PROT64)
return X86EMUL_UNHANDLEABLE;
- setup_syscalls_segments(ctxt, &cs, &ss);
-
ops->get_msr(ctxt, MSR_IA32_SYSENTER_CS, &msr_data);
if ((msr_data & 0xfffc) == 0x0)
return emulate_gp(ctxt, 0);
+ setup_syscalls_segments(ctxt, &cs, &ss);
ctxt->eflags &= ~(X86_EFLAGS_VM | X86_EFLAGS_IF);
cs_sel = (u16)msr_data & ~SEGMENT_RPL_MASK;
ss_sel = cs_sel + 8;
{
unsigned index;
bool mask_before, mask_after;
-int old_remote_irr, old_delivery_status;
union kvm_ioapic_redirect_entry *e;
+/*
+ * Destination vCPUs for the scan request. kvm_bitmap_or_dest_vcpus() sets
+ * one bit per matching vCPU index and is given no length, so the bitmap
+ * must cover every possible vCPU; a single unsigned long would be
+ * overrun on the stack as soon as an index reaches BITS_PER_LONG.
+ */
+DECLARE_BITMAP(vcpu_bitmap, KVM_MAX_VCPUS);
+int old_remote_irr, old_delivery_status, old_dest_id, old_dest_mode;
switch (ioapic->ioregsel) {
case IOAPIC_REG_VERSION:
/* Preserve read-only fields */
old_remote_irr = e->fields.remote_irr;
old_delivery_status = e->fields.delivery_status;
+old_dest_id = e->fields.dest_id;
+old_dest_mode = e->fields.dest_mode;
if (ioapic->ioregsel & 1) {
e->bits &= 0xffffffff;
e->bits |= (u64) val << 32;
if (e->fields.trig_mode == IOAPIC_LEVEL_TRIG
&& ioapic->irr & (1 << index))
ioapic_service(ioapic, index, false);
-kvm_make_scan_ioapic_request(ioapic->kvm);
+ if (e->fields.delivery_mode == APIC_DM_FIXED) {
+ struct kvm_lapic_irq irq;
+
+ irq.shorthand = 0;
+ irq.vector = e->fields.vector;
+ irq.delivery_mode = e->fields.delivery_mode << 8;
+ irq.dest_id = e->fields.dest_id;
+ irq.dest_mode = e->fields.dest_mode;
+ /* Clear the whole bitmap, not just its first 16 bits. */
+ bitmap_zero(vcpu_bitmap, KVM_MAX_VCPUS);
+ kvm_bitmap_or_dest_vcpus(ioapic->kvm, &irq,
+ vcpu_bitmap);
+ if (old_dest_mode != e->fields.dest_mode ||
+ old_dest_id != e->fields.dest_id) {
+ /*
+ * Update vcpu_bitmap with vcpus specified in
+ * the previous request as well. This is done to
+ * keep ioapic_handled_vectors synchronized.
+ */
+ irq.dest_id = old_dest_id;
+ irq.dest_mode = old_dest_mode;
+ kvm_bitmap_or_dest_vcpus(ioapic->kvm, &irq,
+ vcpu_bitmap);
+ }
+ kvm_make_scan_ioapic_request_mask(ioapic->kvm,
+ vcpu_bitmap);
+ } else {
+ kvm_make_scan_ioapic_request(ioapic->kvm);
+ }
break;
}
}
BUILD_KVM_GPR_ACCESSORS(r15, R15)
#endif
-static inline unsigned long kvm_register_read(struct kvm_vcpu *vcpu,
- enum kvm_reg reg)
+static inline bool kvm_register_is_available(struct kvm_vcpu *vcpu,
+ enum kvm_reg reg)
{
- if (!test_bit(reg, (unsigned long *)&vcpu->arch.regs_avail))
+ return test_bit(reg, (unsigned long *)&vcpu->arch.regs_avail);
+}
+
+/* Report whether the bit for @reg is set in vcpu->arch.regs_dirty. */
+static inline bool kvm_register_is_dirty(struct kvm_vcpu *vcpu,
+					  enum kvm_reg reg)
+{
+	unsigned long *dirty_mask = (unsigned long *)&vcpu->arch.regs_dirty;
+
+	return test_bit(reg, dirty_mask);
+}
+
+/* Set the bit for @reg in vcpu->arch.regs_avail (non-atomic __set_bit). */
+static inline void kvm_register_mark_available(struct kvm_vcpu *vcpu,
+					       enum kvm_reg reg)
+{
+	unsigned long *avail_mask = (unsigned long *)&vcpu->arch.regs_avail;
+
+	__set_bit(reg, avail_mask);
+}
+
+/*
+ * Set the bit for @reg in both vcpu->arch.regs_avail and
+ * vcpu->arch.regs_dirty, in that order (non-atomic __set_bit).
+ */
+static inline void kvm_register_mark_dirty(struct kvm_vcpu *vcpu,
+					   enum kvm_reg reg)
+{
+	unsigned long *avail_mask = (unsigned long *)&vcpu->arch.regs_avail;
+	unsigned long *dirty_mask = (unsigned long *)&vcpu->arch.regs_dirty;
+
+	__set_bit(reg, avail_mask);
+	__set_bit(reg, dirty_mask);
+}
+
+static inline unsigned long kvm_register_read(struct kvm_vcpu *vcpu, int reg)
+{
+ if (WARN_ON_ONCE((unsigned int)reg >= NR_VCPU_REGS))
+ return 0;
+
+ if (!kvm_register_is_available(vcpu, reg))
kvm_x86_ops->cache_reg(vcpu, reg);
return vcpu->arch.regs[reg];
}
-static inline void kvm_register_write(struct kvm_vcpu *vcpu,
- enum kvm_reg reg,
+static inline void kvm_register_write(struct kvm_vcpu *vcpu, int reg,
unsigned long val)
{
+ if (WARN_ON_ONCE((unsigned int)reg >= NR_VCPU_REGS))
+ return;
+
vcpu->arch.regs[reg] = val;
- __set_bit(reg, (unsigned long *)&vcpu->arch.regs_dirty);
- __set_bit(reg, (unsigned long *)&vcpu->arch.regs_avail);
+ kvm_register_mark_dirty(vcpu, reg);
}
static inline unsigned long kvm_rip_read(struct kvm_vcpu *vcpu)
{
might_sleep(); /* on svm */
- if (!test_bit(VCPU_EXREG_PDPTR,
- (unsigned long *)&vcpu->arch.regs_avail))
- kvm_x86_ops->cache_reg(vcpu, (enum kvm_reg)VCPU_EXREG_PDPTR);
+ if (!kvm_register_is_available(vcpu, VCPU_EXREG_PDPTR))
+ kvm_x86_ops->cache_reg(vcpu, VCPU_EXREG_PDPTR);
return vcpu->arch.walk_mmu->pdptrs[index];
}
static inline ulong kvm_read_cr3(struct kvm_vcpu *vcpu)
{
- if (!test_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail))
- kvm_x86_ops->decache_cr3(vcpu);
+ if (!kvm_register_is_available(vcpu, VCPU_EXREG_CR3))
+ kvm_x86_ops->cache_reg(vcpu, VCPU_EXREG_CR3);
return vcpu->arch.cr3;
}
irq->level, irq->trig_mode, dest_map);
}
+/*
+ * Deliver @irq to every vCPU whose bit is set in @ipi_bitmap, where bit N
+ * stands for the map->phys_map[] slot @min + N. Bits that would index past
+ * map->max_apic_id, and slots with no entry, are ignored.
+ *
+ * Returns the sum of the kvm_apic_set_irq() results, or 0 when @min itself
+ * is already beyond map->max_apic_id.
+ */
+static int __pv_send_ipi(unsigned long *ipi_bitmap, struct kvm_apic_map *map,
+			 struct kvm_lapic_irq *irq, u32 min)
+{
+	struct kvm_vcpu *target;
+	int bit, delivered = 0;
+	u32 nbits;
+
+	if (min > map->max_apic_id)
+		return 0;
+
+	/* Never look at more bits than one long holds or the map covers. */
+	nbits = min((u32)BITS_PER_LONG, (map->max_apic_id - min + 1));
+
+	for_each_set_bit(bit, ipi_bitmap, nbits) {
+		if (!map->phys_map[min + bit])
+			continue;
+
+		target = map->phys_map[min + bit]->vcpu;
+		delivered += kvm_apic_set_irq(target, irq, NULL);
+	}
+
+	return delivered;
+}
+
int kvm_pv_send_ipi(struct kvm *kvm, unsigned long ipi_bitmap_low,
unsigned long ipi_bitmap_high, u32 min,
unsigned long icr, int op_64_bit)
{
- int i;
struct kvm_apic_map *map;
- struct kvm_vcpu *vcpu;
struct kvm_lapic_irq irq = {0};
int cluster_size = op_64_bit ? 64 : 32;
- int count = 0;
+ int count;
+
+ if (icr & (APIC_DEST_MASK | APIC_SHORT_MASK))
+ return -KVM_EINVAL;
irq.vector = icr & APIC_VECTOR_MASK;
irq.delivery_mode = icr & APIC_MODE_MASK;
irq.level = (icr & APIC_INT_ASSERT) != 0;
irq.trig_mode = icr & APIC_INT_LEVELTRIG;
- if (icr & APIC_DEST_MASK)
- return -KVM_EINVAL;
- if (icr & APIC_SHORT_MASK)
- return -KVM_EINVAL;
-
rcu_read_lock();
map = rcu_dereference(kvm->arch.apic_map);
- if (unlikely(!map)) {
- count = -EOPNOTSUPP;
- goto out;
- }
-
- if (min > map->max_apic_id)
- goto out;
- /* Bits above cluster_size are masked in the caller. */
- for_each_set_bit(i, &ipi_bitmap_low,
- min((u32)BITS_PER_LONG, (map->max_apic_id - min + 1))) {
- if (map->phys_map[min + i]) {
- vcpu = map->phys_map[min + i]->vcpu;
- count += kvm_apic_set_irq(vcpu, &irq, NULL);
- }
+ count = -EOPNOTSUPP;
+ if (likely(map)) {
+ count = __pv_send_ipi(&ipi_bitmap_low, map, &irq, min);
+ min += cluster_size;
+ count += __pv_send_ipi(&ipi_bitmap_high, map, &irq, min);
}
- min += cluster_size;
-
- if (min > map->max_apic_id)
- goto out;
-
- for_each_set_bit(i, &ipi_bitmap_high,
- min((u32)BITS_PER_LONG, (map->max_apic_id - min + 1))) {
- if (map->phys_map[min + i]) {
- vcpu = map->phys_map[min + i]->vcpu;
- count += kvm_apic_set_irq(vcpu, &irq, NULL);
- }
- }
-
-out:
rcu_read_unlock();
return count;
}
return result;
}
+/*
+ * This routine identifies the destination vcpus mask meant to receive the
+ * IOAPIC interrupts. It either uses kvm_apic_map_get_dest_lapic() to find
+ * out the destination vcpus array and set the bitmap or it traverses to
+ * each available vcpu to identify the same.
+ *
+ * Bits are only ever set in @vcpu_bitmap (never cleared), so the results of
+ * several calls accumulate in the same bitmap. kvm->arch.apic_map is read
+ * and used under rcu_read_lock().
+ *
+ * NOTE(review): no bitmap length is passed in, and bits are set with
+ * __set_bit() at a per-vCPU index (vcpu_idx on the map path, the
+ * kvm_for_each_vcpu() index otherwise) -- presumably the caller must
+ * supply a bitmap large enough for every vCPU of @kvm; confirm at each
+ * call site.
+ */
+void kvm_bitmap_or_dest_vcpus(struct kvm *kvm, struct kvm_lapic_irq *irq,
+ unsigned long *vcpu_bitmap)
+{
+ struct kvm_lapic **dest_vcpu = NULL;
+ struct kvm_lapic *src = NULL;
+ struct kvm_apic_map *map;
+ struct kvm_vcpu *vcpu;
+ unsigned long bitmap;
+ int i, vcpu_idx;
+ bool ret;
+
+ rcu_read_lock();
+ map = rcu_dereference(kvm->arch.apic_map);
+
+ ret = kvm_apic_map_get_dest_lapic(kvm, &src, irq, map, &dest_vcpu,
+ &bitmap);
+ if (ret) { /* lookup returned true: use dest_vcpu[] and bitmap as its outputs */
+ for_each_set_bit(i, &bitmap, 16) { /* only the low 16 bits of bitmap are examined */
+ if (!dest_vcpu[i])
+ continue;
+ vcpu_idx = dest_vcpu[i]->vcpu->vcpu_idx;
+ __set_bit(vcpu_idx, vcpu_bitmap);
+ }
+ } else { /* lookup returned false: test every vCPU individually */
+ kvm_for_each_vcpu(i, vcpu, kvm) {
+ if (!kvm_apic_present(vcpu))
+ continue;
+ if (!kvm_apic_match_dest(vcpu, NULL,
+ irq->delivery_mode,
+ irq->dest_id,
+ irq->dest_mode))
+ continue;
+ __set_bit(i, vcpu_bitmap);
+ }
+ }
+ rcu_read_unlock();
+}
+
int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2)
{
return vcpu1->arch.apic_arb_prio - vcpu2->arch.apic_arb_prio;
* KVM_MP_STATE_INIT_RECEIVED state), just eat SIPIs
* and leave the INIT pending.
*/
- if (is_smm(vcpu) || kvm_x86_ops->apic_init_signal_blocked(vcpu)) {
+ if (kvm_vcpu_latch_init(vcpu)) {
WARN_ON_ONCE(vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED);
if (test_bit(KVM_APIC_SIPI, &apic->pending_events))
clear_bit(KVM_APIC_SIPI, &apic->pending_events);
void kvm_wait_lapic_expire(struct kvm_vcpu *vcpu);
+void kvm_bitmap_or_dest_vcpus(struct kvm *kvm, struct kvm_lapic_irq *irq,
+ unsigned long *vcpu_bitmap);
+
bool kvm_intr_is_single_vcpu_fast(struct kvm *kvm, struct kvm_lapic_irq *irq,
struct kvm_vcpu **dest_vcpu);
int kvm_vector_to_index(u32 vector, u32 dest_vcpus,
kvm_make_request(KVM_REQ_LOAD_CR3, vcpu);
if (!skip_tlb_flush) {
kvm_make_request(KVM_REQ_MMU_SYNC, vcpu);
- kvm_x86_ops->tlb_flush(vcpu, true);
+ kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
}
/*
struct kvm_pmc *pmc = perf_event->overflow_handler_context;
struct kvm_pmu *pmu = pmc_to_pmu(pmc);
- if (!test_and_set_bit(pmc->idx,
- (unsigned long *)&pmu->reprogram_pmi)) {
+ if (!test_and_set_bit(pmc->idx, pmu->reprogram_pmi)) {
__set_bit(pmc->idx, (unsigned long *)&pmu->global_status);
kvm_make_request(KVM_REQ_PMU, pmc->vcpu);
}
struct kvm_pmc *pmc = perf_event->overflow_handler_context;
struct kvm_pmu *pmu = pmc_to_pmu(pmc);
- if (!test_and_set_bit(pmc->idx,
- (unsigned long *)&pmu->reprogram_pmi)) {
+ if (!test_and_set_bit(pmc->idx, pmu->reprogram_pmi)) {
__set_bit(pmc->idx, (unsigned long *)&pmu->global_status);
kvm_make_request(KVM_REQ_PMU, pmc->vcpu);
}
pmc->perf_event = event;
- clear_bit(pmc->idx, (unsigned long*)&pmc_to_pmu(pmc)->reprogram_pmi);
+ pmc_to_pmu(pmc)->event_count++;
+ clear_bit(pmc->idx, pmc_to_pmu(pmc)->reprogram_pmi);
+}
+
+/*
+ * Pause the perf_event attached to @pmc, if any, and add the value returned
+ * by perf_event_pause() to pmc->counter, truncated with pmc_bitmask().
+ */
+static void pmc_pause_counter(struct kvm_pmc *pmc)
+{
+	u64 total = pmc->counter;
+
+	if (pmc->perf_event) {
+		/* update counter, reset event value to avoid redundant accumulation */
+		total += perf_event_pause(pmc->perf_event, true);
+		pmc->counter = total & pmc_bitmask(pmc);
+	}
+}
+
+/*
+ * Try to restart the perf_event already attached to @pmc instead of
+ * creating a new one.
+ *
+ * Returns true if the event was re-enabled and the counter's bit was cleared
+ * in reprogram_pmi. Returns false if there is no perf_event, or if
+ * perf_event_period() returns nonzero for the new sample period; in that
+ * case nothing is re-enabled.
+ */
+static bool pmc_resume_counter(struct kvm_pmc *pmc)
+{
+	if (!pmc->perf_event)
+		return false;
+
+	/* recalibrate sample period and check if it's accepted by perf core */
+	if (perf_event_period(pmc->perf_event,
+			(-pmc->counter) & pmc_bitmask(pmc)))
+		return false;
+
+	/* reuse perf_event to serve as pmc_reprogram_counter() does */
+	perf_event_enable(pmc->perf_event);
+
+	/* reprogram_pmi is passed directly, as in the other bit ops on it. */
+	clear_bit(pmc->idx, pmc_to_pmu(pmc)->reprogram_pmi);
+	return true;
}
void reprogram_gp_counter(struct kvm_pmc *pmc, u64 eventsel)
pmc->eventsel = eventsel;
- pmc_stop_counter(pmc);
+ pmc_pause_counter(pmc);
if (!(eventsel & ARCH_PERFMON_EVENTSEL_ENABLE) || !pmc_is_enabled(pmc))
return;
if (type == PERF_TYPE_RAW)
config = eventsel & X86_RAW_EVENT_MASK;
+ if (pmc->current_config == eventsel && pmc_resume_counter(pmc))
+ return;
+
+ pmc_release_perf_event(pmc);
+
+ pmc->current_config = eventsel;
pmc_reprogram_counter(pmc, type, config,
!(eventsel & ARCH_PERFMON_EVENTSEL_USR),
!(eventsel & ARCH_PERFMON_EVENTSEL_OS),
struct kvm_pmu_event_filter *filter;
struct kvm *kvm = pmc->vcpu->kvm;
- pmc_stop_counter(pmc);
+ pmc_pause_counter(pmc);
if (!en_field || !pmc_is_enabled(pmc))
return;
return;
}
+ if (pmc->current_config == (u64)ctrl && pmc_resume_counter(pmc))
+ return;
+
+ pmc_release_perf_event(pmc);
+
+ pmc->current_config = (u64)ctrl;
pmc_reprogram_counter(pmc, PERF_TYPE_HARDWARE,
kvm_x86_ops->pmu_ops->find_fixed_event(idx),
!(en_field & 0x2), /* exclude user */
void kvm_pmu_handle_event(struct kvm_vcpu *vcpu)
{
struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
- u64 bitmask;
int bit;
- bitmask = pmu->reprogram_pmi;
-
- for_each_set_bit(bit, (unsigned long *)&bitmask, X86_PMC_IDX_MAX) {
+ for_each_set_bit(bit, pmu->reprogram_pmi, X86_PMC_IDX_MAX) {
struct kvm_pmc *pmc = kvm_x86_ops->pmu_ops->pmc_idx_to_pmc(pmu, bit);
if (unlikely(!pmc || !pmc->perf_event)) {
- clear_bit(bit, (unsigned long *)&pmu->reprogram_pmi);
+ clear_bit(bit, pmu->reprogram_pmi);
continue;
}
reprogram_counter(pmu, bit);
}
+
+ /*
+ * Unused perf_events are only released if the corresponding MSRs
+ * weren't accessed during the last vCPU time slice. kvm_arch_sched_in
+ * triggers KVM_REQ_PMU if cleanup is needed.
+ */
+ if (unlikely(pmu->need_cleanup))
+ kvm_pmu_cleanup(vcpu);
}
/* check if idx is a valid index to access PMU */
-int kvm_pmu_is_valid_msr_idx(struct kvm_vcpu *vcpu, unsigned idx)
+int kvm_pmu_is_valid_rdpmc_ecx(struct kvm_vcpu *vcpu, unsigned int idx)
{
- return kvm_x86_ops->pmu_ops->is_valid_msr_idx(vcpu, idx);
+ return kvm_x86_ops->pmu_ops->is_valid_rdpmc_ecx(vcpu, idx);
}
bool is_vmware_backdoor_pmc(u32 pmc_idx)
if (is_vmware_backdoor_pmc(idx))
return kvm_pmu_rdpmc_vmware(vcpu, idx, data);
- pmc = kvm_x86_ops->pmu_ops->msr_idx_to_pmc(vcpu, idx, &mask);
+ pmc = kvm_x86_ops->pmu_ops->rdpmc_ecx_to_pmc(vcpu, idx, &mask);
if (!pmc)
return 1;
bool kvm_pmu_is_valid_msr(struct kvm_vcpu *vcpu, u32 msr)
{
- return kvm_x86_ops->pmu_ops->is_valid_msr(vcpu, msr);
+ return kvm_x86_ops->pmu_ops->msr_idx_to_pmc(vcpu, msr) ||
+ kvm_x86_ops->pmu_ops->is_valid_msr(vcpu, msr);
+}
+
+/*
+ * If @msr maps to a PMC (per pmu_ops->msr_idx_to_pmc), set that PMC's bit in
+ * pmu->pmc_in_use; otherwise do nothing.
+ */
+static void kvm_pmu_mark_pmc_in_use(struct kvm_vcpu *vcpu, u32 msr)
+{
+	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
+	struct kvm_pmc *target = kvm_x86_ops->pmu_ops->msr_idx_to_pmc(vcpu, msr);
+
+	if (!target)
+		return;
+
+	__set_bit(target->idx, pmu->pmc_in_use);
}
int kvm_pmu_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *data)
int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
{
+ kvm_pmu_mark_pmc_in_use(vcpu, msr_info->index);
return kvm_x86_ops->pmu_ops->set_msr(vcpu, msr_info);
}
memset(pmu, 0, sizeof(*pmu));
kvm_x86_ops->pmu_ops->init(vcpu);
init_irq_work(&pmu->irq_work, kvm_pmi_trigger_fn);
+ pmu->event_count = 0;
+ pmu->need_cleanup = false;
kvm_pmu_refresh(vcpu);
}
+/*
+ * Report whether @pmc looks enabled from its control state alone: for a
+ * fixed counter, either of the low two bits of its fixed_ctr_ctrl field;
+ * for any other counter, ARCH_PERFMON_EVENTSEL_ENABLE in its eventsel.
+ */
+static inline bool pmc_speculative_in_use(struct kvm_pmc *pmc)
+{
+	struct kvm_pmu *pmu = pmc_to_pmu(pmc);
+	int fixed_idx;
+
+	if (!pmc_is_fixed(pmc))
+		return pmc->eventsel & ARCH_PERFMON_EVENTSEL_ENABLE;
+
+	fixed_idx = pmc->idx - INTEL_PMC_IDX_FIXED;
+	return fixed_ctrl_field(pmu->fixed_ctr_ctrl, fixed_idx) & 0x3;
+}
+
+/*
+ * Release perf_events for vPMCs that have been unused for a full time slice.
+ *
+ * Clears pmu->need_cleanup, stops every valid PMC that is not marked in
+ * pmu->pmc_in_use, still owns a perf_event and is not reported as in use by
+ * pmc_speculative_in_use(), then resets pmu->pmc_in_use.
+ */
+void kvm_pmu_cleanup(struct kvm_vcpu *vcpu)
+{
+	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
+	DECLARE_BITMAP(idle_pmcs, X86_PMC_IDX_MAX);
+	struct kvm_pmc *pmc;
+	int idx;
+
+	pmu->need_cleanup = false;
+
+	/* Candidates: valid PMC indices whose bit is clear in pmc_in_use. */
+	bitmap_andnot(idle_pmcs, pmu->all_valid_pmc_idx,
+		      pmu->pmc_in_use, X86_PMC_IDX_MAX);
+
+	for_each_set_bit(idx, idle_pmcs, X86_PMC_IDX_MAX) {
+		pmc = kvm_x86_ops->pmu_ops->pmc_idx_to_pmc(pmu, idx);
+		if (!pmc || !pmc->perf_event)
+			continue;
+
+		if (pmc_speculative_in_use(pmc))
+			continue;
+
+		pmc_stop_counter(pmc);
+	}
+
+	bitmap_zero(pmu->pmc_in_use, X86_PMC_IDX_MAX);
+}
+
void kvm_pmu_destroy(struct kvm_vcpu *vcpu)
{
kvm_pmu_reset(vcpu);
unsigned (*find_fixed_event)(int idx);
bool (*pmc_is_enabled)(struct kvm_pmc *pmc);
struct kvm_pmc *(*pmc_idx_to_pmc)(struct kvm_pmu *pmu, int pmc_idx);
- struct kvm_pmc *(*msr_idx_to_pmc)(struct kvm_vcpu *vcpu, unsigned idx,
- u64 *mask);
- int (*is_valid_msr_idx)(struct kvm_vcpu *vcpu, unsigned idx);
+ struct kvm_pmc *(*rdpmc_ecx_to_pmc)(struct kvm_vcpu *vcpu,
+ unsigned int idx, u64 *mask);
+ struct kvm_pmc *(*msr_idx_to_pmc)(struct kvm_vcpu *vcpu, u32 msr);
+ int (*is_valid_rdpmc_ecx)(struct kvm_vcpu *vcpu, unsigned int idx);
bool (*is_valid_msr)(struct kvm_vcpu *vcpu, u32 msr);
int (*get_msr)(struct kvm_vcpu *vcpu, u32 msr, u64 *data);
int (*set_msr)(struct kvm_vcpu *vcpu, struct msr_data *msr_info);
return counter & pmc_bitmask(pmc);
}
-static inline void pmc_stop_counter(struct kvm_pmc *pmc)
+static inline void pmc_release_perf_event(struct kvm_pmc *pmc)
{
if (pmc->perf_event) {
- pmc->counter = pmc_read_counter(pmc);
perf_event_release_kernel(pmc->perf_event);
pmc->perf_event = NULL;
+ pmc->current_config = 0;
+ pmc_to_pmu(pmc)->event_count--;
+ }
+}
+
+static inline void pmc_stop_counter(struct kvm_pmc *pmc)
+{
+ if (pmc->perf_event) {
+ pmc->counter = pmc_read_counter(pmc);
+ pmc_release_perf_event(pmc);
}
}
return kvm_x86_ops->pmu_ops->pmc_is_enabled(pmc);
}
+/* Return true when @data has no bit in common with pmu->global_ctrl_mask. */
+static inline bool kvm_valid_perf_global_ctrl(struct kvm_pmu *pmu,
+					       u64 data)
+{
+	return (data & pmu->global_ctrl_mask) == 0;
+}
+
/* returns general purpose PMC with the specified MSR. Note that it can be
* used for both PERFCTRn and EVNTSELn; that is why it accepts base as a
* parameter to tell them apart.
void kvm_pmu_deliver_pmi(struct kvm_vcpu *vcpu);
void kvm_pmu_handle_event(struct kvm_vcpu *vcpu);
int kvm_pmu_rdpmc(struct kvm_vcpu *vcpu, unsigned pmc, u64 *data);
-int kvm_pmu_is_valid_msr_idx(struct kvm_vcpu *vcpu, unsigned idx);
+int kvm_pmu_is_valid_rdpmc_ecx(struct kvm_vcpu *vcpu, unsigned int idx);
bool kvm_pmu_is_valid_msr(struct kvm_vcpu *vcpu, u32 msr);
int kvm_pmu_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *data);
int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info);
void kvm_pmu_refresh(struct kvm_vcpu *vcpu);
void kvm_pmu_reset(struct kvm_vcpu *vcpu);
void kvm_pmu_init(struct kvm_vcpu *vcpu);
+void kvm_pmu_cleanup(struct kvm_vcpu *vcpu);
void kvm_pmu_destroy(struct kvm_vcpu *vcpu);
int kvm_vm_ioctl_set_pmu_event_filter(struct kvm *kvm, void __user *argp);
}
/* returns 0 if idx's corresponding MSR exists; otherwise returns 1. */
-static int amd_is_valid_msr_idx(struct kvm_vcpu *vcpu, unsigned idx)
+static int amd_is_valid_rdpmc_ecx(struct kvm_vcpu *vcpu, unsigned int idx)
{
struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
}
/* idx is the ECX register of RDPMC instruction */
-static struct kvm_pmc *amd_msr_idx_to_pmc(struct kvm_vcpu *vcpu, unsigned idx, u64 *mask)
+static struct kvm_pmc *amd_rdpmc_ecx_to_pmc(struct kvm_vcpu *vcpu,
+ unsigned int idx, u64 *mask)
{
struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
struct kvm_pmc *counters;
}
static bool amd_is_valid_msr(struct kvm_vcpu *vcpu, u32 msr)
+{
+ /* All MSRs refer to exactly one PMC, so msr_idx_to_pmc is enough. */
+ return false;
+}
+
+static struct kvm_pmc *amd_msr_idx_to_pmc(struct kvm_vcpu *vcpu, u32 msr)
{
struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
- int ret = false;
+ struct kvm_pmc *pmc;
- ret = get_gp_pmc_amd(pmu, msr, PMU_TYPE_COUNTER) ||
- get_gp_pmc_amd(pmu, msr, PMU_TYPE_EVNTSEL);
+ pmc = get_gp_pmc_amd(pmu, msr, PMU_TYPE_COUNTER);
+ pmc = pmc ? pmc : get_gp_pmc_amd(pmu, msr, PMU_TYPE_EVNTSEL);
- return ret;
+ return pmc;
}
static int amd_pmu_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *data)
pmu->counter_bitmask[KVM_PMC_FIXED] = 0;
pmu->nr_arch_fixed_counters = 0;
pmu->global_status = 0;
+ bitmap_set(pmu->all_valid_pmc_idx, 0, pmu->nr_arch_gp_counters);
}
static void amd_pmu_init(struct kvm_vcpu *vcpu)
pmu->gp_counters[i].type = KVM_PMC_GP;
pmu->gp_counters[i].vcpu = vcpu;
pmu->gp_counters[i].idx = i;
+ pmu->gp_counters[i].current_config = 0;
}
}
.find_fixed_event = amd_find_fixed_event,
.pmc_is_enabled = amd_pmc_is_enabled,
.pmc_idx_to_pmc = amd_pmc_idx_to_pmc,
+ .rdpmc_ecx_to_pmc = amd_rdpmc_ecx_to_pmc,
.msr_idx_to_pmc = amd_msr_idx_to_pmc,
- .is_valid_msr_idx = amd_is_valid_msr_idx,
+ .is_valid_rdpmc_ecx = amd_is_valid_rdpmc_ecx,
.is_valid_msr = amd_is_valid_msr,
.get_msr = amd_pmu_get_msr,
.set_msr = amd_pmu_set_msr,
#include <linux/file.h>
#include <linux/pagemap.h>
#include <linux/swap.h>
+#include <linux/rwsem.h>
#include <asm/apic.h>
#include <asm/perf_event.h>
#define VMCB_AVIC_APIC_BAR_MASK 0xFFFFFFFFFF000ULL
+static int sev_flush_asids(void);
+static DECLARE_RWSEM(sev_deactivate_lock);
+static DEFINE_MUTEX(sev_bitmap_lock);
static unsigned int max_sev_asid;
static unsigned int min_sev_asid;
static unsigned long *sev_asid_bitmap;
+static unsigned long *sev_reclaim_asid_bitmap;
#define __sme_page_pa(x) __sme_set(page_to_pfn(x) << PAGE_SHIFT)
struct enc_region {
/* Minimum ASID value that should be used for SEV guest */
min_sev_asid = cpuid_edx(0x8000001F);
- /* Initialize SEV ASID bitmap */
+ /* Initialize SEV ASID bitmaps */
sev_asid_bitmap = bitmap_zalloc(max_sev_asid, GFP_KERNEL);
if (!sev_asid_bitmap)
return 1;
+ sev_reclaim_asid_bitmap = bitmap_zalloc(max_sev_asid, GFP_KERNEL);
+ if (!sev_reclaim_asid_bitmap)
+ return 1;
+
status = kmalloc(sizeof(*status), GFP_KERNEL);
if (!status)
return 1;
{
int cpu;
- if (svm_sev_enabled())
+ if (svm_sev_enabled()) {
bitmap_free(sev_asid_bitmap);
+ bitmap_free(sev_reclaim_asid_bitmap);
+
+ sev_flush_asids();
+ }
for_each_possible_cpu(cpu)
svm_cpu_uninit(cpu);
return 0;
}
-static void __sev_asid_free(int asid)
+static void sev_asid_free(int asid)
{
struct svm_cpu_data *sd;
int cpu, pos;
+ mutex_lock(&sev_bitmap_lock);
+
pos = asid - 1;
- clear_bit(pos, sev_asid_bitmap);
+ __set_bit(pos, sev_reclaim_asid_bitmap);
for_each_possible_cpu(cpu) {
sd = per_cpu(svm_data, cpu);
sd->sev_vmcbs[pos] = NULL;
}
-}
-static void sev_asid_free(struct kvm *kvm)
-{
- struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
-
- __sev_asid_free(sev->asid);
+ mutex_unlock(&sev_bitmap_lock);
}
static void sev_unbind_asid(struct kvm *kvm, unsigned int handle)
/* deactivate handle */
data->handle = handle;
+
+ /* Guard DEACTIVATE against WBINVD/DF_FLUSH used in ASID recycling */
+ down_read(&sev_deactivate_lock);
sev_guest_deactivate(data, NULL);
+ up_read(&sev_deactivate_lock);
- wbinvd_on_all_cpus();
- sev_guest_df_flush(NULL);
kfree(data);
decommission = kzalloc(sizeof(*decommission), GFP_KERNEL);
mutex_unlock(&kvm->lock);
sev_unbind_asid(kvm, sev->handle);
- sev_asid_free(kvm);
+ sev_asid_free(sev->asid);
}
static void avic_vm_destroy(struct kvm *kvm)
load_pdptrs(vcpu, vcpu->arch.walk_mmu, kvm_read_cr3(vcpu));
break;
default:
- BUG();
+ WARN_ON_ONCE(1);
}
}
{
}
-static void svm_decache_cr3(struct kvm_vcpu *vcpu)
-{
-}
-
static void svm_decache_cr4_guest_bits(struct kvm_vcpu *vcpu)
{
}
return 0;
}
+#ifdef CONFIG_RETPOLINE
+ if (exit_code == SVM_EXIT_MSR)
+ return msr_interception(svm);
+ else if (exit_code == SVM_EXIT_VINTR)
+ return interrupt_window_interception(svm);
+ else if (exit_code == SVM_EXIT_INTR)
+ return intr_interception(svm);
+ else if (exit_code == SVM_EXIT_HLT)
+ return halt_interception(svm);
+ else if (exit_code == SVM_EXIT_NPF)
+ return npf_interception(svm);
+#endif
return svm_exit_handlers[exit_code](svm);
}
{
struct vcpu_svm *svm = to_svm(vcpu);
- if (svm_nested_virtualize_tpr(vcpu) ||
- kvm_vcpu_apicv_active(vcpu))
+ if (svm_nested_virtualize_tpr(vcpu))
return;
clr_cr_intercept(svm, INTERCEPT_CR8_WRITE);
return;
}
-static bool svm_get_enable_apicv(struct kvm_vcpu *vcpu)
+static bool svm_get_enable_apicv(struct kvm *kvm)
{
- return avic && irqchip_split(vcpu->kvm);
+ return avic && irqchip_split(kvm);
}
static void svm_hwapic_irr_update(struct kvm_vcpu *vcpu, int max_irr)
svm->vmcb->save.cr2 = vcpu->arch.cr2;
clgi();
- kvm_load_guest_xcr0(vcpu);
+ kvm_load_guest_xsave_state(vcpu);
if (lapic_in_kernel(vcpu) &&
vcpu->arch.apic->lapic_timer.timer_advance_ns)
if (unlikely(svm->vmcb->control.exit_code == SVM_EXIT_NMI))
kvm_before_interrupt(&svm->vcpu);
- kvm_put_guest_xcr0(vcpu);
+ kvm_load_host_xsave_state(vcpu);
stgi();
/* Any pending NMI will happen here */
{
struct vcpu_svm *svm = to_svm(vcpu);
+ vcpu->arch.xsaves_enabled = guest_cpuid_has(vcpu, X86_FEATURE_XSAVE) &&
+ boot_cpu_has(X86_FEATURE_XSAVES);
+
/* Update nrips enabled cache */
svm->nrips_enabled = !!guest_cpuid_has(&svm->vcpu, X86_FEATURE_NRIPS);
static bool svm_xsaves_supported(void)
{
- return false;
+ return boot_cpu_has(X86_FEATURE_XSAVES);
}
static bool svm_umip_emulated(void)
return 0;
}
+static int sev_flush_asids(void)
+{	/*
+	 * Run wbinvd_on_all_cpus() followed by sev_guest_df_flush(), both
+	 * with sev_deactivate_lock held for writing. Returns the
+	 * sev_guest_df_flush() result; a nonzero result is also logged
+	 * with pr_err() after the lock has been dropped.
+	 */
+ int ret, error;
+
+ /*
+ * DEACTIVATE will clear the WBINVD indicator causing DF_FLUSH to fail,
+ * so it must be guarded.
+ */
+ down_write(&sev_deactivate_lock);
+
+ wbinvd_on_all_cpus();
+ ret = sev_guest_df_flush(&error);
+
+ up_write(&sev_deactivate_lock);
+
+ if (ret) /* only report the failure; the caller decides what to do with ret */
+ pr_err("SEV: DF_FLUSH failed, ret=%d, error=%#x\n", ret, error);
+
+ return ret;
+}
+
+/*
+ * Move the ASIDs recorded in sev_reclaim_asid_bitmap back out of
+ * sev_asid_bitmap after a successful sev_flush_asids().
+ *
+ * Returns false, leaving both bitmaps untouched, when no bit is set in the
+ * reclaim bitmap from position min_sev_asid - 1 up to max_sev_asid, or when
+ * sev_flush_asids() returns nonzero. Returns true once the reclaim bitmap
+ * has been folded into sev_asid_bitmap and zeroed.
+ *
+ * Must be called with the sev_bitmap_lock held
+ */
+static bool __sev_recycle_asids(void)
+{
+ int pos;
+
+ /* Check if there are any ASIDs to reclaim before performing a flush */
+ pos = find_next_bit(sev_reclaim_asid_bitmap,
+ max_sev_asid, min_sev_asid - 1);
+ if (pos >= max_sev_asid)
+ return false;
+
+ if (sev_flush_asids())
+ return false;
+	/*
+	 * XOR flips, in sev_asid_bitmap, every bit that is set in the
+	 * reclaim bitmap. NOTE(review): this frees those ASIDs only if each
+	 * reclaimed bit is currently set in sev_asid_bitmap -- presumably
+	 * guaranteed by the allocator; confirm.
+	 */
+ bitmap_xor(sev_asid_bitmap, sev_asid_bitmap, sev_reclaim_asid_bitmap,
+ max_sev_asid);
+ bitmap_zero(sev_reclaim_asid_bitmap, max_sev_asid);
+
+ return true;
+}
+
static int sev_asid_new(void)
{
+ bool retry = true;
int pos;
+ mutex_lock(&sev_bitmap_lock);
+
/*
* SEV-enabled guest must use asid from min_sev_asid to max_sev_asid.
*/
+again:
pos = find_next_zero_bit(sev_asid_bitmap, max_sev_asid, min_sev_asid - 1);
- if (pos >= max_sev_asid)
+ if (pos >= max_sev_asid) {
+ if (retry && __sev_recycle_asids()) {
+ retry = false;
+ goto again;
+ }
+ mutex_unlock(&sev_bitmap_lock);
return -EBUSY;
+ }
+
+ __set_bit(pos, sev_asid_bitmap);
+
+ mutex_unlock(&sev_bitmap_lock);
- set_bit(pos, sev_asid_bitmap);
return pos + 1;
}
return 0;
e_free:
- __sev_asid_free(asid);
+ sev_asid_free(asid);
return ret;
}
int asid = sev_get_asid(kvm);
int ret;
- wbinvd_on_all_cpus();
-
- ret = sev_guest_df_flush(error);
- if (ret)
- return ret;
-
data = kzalloc(sizeof(*data), GFP_KERNEL_ACCOUNT);
if (!data)
return -ENOMEM;
.get_cpl = svm_get_cpl,
.get_cs_db_l_bits = kvm_get_cs_db_l_bits,
.decache_cr0_guest_bits = svm_decache_cr0_guest_bits,
- .decache_cr3 = svm_decache_cr3,
.decache_cr4_guest_bits = svm_decache_cr4_guest_bits,
.set_cr0 = svm_set_cr0,
.set_cr3 = svm_set_cr3,
#include "hyperv.h"
#include "mmu.h"
#include "nested.h"
+#include "pmu.h"
#include "trace.h"
#include "x86.h"
failed; \
})
+#define SET_MSR_OR_WARN(vcpu, idx, data) /* yields true, after a rate-limited warning, when kvm_set_msr() returns nonzero; idx and data are expanded again in the warning, so pass side-effect-free arguments */ \
+({ \
+ bool failed = kvm_set_msr(vcpu, idx, data); \
+ if (failed) \
+ pr_warn_ratelimited( \
+ "%s cannot write MSR (0x%x, 0x%llx)\n", \
+ __func__, idx, data); \
+ failed; \
+})
+
/*
* Hyper-V requires all of these, so mark them as supported even though
* they are just treated the same as all-context.
return i + 1;
}
+static bool nested_vmx_get_vmexit_msr_value(struct kvm_vcpu *vcpu,
+ u32 msr_index,
+ u64 *data)
+{	/*
+	 * Store in *data the value to report for @msr_index. For
+	 * MSR_IA32_TSC the value found in vmx->msr_autostore.guest (if the
+	 * MSR is listed there) is converted with kvm_read_l1_tsc(); every
+	 * other case goes through kvm_get_msr(). Returns true on success,
+	 * false (after a rate-limited debug message) if kvm_get_msr()
+	 * returns nonzero.
+	 */
+ struct vcpu_vmx *vmx = to_vmx(vcpu);
+
+ /*
+ * If the L0 hypervisor stored a more accurate value for the TSC that
+ * does not include the time taken for emulation of the L2->L1
+ * VM-exit in L0, use the more accurate value.
+ */
+ if (msr_index == MSR_IA32_TSC) {
+ int index = vmx_find_msr_index(&vmx->msr_autostore.guest,
+ MSR_IA32_TSC);
+
+ if (index >= 0) { /* a negative index is treated as "TSC not in the autostore list" */
+ u64 val = vmx->msr_autostore.guest.val[index].value;
+
+ *data = kvm_read_l1_tsc(vcpu, val);
+ return true;
+ }
+ }
+	/* Not the TSC, or no autostore entry for it: read the MSR normally. */
+ if (kvm_get_msr(vcpu, msr_index, data)) {
+ pr_debug_ratelimited("%s cannot read MSR (0x%x)\n", __func__,
+ msr_index);
+ return false;
+ }
+ return true;
+}
+
+static bool read_and_check_msr_entry(struct kvm_vcpu *vcpu, u64 gpa, int i,
+ struct vmx_msr_entry *e)
+{	/*
+	 * Read entry @i of the guest MSR list starting at @gpa into @e and
+	 * validate it with nested_vmx_store_msr_check(). Returns true when
+	 * both steps succeed; otherwise emits a rate-limited debug message
+	 * and returns false.
+	 *
+	 * NOTE(review): @i is an int but is printed with %u below --
+	 * harmless for non-negative indices; confirm callers never pass a
+	 * negative value.
+	 */
+ if (kvm_vcpu_read_guest(vcpu,
+ gpa + i * sizeof(*e),
+ e, 2 * sizeof(u32))) { /* only the first two u32s of the entry are read -- presumably index and reserved; confirm against struct vmx_msr_entry */
+ pr_debug_ratelimited(
+ "%s cannot read MSR entry (%u, 0x%08llx)\n",
+ __func__, i, gpa + i * sizeof(*e));
+ return false;
+ }
+ if (nested_vmx_store_msr_check(vcpu, e)) { /* nonzero from the check is treated as a rejected entry */
+ pr_debug_ratelimited(
+ "%s check failed (%u, 0x%x, 0x%x)\n",
+ __func__, i, e->index, e->reserved);
+ return false;
+ }
+ return true;
+}
+
static int nested_vmx_store_msr(struct kvm_vcpu *vcpu, u64 gpa, u32 count)
{
u64 data;
if (unlikely(i >= max_msr_list_size))
return -EINVAL;
- if (kvm_vcpu_read_guest(vcpu,
- gpa + i * sizeof(e),
- &e, 2 * sizeof(u32))) {
- pr_debug_ratelimited(
- "%s cannot read MSR entry (%u, 0x%08llx)\n",
- __func__, i, gpa + i * sizeof(e));
+ if (!read_and_check_msr_entry(vcpu, gpa, i, &e))
return -EINVAL;
- }
- if (nested_vmx_store_msr_check(vcpu, &e)) {
- pr_debug_ratelimited(
- "%s check failed (%u, 0x%x, 0x%x)\n",
- __func__, i, e.index, e.reserved);
- return -EINVAL;
- }
- if (kvm_get_msr(vcpu, e.index, &data)) {
- pr_debug_ratelimited(
- "%s cannot read MSR (%u, 0x%x)\n",
- __func__, i, e.index);
+
+ if (!nested_vmx_get_vmexit_msr_value(vcpu, e.index, &data))
return -EINVAL;
- }
+
if (kvm_vcpu_write_guest(vcpu,
gpa + i * sizeof(e) +
offsetof(struct vmx_msr_entry, value),
return 0;
}
+/*
+ * Scan the vmcs12 VM-exit MSR-store list (vm_exit_msr_store_addr /
+ * vm_exit_msr_store_count) for @msr_index.
+ *
+ * Returns true as soon as an entry with a matching index is found. Returns
+ * false if the list ends without a match or if read_and_check_msr_entry()
+ * fails on any entry before a match.
+ */
+static bool nested_msr_store_list_has_msr(struct kvm_vcpu *vcpu, u32 msr_index)
+{
+	struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
+	u64 list_gpa = vmcs12->vm_exit_msr_store_addr;
+	u32 nr_entries = vmcs12->vm_exit_msr_store_count;
+	struct vmx_msr_entry entry;
+	u32 slot;
+
+	for (slot = 0; slot < nr_entries; slot++) {
+		/* An unreadable or invalid entry ends the search. */
+		if (!read_and_check_msr_entry(vcpu, list_gpa, slot, &entry))
+			break;
+
+		if (entry.index == msr_index)
+			return true;
+	}
+
+	return false;
+}
+
+static void prepare_vmx_msr_autostore_list(struct kvm_vcpu *vcpu,
+ u32 msr_index)
+{	/*
+	 * Bring vmx->msr_autostore.guest in line with the vmcs12 VM-exit
+	 * MSR-store list for @msr_index: append the MSR when vmcs12 lists
+	 * it and the autostore list does not, remove it in the opposite
+	 * case, and leave the list unchanged when both already agree.
+	 */
+ struct vcpu_vmx *vmx = to_vmx(vcpu);
+ struct vmx_msrs *autostore = &vmx->msr_autostore.guest;
+ bool in_vmcs12_store_list;
+ int msr_autostore_index;
+ bool in_autostore_list;
+ int last;
+
+ msr_autostore_index = vmx_find_msr_index(autostore, msr_index);
+ in_autostore_list = msr_autostore_index >= 0; /* a negative index is treated as "not present" */
+ in_vmcs12_store_list = nested_msr_store_list_has_msr(vcpu, msr_index);
+
+ if (in_vmcs12_store_list && !in_autostore_list) {
+ if (autostore->nr == NR_LOADSTORE_MSRS) { /* list is full: warn and leave it unchanged */
+ /*
+ * Emulated VMEntry does not fail here. Instead a less
+ * accurate value will be returned by
+ * nested_vmx_get_vmexit_msr_value() using kvm_get_msr()
+ * instead of reading the value from the vmcs02 VMExit
+ * MSR-store area.
+ */
+ pr_warn_ratelimited(
+ "Not enough msr entries in msr_autostore. Can't add msr %x\n",
+ msr_index);
+ return;
+ }
+ last = autostore->nr++; /* append at the old end of the list */
+ autostore->val[last].index = msr_index;
+ } else if (!in_vmcs12_store_list && in_autostore_list) {
+ last = --autostore->nr; /* shrink first; last now names the final entry */
+ autostore->val[msr_autostore_index] = autostore->val[last]; /* overwrite the removed slot with the final entry; order is not preserved */
+ }
+}
+
static bool nested_cr3_valid(struct kvm_vcpu *vcpu, unsigned long val)
{
unsigned long invalid_mask;
kvm_mmu_new_cr3(vcpu, cr3, false);
vcpu->arch.cr3 = cr3;
- __set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail);
+ kvm_register_mark_available(vcpu, VCPU_EXREG_CR3);
kvm_init_mmu(vcpu, false);
* populated by L2 differently than TLB entries populated
* by L1.
*
- * If L1 uses EPT, then TLB entries are tagged with different EPTP.
+ * If L0 uses EPT, L1 and L2 run with different EPTP because
+ * guest_mode is part of kvm_mmu_page_role. Thus, TLB entries
+ * are tagged with different EPTP.
*
* If L1 uses VPID and we allocated a vpid02, TLB entries are tagged
* with different VPID (L1 entries are tagged with vmx->vpid
{
struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
- return nested_cpu_has_ept(vmcs12) ||
+ return enable_ept ||
(nested_cpu_has_vpid(vmcs12) && to_vmx(vcpu)->nested.vpid02);
}
* addresses are constant (for vmcs02), the counts can change based
* on L2's behavior, e.g. switching to/from long mode.
*/
- vmcs_write32(VM_EXIT_MSR_STORE_COUNT, 0);
+ vmcs_write64(VM_EXIT_MSR_STORE_ADDR, __pa(vmx->msr_autostore.guest.val));
vmcs_write64(VM_EXIT_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.host.val));
vmcs_write64(VM_ENTRY_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.guest.val));
exec_control &= ~CPU_BASED_TPR_SHADOW;
exec_control |= vmcs12->cpu_based_vm_exec_control;
+ vmx->nested.l1_tpr_threshold = -1;
if (exec_control & CPU_BASED_TPR_SHADOW)
vmcs_write32(TPR_THRESHOLD, vmcs12->tpr_threshold);
#ifdef CONFIG_X86_64
vmcs_write64(EOI_EXIT_BITMAP3, vmcs12->eoi_exit_bitmap3);
}
+ /*
+ * Make sure the msr_autostore list is up to date before we set the
+ * count in the vmcs02.
+ */
+ prepare_vmx_msr_autostore_list(&vmx->vcpu, MSR_IA32_TSC);
+
+ vmcs_write32(VM_EXIT_MSR_STORE_COUNT, vmx->msr_autostore.guest.nr);
vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, vmx->msr_autoload.host.nr);
vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, vmx->msr_autoload.guest.nr);
entry_failure_code))
return -EINVAL;
+ /*
+ * Immediately write vmcs02.GUEST_CR3. It will be propagated to vmcs12
+ * on nested VM-Exit, which can occur without actually running L2 and
+ * thus without hitting vmx_set_cr3(), e.g. if L1 is entering L2 with
+ * vmcs12.GUEST_ACTIVITYSTATE=HLT, in which case KVM will intercept the
+ * transition to HLT instead of running L2.
+ */
+ if (enable_ept)
+ vmcs_writel(GUEST_CR3, vmcs12->guest_cr3);
+
/* Late preparation of GUEST_PDPTRs now that EFER and CRs are set. */
if (load_guest_pdptrs_vmcs12 && nested_cpu_has_ept(vmcs12) &&
is_pae_paging(vcpu)) {
if (!enable_ept)
vcpu->arch.walk_mmu->inject_page_fault = vmx_inject_page_fault_nested;
+ if ((vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL) &&
+ SET_MSR_OR_WARN(vcpu, MSR_CORE_PERF_GLOBAL_CTRL,
+ vmcs12->guest_ia32_perf_global_ctrl))
+ return -EINVAL;
+
kvm_rsp_write(vcpu, vmcs12->guest_rsp);
kvm_rip_write(vcpu, vmcs12->guest_rip);
return 0;
CC(!kvm_pat_valid(vmcs12->host_ia32_pat)))
return -EINVAL;
+ if ((vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL) &&
+ CC(!kvm_valid_perf_global_ctrl(vcpu_to_pmu(vcpu),
+ vmcs12->host_ia32_perf_global_ctrl)))
+ return -EINVAL;
+
#ifdef CONFIG_X86_64
ia32e = !!(vcpu->arch.efer & EFER_LMA);
#else
return -EINVAL;
}
+ if ((vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL) &&
+ CC(!kvm_valid_perf_global_ctrl(vcpu_to_pmu(vcpu),
+ vmcs12->guest_ia32_perf_global_ctrl)))
+ return -EINVAL;
+
/*
* If the load IA32_EFER VM-entry control is 1, the following checks
* are performed on the field for the IA32_EFER MSR:
test_bit(KVM_APIC_INIT, &apic->pending_events)) {
if (block_nested_events)
return -EBUSY;
+ clear_bit(KVM_APIC_INIT, &apic->pending_events);
nested_vmx_vmexit(vcpu, EXIT_REASON_INIT_SIGNAL, 0, 0);
return 0;
}
vcpu->arch.pat = vmcs12->host_ia32_pat;
}
if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL)
- vmcs_write64(GUEST_IA32_PERF_GLOBAL_CTRL,
- vmcs12->host_ia32_perf_global_ctrl);
+ SET_MSR_OR_WARN(vcpu, MSR_CORE_PERF_GLOBAL_CTRL,
+ vmcs12->host_ia32_perf_global_ctrl);
/* Set L1 segment info according to Intel SDM
27.5.2 Loading Host Segment and Descriptor-Table Registers */
nested_ept_uninit_mmu_context(vcpu);
vcpu->arch.cr3 = vmcs_readl(GUEST_CR3);
- __set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail);
+ kvm_register_mark_available(vcpu, VCPU_EXREG_CR3);
/*
* Use ept_save_pdptrs(vcpu) to load the MMU's cached PDPTRs
vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, vmx->msr_autoload.host.nr);
vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, vmx->msr_autoload.guest.nr);
vmcs_write64(TSC_OFFSET, vcpu->arch.tsc_offset);
+ if (vmx->nested.l1_tpr_threshold != -1)
+ vmcs_write32(TPR_THRESHOLD, vmx->nested.l1_tpr_threshold);
if (kvm_has_tsc_control)
decache_tsc_multiplier(vmx);
return 0;
}
+/*
+ * Advertise or hide the "load IA32_PERF_GLOBAL_CTRL" VM-entry and VM-exit
+ * controls in the vCPU's nested VMX capability values, depending on whether
+ * the PMU reports MSR_CORE_PERF_GLOBAL_CTRL as a valid MSR for this vCPU.
+ * Does nothing when nested VMX is not allowed for the vCPU.
+ */
+void nested_vmx_pmu_entry_exit_ctls_update(struct kvm_vcpu *vcpu)
+{
+	struct vcpu_vmx *vmx;
+
+	if (!nested_vmx_allowed(vcpu))
+		return;
+
+	vmx = to_vmx(vcpu);
+	if (kvm_x86_ops->pmu_ops->is_valid_msr(vcpu, MSR_CORE_PERF_GLOBAL_CTRL)) {
+		vmx->nested.msrs.entry_ctls_high |=
+				VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL;
+		vmx->nested.msrs.exit_ctls_high |=
+				VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL;
+	} else {
+		vmx->nested.msrs.entry_ctls_high &=
+				~VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL;
+		/*
+		 * The exit controls must be masked with the VM_EXIT_* bit;
+		 * the VM_ENTRY_* bit is a different bit position and would
+		 * leave the exit control advertised.
+		 */
+		vmx->nested.msrs.exit_ctls_high &=
+				~VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL;
+	}
+}
+
static int nested_vmx_get_vmptr(struct kvm_vcpu *vcpu, gpa_t *vmpointer)
{
gva_t gva;
return ret;
}
-void nested_vmx_vcpu_setup(void)
+void nested_vmx_set_vmcs_shadowing_bitmap(void)
{
if (enable_shadow_vmcs) {
vmcs_write64(VMREAD_BITMAP, __pa(vmx_vmread_bitmap));
init_vmcs_shadow_fields();
}
- exit_handlers[EXIT_REASON_VMCLEAR] = handle_vmclear,
- exit_handlers[EXIT_REASON_VMLAUNCH] = handle_vmlaunch,
- exit_handlers[EXIT_REASON_VMPTRLD] = handle_vmptrld,
- exit_handlers[EXIT_REASON_VMPTRST] = handle_vmptrst,
- exit_handlers[EXIT_REASON_VMREAD] = handle_vmread,
- exit_handlers[EXIT_REASON_VMRESUME] = handle_vmresume,
- exit_handlers[EXIT_REASON_VMWRITE] = handle_vmwrite,
- exit_handlers[EXIT_REASON_VMOFF] = handle_vmoff,
- exit_handlers[EXIT_REASON_VMON] = handle_vmon,
- exit_handlers[EXIT_REASON_INVEPT] = handle_invept,
- exit_handlers[EXIT_REASON_INVVPID] = handle_invvpid,
- exit_handlers[EXIT_REASON_VMFUNC] = handle_vmfunc,
+ exit_handlers[EXIT_REASON_VMCLEAR] = handle_vmclear;
+ exit_handlers[EXIT_REASON_VMLAUNCH] = handle_vmlaunch;
+ exit_handlers[EXIT_REASON_VMPTRLD] = handle_vmptrld;
+ exit_handlers[EXIT_REASON_VMPTRST] = handle_vmptrst;
+ exit_handlers[EXIT_REASON_VMREAD] = handle_vmread;
+ exit_handlers[EXIT_REASON_VMRESUME] = handle_vmresume;
+ exit_handlers[EXIT_REASON_VMWRITE] = handle_vmwrite;
+ exit_handlers[EXIT_REASON_VMOFF] = handle_vmoff;
+ exit_handlers[EXIT_REASON_VMON] = handle_vmon;
+ exit_handlers[EXIT_REASON_INVEPT] = handle_invept;
+ exit_handlers[EXIT_REASON_INVVPID] = handle_invvpid;
+ exit_handlers[EXIT_REASON_VMFUNC] = handle_vmfunc;
kvm_x86_ops->check_nested_events = vmx_check_nested_events;
kvm_x86_ops->get_nested_state = vmx_get_nested_state;
kvm_x86_ops->set_nested_state = vmx_set_nested_state;
- kvm_x86_ops->get_vmcs12_pages = nested_get_vmcs12_pages,
+ kvm_x86_ops->get_vmcs12_pages = nested_get_vmcs12_pages;
kvm_x86_ops->nested_enable_evmcs = nested_enable_evmcs;
kvm_x86_ops->nested_get_evmcs_version = nested_get_evmcs_version;
bool apicv);
void nested_vmx_hardware_unsetup(void);
__init int nested_vmx_hardware_setup(int (*exit_handlers[])(struct kvm_vcpu *));
-void nested_vmx_vcpu_setup(void);
+void nested_vmx_set_vmcs_shadowing_bitmap(void);
void nested_vmx_free_vcpu(struct kvm_vcpu *vcpu);
int nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu, bool from_vmentry);
bool nested_vmx_exit_reflected(struct kvm_vcpu *vcpu, u32 exit_reason);
int vmx_get_vmx_msr(struct nested_vmx_msrs *msrs, u32 msr_index, u64 *pdata);
int get_vmx_mem_address(struct kvm_vcpu *vcpu, unsigned long exit_qualification,
u32 vmx_instruction_info, bool wr, int len, gva_t *ret);
+void nested_vmx_pmu_entry_exit_ctls_update(struct kvm_vcpu *vcpu);
static inline struct vmcs12 *get_vmcs12(struct kvm_vcpu *vcpu)
{
return ((val & fixed1) | fixed0) == val;
}
-static bool nested_guest_cr0_valid(struct kvm_vcpu *vcpu, unsigned long val)
+static inline bool nested_guest_cr0_valid(struct kvm_vcpu *vcpu, unsigned long val)
{
u64 fixed0 = to_vmx(vcpu)->nested.msrs.cr0_fixed0;
u64 fixed1 = to_vmx(vcpu)->nested.msrs.cr0_fixed1;
return fixed_bits_valid(val, fixed0, fixed1);
}
-static bool nested_host_cr0_valid(struct kvm_vcpu *vcpu, unsigned long val)
+static inline bool nested_host_cr0_valid(struct kvm_vcpu *vcpu, unsigned long val)
{
u64 fixed0 = to_vmx(vcpu)->nested.msrs.cr0_fixed0;
u64 fixed1 = to_vmx(vcpu)->nested.msrs.cr0_fixed1;
return fixed_bits_valid(val, fixed0, fixed1);
}
-static bool nested_cr4_valid(struct kvm_vcpu *vcpu, unsigned long val)
+static inline bool nested_cr4_valid(struct kvm_vcpu *vcpu, unsigned long val)
{
u64 fixed0 = to_vmx(vcpu)->nested.msrs.cr4_fixed0;
u64 fixed1 = to_vmx(vcpu)->nested.msrs.cr4_fixed1;
#include "x86.h"
#include "cpuid.h"
#include "lapic.h"
+#include "nested.h"
#include "pmu.h"
static struct kvm_event_hw_type_mapping intel_arch_events[] = {
if (old_ctrl == new_ctrl)
continue;
+ __set_bit(INTEL_PMC_IDX_FIXED + i, pmu->pmc_in_use);
reprogram_fixed_counter(pmc, new_ctrl, i);
}
}
/* returns 0 if idx's corresponding MSR exists; otherwise returns 1. */
-static int intel_is_valid_msr_idx(struct kvm_vcpu *vcpu, unsigned idx)
+static int intel_is_valid_rdpmc_ecx(struct kvm_vcpu *vcpu, unsigned int idx)
{
struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
bool fixed = idx & (1u << 30);
(fixed && idx >= pmu->nr_arch_fixed_counters);
}
-static struct kvm_pmc *intel_msr_idx_to_pmc(struct kvm_vcpu *vcpu,
- unsigned idx, u64 *mask)
+static struct kvm_pmc *intel_rdpmc_ecx_to_pmc(struct kvm_vcpu *vcpu,
+ unsigned int idx, u64 *mask)
{
struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
bool fixed = idx & (1u << 30);
return ret;
}
+/*
+ * Look up the PMC addressed by @msr.  The fixed counters are tried first,
+ * then the general-purpose counters relative to MSR_P6_EVNTSEL0, and
+ * finally relative to MSR_IA32_PERFCTR0.  The result of the last lookup is
+ * returned as-is when the earlier ones find nothing.
+ */
+static struct kvm_pmc *intel_msr_idx_to_pmc(struct kvm_vcpu *vcpu, u32 msr)
+{
+	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
+	struct kvm_pmc *found;
+
+	found = get_fixed_pmc(pmu, msr);
+	if (found)
+		return found;
+
+	found = get_gp_pmc(pmu, msr, MSR_P6_EVNTSEL0);
+	if (found)
+		return found;
+
+	return get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0);
+}
+
static int intel_pmu_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *data)
{
struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
case MSR_CORE_PERF_GLOBAL_CTRL:
if (pmu->global_ctrl == data)
return 0;
- if (!(data & pmu->global_ctrl_mask)) {
+ if (kvm_valid_perf_global_ctrl(pmu, data)) {
global_ctrl_changed(pmu, data);
return 0;
}
(boot_cpu_has(X86_FEATURE_HLE) || boot_cpu_has(X86_FEATURE_RTM)) &&
(entry->ebx & (X86_FEATURE_HLE|X86_FEATURE_RTM)))
pmu->reserved_bits ^= HSW_IN_TX|HSW_IN_TX_CHECKPOINTED;
+
+ bitmap_set(pmu->all_valid_pmc_idx,
+ 0, pmu->nr_arch_gp_counters);
+ bitmap_set(pmu->all_valid_pmc_idx,
+ INTEL_PMC_MAX_GENERIC, pmu->nr_arch_fixed_counters);
+
+ nested_vmx_pmu_entry_exit_ctls_update(vcpu);
}
static void intel_pmu_init(struct kvm_vcpu *vcpu)
pmu->gp_counters[i].type = KVM_PMC_GP;
pmu->gp_counters[i].vcpu = vcpu;
pmu->gp_counters[i].idx = i;
+ pmu->gp_counters[i].current_config = 0;
}
for (i = 0; i < INTEL_PMC_MAX_FIXED; i++) {
pmu->fixed_counters[i].type = KVM_PMC_FIXED;
pmu->fixed_counters[i].vcpu = vcpu;
pmu->fixed_counters[i].idx = i + INTEL_PMC_IDX_FIXED;
+ pmu->fixed_counters[i].current_config = 0;
}
}
.find_fixed_event = intel_find_fixed_event,
.pmc_is_enabled = intel_pmc_is_enabled,
.pmc_idx_to_pmc = intel_pmc_idx_to_pmc,
+ .rdpmc_ecx_to_pmc = intel_rdpmc_ecx_to_pmc,
.msr_idx_to_pmc = intel_msr_idx_to_pmc,
- .is_valid_msr_idx = intel_is_valid_msr_idx,
+ .is_valid_rdpmc_ecx = intel_is_valid_rdpmc_ecx,
.is_valid_msr = intel_is_valid_msr,
.get_msr = intel_pmu_get_msr,
.set_msr = intel_pmu_set_msr,
static bool __read_mostly nested = 1;
module_param(nested, bool, S_IRUGO);
-static u64 __read_mostly host_xss;
-
bool __read_mostly enable_pml = 1;
module_param_named(pml, enable_pml, bool, S_IRUGO);
bool ret;
u32 mask = 1 << (seg * SEG_FIELD_NR + field);
- if (!(vmx->vcpu.arch.regs_avail & (1 << VCPU_EXREG_SEGMENTS))) {
- vmx->vcpu.arch.regs_avail |= (1 << VCPU_EXREG_SEGMENTS);
+ if (!kvm_register_is_available(&vmx->vcpu, VCPU_EXREG_SEGMENTS)) {
+ kvm_register_mark_available(&vmx->vcpu, VCPU_EXREG_SEGMENTS);
vmx->segment_cache.bitmask = 0;
}
ret = vmx->segment_cache.bitmask & mask;
vm_exit_controls_clearbit(vmx, exit);
}
-static int find_msr(struct vmx_msrs *m, unsigned int msr)
+int vmx_find_msr_index(struct vmx_msrs *m, u32 msr)
{
unsigned int i;
}
break;
}
- i = find_msr(&m->guest, msr);
+ i = vmx_find_msr_index(&m->guest, msr);
if (i < 0)
goto skip_guest;
--m->guest.nr;
vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, m->guest.nr);
skip_guest:
- i = find_msr(&m->host, msr);
+ i = vmx_find_msr_index(&m->host, msr);
if (i < 0)
return;
wrmsrl(MSR_IA32_PEBS_ENABLE, 0);
}
- i = find_msr(&m->guest, msr);
+ i = vmx_find_msr_index(&m->guest, msr);
if (!entry_only)
- j = find_msr(&m->host, msr);
+ j = vmx_find_msr_index(&m->host, msr);
- if ((i < 0 && m->guest.nr == NR_AUTOLOAD_MSRS) ||
- (j < 0 && m->host.nr == NR_AUTOLOAD_MSRS)) {
+ if ((i < 0 && m->guest.nr == NR_LOADSTORE_MSRS) ||
+ (j < 0 && m->host.nr == NR_LOADSTORE_MSRS)) {
printk_once(KERN_WARNING "Not enough msr switch entries. "
"Can't add msr %x\n", msr);
return;
unsigned long vmx_get_rflags(struct kvm_vcpu *vcpu)
{
+ struct vcpu_vmx *vmx = to_vmx(vcpu);
unsigned long rflags, save_rflags;
- if (!test_bit(VCPU_EXREG_RFLAGS, (ulong *)&vcpu->arch.regs_avail)) {
- __set_bit(VCPU_EXREG_RFLAGS, (ulong *)&vcpu->arch.regs_avail);
+ if (!kvm_register_is_available(vcpu, VCPU_EXREG_RFLAGS)) {
+ kvm_register_mark_available(vcpu, VCPU_EXREG_RFLAGS);
rflags = vmcs_readl(GUEST_RFLAGS);
- if (to_vmx(vcpu)->rmode.vm86_active) {
+ if (vmx->rmode.vm86_active) {
rflags &= RMODE_GUEST_OWNED_EFLAGS_BITS;
- save_rflags = to_vmx(vcpu)->rmode.save_rflags;
+ save_rflags = vmx->rmode.save_rflags;
rflags |= save_rflags & ~RMODE_GUEST_OWNED_EFLAGS_BITS;
}
- to_vmx(vcpu)->rflags = rflags;
+ vmx->rflags = rflags;
}
- return to_vmx(vcpu)->rflags;
+ return vmx->rflags;
}
void vmx_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
{
- unsigned long old_rflags = vmx_get_rflags(vcpu);
+ struct vcpu_vmx *vmx = to_vmx(vcpu);
+ unsigned long old_rflags;
- __set_bit(VCPU_EXREG_RFLAGS, (ulong *)&vcpu->arch.regs_avail);
- to_vmx(vcpu)->rflags = rflags;
- if (to_vmx(vcpu)->rmode.vm86_active) {
- to_vmx(vcpu)->rmode.save_rflags = rflags;
+ if (enable_unrestricted_guest) {
+ kvm_register_mark_available(vcpu, VCPU_EXREG_RFLAGS);
+ vmx->rflags = rflags;
+ vmcs_writel(GUEST_RFLAGS, rflags);
+ return;
+ }
+
+ old_rflags = vmx_get_rflags(vcpu);
+ vmx->rflags = rflags;
+ if (vmx->rmode.vm86_active) {
+ vmx->rmode.save_rflags = rflags;
rflags |= X86_EFLAGS_IOPL | X86_EFLAGS_VM;
}
vmcs_writel(GUEST_RFLAGS, rflags);
- if ((old_rflags ^ to_vmx(vcpu)->rflags) & X86_EFLAGS_VM)
- to_vmx(vcpu)->emulation_required = emulation_required(vcpu);
+ if ((old_rflags ^ vmx->rflags) & X86_EFLAGS_VM)
+ vmx->emulation_required = emulation_required(vcpu);
}
u32 vmx_get_interrupt_shadow(struct kvm_vcpu *vcpu)
return 1;
return vmx_get_vmx_msr(&vmx->nested.msrs, msr_info->index,
&msr_info->data);
- case MSR_IA32_XSS:
- if (!vmx_xsaves_supported() ||
- (!msr_info->host_initiated &&
- !(guest_cpuid_has(vcpu, X86_FEATURE_XSAVE) &&
- guest_cpuid_has(vcpu, X86_FEATURE_XSAVES))))
- return 1;
- msr_info->data = vcpu->arch.ia32_xss;
- break;
case MSR_IA32_RTIT_CTL:
if (pt_mode != PT_MODE_HOST_GUEST)
return 1;
if (!nested_vmx_allowed(vcpu))
return 1;
return vmx_set_vmx_msr(vcpu, msr_index, data);
- case MSR_IA32_XSS:
- if (!vmx_xsaves_supported() ||
- (!msr_info->host_initiated &&
- !(guest_cpuid_has(vcpu, X86_FEATURE_XSAVE) &&
- guest_cpuid_has(vcpu, X86_FEATURE_XSAVES))))
- return 1;
- /*
- * The only supported bit as of Skylake is bit 8, but
- * it is not supported on KVM.
- */
- if (data != 0)
- return 1;
- vcpu->arch.ia32_xss = data;
- if (vcpu->arch.ia32_xss != host_xss)
- add_atomic_switch_msr(vmx, MSR_IA32_XSS,
- vcpu->arch.ia32_xss, host_xss, false);
- else
- clear_atomic_switch_msr(vmx, MSR_IA32_XSS);
- break;
case MSR_IA32_RTIT_CTL:
if ((pt_mode != PT_MODE_HOST_GUEST) ||
vmx_rtit_ctl_check(vcpu, data) ||
static void vmx_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg)
{
- __set_bit(reg, (unsigned long *)&vcpu->arch.regs_avail);
+ kvm_register_mark_available(vcpu, reg);
+
switch (reg) {
case VCPU_REGS_RSP:
vcpu->arch.regs[VCPU_REGS_RSP] = vmcs_readl(GUEST_RSP);
if (enable_ept)
ept_save_pdptrs(vcpu);
break;
+ case VCPU_EXREG_CR3:
+ if (enable_unrestricted_guest || (enable_ept && is_paging(vcpu)))
+ vcpu->arch.cr3 = vmcs_readl(GUEST_CR3);
+ break;
default:
+ WARN_ON_ONCE(1);
break;
}
}
vcpu->arch.cr0 |= vmcs_readl(GUEST_CR0) & cr0_guest_owned_bits;
}
-static void vmx_decache_cr3(struct kvm_vcpu *vcpu)
-{
- if (enable_unrestricted_guest || (enable_ept && is_paging(vcpu)))
- vcpu->arch.cr3 = vmcs_readl(GUEST_CR3);
- __set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail);
-}
-
static void vmx_decache_cr4_guest_bits(struct kvm_vcpu *vcpu)
{
ulong cr4_guest_owned_bits = vcpu->arch.cr4_guest_owned_bits;
{
struct kvm_mmu *mmu = vcpu->arch.walk_mmu;
- if (!test_bit(VCPU_EXREG_PDPTR,
- (unsigned long *)&vcpu->arch.regs_dirty))
+ if (!kvm_register_is_dirty(vcpu, VCPU_EXREG_PDPTR))
return;
if (is_pae_paging(vcpu)) {
mmu->pdptrs[3] = vmcs_read64(GUEST_PDPTR3);
}
- __set_bit(VCPU_EXREG_PDPTR,
- (unsigned long *)&vcpu->arch.regs_avail);
- __set_bit(VCPU_EXREG_PDPTR,
- (unsigned long *)&vcpu->arch.regs_dirty);
+ kvm_register_mark_dirty(vcpu, VCPU_EXREG_PDPTR);
}
static void ept_update_paging_mode_cr0(unsigned long *hw_cr0,
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
- if (!test_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail))
- vmx_decache_cr3(vcpu);
+ if (!kvm_register_is_available(vcpu, VCPU_EXREG_CR3))
+ vmx_cache_reg(vcpu, VCPU_EXREG_CR3);
if (!(cr0 & X86_CR0_PG)) {
/* From paging/starting to nonpaging */
exec_controls_setbit(vmx, CPU_BASED_CR3_LOAD_EXITING |
void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
{
struct kvm *kvm = vcpu->kvm;
+ bool update_guest_cr3 = true;
unsigned long guest_cr3;
u64 eptp;
spin_unlock(&to_kvm_vmx(kvm)->ept_pointer_lock);
}
- if (enable_unrestricted_guest || is_paging(vcpu) ||
- is_guest_mode(vcpu))
- guest_cr3 = kvm_read_cr3(vcpu);
- else
+ /* Loading vmcs02.GUEST_CR3 is handled by nested VM-Enter. */
+ if (is_guest_mode(vcpu))
+ update_guest_cr3 = false;
+ else if (!enable_unrestricted_guest && !is_paging(vcpu))
guest_cr3 = to_kvm_vmx(kvm)->ept_identity_map_addr;
+ else if (test_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail))
+ guest_cr3 = vcpu->arch.cr3;
+ else /* vmcs01.GUEST_CR3 is already up-to-date. */
+ update_guest_cr3 = false;
ept_load_pdptrs(vcpu);
}
- vmcs_writel(GUEST_CR3, guest_cr3);
+ if (update_guest_cr3)
+ vmcs_writel(GUEST_CR3, guest_cr3);
}
int vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
}
}
-static bool vmx_get_enable_apicv(struct kvm_vcpu *vcpu)
+static bool vmx_get_enable_apicv(struct kvm *kvm)
{
return enable_apicv;
}
guest_cpuid_has(vcpu, X86_FEATURE_XSAVE) &&
guest_cpuid_has(vcpu, X86_FEATURE_XSAVES);
+ vcpu->arch.xsaves_enabled = xsaves_enabled;
+
if (!xsaves_enabled)
exec_control &= ~SECONDARY_EXEC_XSAVES;
#define VMX_XSS_EXIT_BITMAP 0
/*
- * Sets up the vmcs for emulated real mode.
+ * Noting that the initialization of Guest-state Area of VMCS is in
+ * vmx_vcpu_reset().
*/
-static void vmx_vcpu_setup(struct vcpu_vmx *vmx)
+static void init_vmcs(struct vcpu_vmx *vmx)
{
- int i;
-
if (nested)
- nested_vmx_vcpu_setup();
+ nested_vmx_set_vmcs_shadowing_bitmap();
if (cpu_has_vmx_msr_bitmap())
vmcs_write64(MSR_BITMAP, __pa(vmx->vmcs01.msr_bitmap));
/* Control */
pin_controls_set(vmx, vmx_pin_based_exec_ctrl(vmx));
- vmx->hv_deadline_tsc = -1;
exec_controls_set(vmx, vmx_exec_control(vmx));
if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT)
vmcs_write64(GUEST_IA32_PAT, vmx->vcpu.arch.pat);
- for (i = 0; i < ARRAY_SIZE(vmx_msr_index); ++i) {
- u32 index = vmx_msr_index[i];
- u32 data_low, data_high;
- int j = vmx->nmsrs;
-
- if (rdmsr_safe(index, &data_low, &data_high) < 0)
- continue;
- if (wrmsr_safe(index, data_low, data_high) < 0)
- continue;
- vmx->guest_msrs[j].index = i;
- vmx->guest_msrs[j].data = 0;
- vmx->guest_msrs[j].mask = -1ull;
- ++vmx->nmsrs;
- }
-
vm_exit_controls_set(vmx, vmx_vmexit_ctrl());
/* 22.2.1, 20.8.1 */
set_cr4_guest_host_mask(vmx);
+ if (vmx->vpid != 0)
+ vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->vpid);
+
if (vmx_xsaves_supported())
vmcs_write64(XSS_EXIT_BITMAP, VMX_XSS_EXIT_BITMAP);
kvm_make_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu);
- if (vmx->vpid != 0)
- vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->vpid);
-
cr0 = X86_CR0_NW | X86_CR0_CD | X86_CR0_ET;
vmx->vcpu.arch.cr0 = cr0;
vmx_set_cr0(vcpu, cr0); /* enter rmode */
return 0;
}
-static int handle_external_interrupt(struct kvm_vcpu *vcpu)
+static __always_inline int handle_external_interrupt(struct kvm_vcpu *vcpu)
{
++vcpu->stat.irq_exits;
return 1;
vmcs_writel(GUEST_DR7, val);
}
-static int handle_cpuid(struct kvm_vcpu *vcpu)
-{
- return kvm_emulate_cpuid(vcpu);
-}
-
-static int handle_rdmsr(struct kvm_vcpu *vcpu)
-{
- return kvm_emulate_rdmsr(vcpu);
-}
-
-static int handle_wrmsr(struct kvm_vcpu *vcpu)
-{
- return kvm_emulate_wrmsr(vcpu);
-}
-
static int handle_tpr_below_threshold(struct kvm_vcpu *vcpu)
{
kvm_apic_update_ppr(vcpu);
return 1;
}
-static int handle_halt(struct kvm_vcpu *vcpu)
-{
- return kvm_emulate_halt(vcpu);
-}
-
static int handle_vmcall(struct kvm_vcpu *vcpu)
{
return kvm_emulate_hypercall(vcpu);
return 1;
}
-static int handle_unexpected_vmexit(struct kvm_vcpu *vcpu)
-{
- kvm_skip_emulated_instruction(vcpu);
- WARN_ONCE(1, "Unexpected VM-Exit Reason = 0x%x",
- vmcs_read32(VM_EXIT_REASON));
- return 1;
-}
-
/*
* The exit handlers return 1 if the exit was handled fully and guest execution
* may resume. Otherwise they set the kvm_run parameter to indicate what needs
[EXIT_REASON_IO_INSTRUCTION] = handle_io,
[EXIT_REASON_CR_ACCESS] = handle_cr,
[EXIT_REASON_DR_ACCESS] = handle_dr,
- [EXIT_REASON_CPUID] = handle_cpuid,
- [EXIT_REASON_MSR_READ] = handle_rdmsr,
- [EXIT_REASON_MSR_WRITE] = handle_wrmsr,
+ [EXIT_REASON_CPUID] = kvm_emulate_cpuid,
+ [EXIT_REASON_MSR_READ] = kvm_emulate_rdmsr,
+ [EXIT_REASON_MSR_WRITE] = kvm_emulate_wrmsr,
[EXIT_REASON_PENDING_INTERRUPT] = handle_interrupt_window,
- [EXIT_REASON_HLT] = handle_halt,
+ [EXIT_REASON_HLT] = kvm_emulate_halt,
[EXIT_REASON_INVD] = handle_invd,
[EXIT_REASON_INVLPG] = handle_invlpg,
[EXIT_REASON_RDPMC] = handle_rdpmc,
[EXIT_REASON_INVVPID] = handle_vmx_instruction,
[EXIT_REASON_RDRAND] = handle_invalid_op,
[EXIT_REASON_RDSEED] = handle_invalid_op,
- [EXIT_REASON_XSAVES] = handle_unexpected_vmexit,
- [EXIT_REASON_XRSTORS] = handle_unexpected_vmexit,
[EXIT_REASON_PML_FULL] = handle_pml_full,
[EXIT_REASON_INVPCID] = handle_invpcid,
[EXIT_REASON_VMFUNC] = handle_vmx_instruction,
[EXIT_REASON_PREEMPTION_TIMER] = handle_preemption_timer,
[EXIT_REASON_ENCLS] = handle_encls,
- [EXIT_REASON_UMWAIT] = handle_unexpected_vmexit,
- [EXIT_REASON_TPAUSE] = handle_unexpected_vmexit,
};
static const int kvm_vmx_max_exit_handlers =
}
if (exit_reason < kvm_vmx_max_exit_handlers
- && kvm_vmx_exit_handlers[exit_reason])
+ && kvm_vmx_exit_handlers[exit_reason]) {
+#ifdef CONFIG_RETPOLINE
+ if (exit_reason == EXIT_REASON_MSR_WRITE)
+ return kvm_emulate_wrmsr(vcpu);
+ else if (exit_reason == EXIT_REASON_PREEMPTION_TIMER)
+ return handle_preemption_timer(vcpu);
+ else if (exit_reason == EXIT_REASON_PENDING_INTERRUPT)
+ return handle_interrupt_window(vcpu);
+ else if (exit_reason == EXIT_REASON_EXTERNAL_INTERRUPT)
+ return handle_external_interrupt(vcpu);
+ else if (exit_reason == EXIT_REASON_HLT)
+ return kvm_emulate_halt(vcpu);
+ else if (exit_reason == EXIT_REASON_EPT_MISCONFIG)
+ return handle_ept_misconfig(vcpu);
+#endif
return kvm_vmx_exit_handlers[exit_reason](vcpu);
- else {
+ } else {
vcpu_unimpl(vcpu, "vmx: unexpected exit reason 0x%x\n",
exit_reason);
dump_vmcs();
static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr)
{
struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
+ int tpr_threshold;
if (is_guest_mode(vcpu) &&
nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW))
return;
- if (irr == -1 || tpr < irr) {
- vmcs_write32(TPR_THRESHOLD, 0);
- return;
- }
-
- vmcs_write32(TPR_THRESHOLD, irr);
+ tpr_threshold = (irr == -1 || tpr < irr) ? 0 : irr;
+ if (is_guest_mode(vcpu))
+ to_vmx(vcpu)->nested.l1_tpr_threshold = tpr_threshold;
+ else
+ vmcs_write32(TPR_THRESHOLD, tpr_threshold);
}
void vmx_set_virtual_apic_mode(struct kvm_vcpu *vcpu)
if (vmx->nested.need_vmcs12_to_shadow_sync)
nested_sync_vmcs12_to_shadow(vcpu);
- if (test_bit(VCPU_REGS_RSP, (unsigned long *)&vcpu->arch.regs_dirty))
+ if (kvm_register_is_dirty(vcpu, VCPU_REGS_RSP))
vmcs_writel(GUEST_RSP, vcpu->arch.regs[VCPU_REGS_RSP]);
- if (test_bit(VCPU_REGS_RIP, (unsigned long *)&vcpu->arch.regs_dirty))
+ if (kvm_register_is_dirty(vcpu, VCPU_REGS_RIP))
vmcs_writel(GUEST_RIP, vcpu->arch.regs[VCPU_REGS_RIP]);
cr3 = __get_current_cr3_fast();
if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
vmx_set_interrupt_shadow(vcpu, 0);
- kvm_load_guest_xcr0(vcpu);
+ kvm_load_guest_xsave_state(vcpu);
if (static_cpu_has(X86_FEATURE_PKU) &&
kvm_read_cr4_bits(vcpu, X86_CR4_PKE) &&
__write_pkru(vmx->host_pkru);
}
- kvm_put_guest_xcr0(vcpu);
+ kvm_load_host_xsave_state(vcpu);
vmx->nested.nested_run_pending = 0;
vmx->idt_vectoring_info = 0;
int err;
struct vcpu_vmx *vmx;
unsigned long *msr_bitmap;
- int cpu;
+ int i, cpu;
BUILD_BUG_ON_MSG(offsetof(struct vcpu_vmx, vcpu) != 0,
"struct kvm_vcpu must be at offset 0 for arch usercopy region");
if (!vmx->guest_msrs)
goto free_pml;
+ for (i = 0; i < ARRAY_SIZE(vmx_msr_index); ++i) {
+ u32 index = vmx_msr_index[i];
+ u32 data_low, data_high;
+ int j = vmx->nmsrs;
+
+ if (rdmsr_safe(index, &data_low, &data_high) < 0)
+ continue;
+ if (wrmsr_safe(index, data_low, data_high) < 0)
+ continue;
+ vmx->guest_msrs[j].index = i;
+ vmx->guest_msrs[j].data = 0;
+ vmx->guest_msrs[j].mask = -1ull;
+ ++vmx->nmsrs;
+ }
+
err = alloc_loaded_vmcs(&vmx->vmcs01);
if (err < 0)
goto free_msrs;
cpu = get_cpu();
vmx_vcpu_load(&vmx->vcpu, cpu);
vmx->vcpu.cpu = cpu;
- vmx_vcpu_setup(vmx);
+ init_vmcs(vmx);
vmx_vcpu_put(&vmx->vcpu);
put_cpu();
if (cpu_need_virtualize_apic_accesses(&vmx->vcpu)) {
cr4_fixed1_update(X86_CR4_SMAP, ebx, bit(X86_FEATURE_SMAP));
cr4_fixed1_update(X86_CR4_PKE, ecx, bit(X86_FEATURE_PKU));
cr4_fixed1_update(X86_CR4_UMIP, ecx, bit(X86_FEATURE_UMIP));
+ cr4_fixed1_update(X86_CR4_LA57, ecx, bit(X86_FEATURE_LA57));
#undef cr4_fixed1_update
}
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
+ /* xsaves_enabled is recomputed in vmx_compute_secondary_exec_control(). */
+ vcpu->arch.xsaves_enabled = false;
+
if (cpu_has_secondary_exec_ctrls()) {
vmx_compute_secondary_exec_control(vmx);
vmcs_set_secondary_exec_control(vmx);
WARN_ONCE(host_bndcfgs, "KVM: BNDCFGS in host will be lost");
}
- if (boot_cpu_has(X86_FEATURE_XSAVES))
- rdmsrl(MSR_IA32_XSS, host_xss);
-
if (!cpu_has_vmx_vpid() || !cpu_has_vmx_invvpid() ||
!(cpu_has_vmx_invvpid_single() || cpu_has_vmx_invvpid_global()))
enable_vpid = 0;
.get_cpl = vmx_get_cpl,
.get_cs_db_l_bits = vmx_get_cs_db_l_bits,
.decache_cr0_guest_bits = vmx_decache_cr0_guest_bits,
- .decache_cr3 = vmx_decache_cr3,
.decache_cr4_guest_bits = vmx_decache_cr4_guest_bits,
.set_cr0 = vmx_set_cr0,
.set_cr3 = vmx_set_cr3,
#define X2APIC_MSR(r) (APIC_BASE_MSR + ((r) >> 4))
-#define NR_AUTOLOAD_MSRS 8
+#define NR_LOADSTORE_MSRS 8
struct vmx_msrs {
unsigned int nr;
- struct vmx_msr_entry val[NR_AUTOLOAD_MSRS];
+ struct vmx_msr_entry val[NR_LOADSTORE_MSRS];
};
struct shared_msr_entry {
u64 vmcs01_debugctl;
u64 vmcs01_guest_bndcfgs;
+ /* to migrate it to L1 if L2 writes to L1's CR8 directly */
+ int l1_tpr_threshold;
+
u16 vpid02;
u16 last_vpid;
struct vmx_msrs host;
} msr_autoload;
+ struct msr_autostore {
+ struct vmx_msrs guest;
+ } msr_autostore;
+
struct {
int vm86_active;
ulong save_rflags;
struct shared_msr_entry *find_msr_entry(struct vcpu_vmx *vmx, u32 msr);
void pt_update_intercept_for_msr(struct vcpu_vmx *vmx);
void vmx_update_host_rsp(struct vcpu_vmx *vmx, unsigned long host_rsp);
+int vmx_find_msr_index(struct vmx_msrs *m, u32 msr);
#define POSTED_INTR_ON 0
#define POSTED_INTR_SN 1
static struct kvm_shared_msrs_global __read_mostly shared_msrs_global;
static struct kvm_shared_msrs __percpu *shared_msrs;
+static u64 __read_mostly host_xss;
+
struct kvm_stats_debugfs_item debugfs_entries[] = {
{ "pf_fixed", VCPU_STAT(pf_fixed) },
{ "pf_guest", VCPU_STAT(pf_guest) },
asmlinkage __visible void kvm_spurious_fault(void)
{
/* Fault while not rebooting. We want the trace. */
- if (!kvm_rebooting)
- BUG();
+ BUG_ON(!kvm_rebooting);
}
EXPORT_SYMBOL_GPL(kvm_spurious_fault);
ret = 1;
memcpy(mmu->pdptrs, pdpte, sizeof(mmu->pdptrs));
- __set_bit(VCPU_EXREG_PDPTR,
- (unsigned long *)&vcpu->arch.regs_avail);
- __set_bit(VCPU_EXREG_PDPTR,
- (unsigned long *)&vcpu->arch.regs_dirty);
+ kvm_register_mark_dirty(vcpu, VCPU_EXREG_PDPTR);
+
out:
return ret;
bool pdptrs_changed(struct kvm_vcpu *vcpu)
{
u64 pdpte[ARRAY_SIZE(vcpu->arch.walk_mmu->pdptrs)];
- bool changed = true;
int offset;
gfn_t gfn;
int r;
if (!is_pae_paging(vcpu))
return false;
- if (!test_bit(VCPU_EXREG_PDPTR,
- (unsigned long *)&vcpu->arch.regs_avail))
+ if (!kvm_register_is_available(vcpu, VCPU_EXREG_PDPTR))
return true;
gfn = (kvm_read_cr3(vcpu) & 0xffffffe0ul) >> PAGE_SHIFT;
r = kvm_read_nested_guest_page(vcpu, gfn, pdpte, offset, sizeof(pdpte),
PFERR_USER_MASK | PFERR_WRITE_MASK);
if (r < 0)
- goto out;
- changed = memcmp(pdpte, vcpu->arch.walk_mmu->pdptrs, sizeof(pdpte)) != 0;
-out:
+ return true;
- return changed;
+ return memcmp(pdpte, vcpu->arch.walk_mmu->pdptrs, sizeof(pdpte)) != 0;
}
EXPORT_SYMBOL_GPL(pdptrs_changed);
}
EXPORT_SYMBOL_GPL(kvm_lmsw);
-void kvm_load_guest_xcr0(struct kvm_vcpu *vcpu)
+void kvm_load_guest_xsave_state(struct kvm_vcpu *vcpu)
{
- if (kvm_read_cr4_bits(vcpu, X86_CR4_OSXSAVE) &&
- !vcpu->guest_xcr0_loaded) {
- /* kvm_set_xcr() also depends on this */
+ if (kvm_read_cr4_bits(vcpu, X86_CR4_OSXSAVE)) {
+
if (vcpu->arch.xcr0 != host_xcr0)
xsetbv(XCR_XFEATURE_ENABLED_MASK, vcpu->arch.xcr0);
- vcpu->guest_xcr0_loaded = 1;
+
+ if (vcpu->arch.xsaves_enabled &&
+ vcpu->arch.ia32_xss != host_xss)
+ wrmsrl(MSR_IA32_XSS, vcpu->arch.ia32_xss);
}
}
-EXPORT_SYMBOL_GPL(kvm_load_guest_xcr0);
+EXPORT_SYMBOL_GPL(kvm_load_guest_xsave_state);
-void kvm_put_guest_xcr0(struct kvm_vcpu *vcpu)
+void kvm_load_host_xsave_state(struct kvm_vcpu *vcpu)
{
- if (vcpu->guest_xcr0_loaded) {
+ if (kvm_read_cr4_bits(vcpu, X86_CR4_OSXSAVE)) {
+
if (vcpu->arch.xcr0 != host_xcr0)
xsetbv(XCR_XFEATURE_ENABLED_MASK, host_xcr0);
- vcpu->guest_xcr0_loaded = 0;
+
+ if (vcpu->arch.xsaves_enabled &&
+ vcpu->arch.ia32_xss != host_xss)
+ wrmsrl(MSR_IA32_XSS, host_xss);
}
+
}
-EXPORT_SYMBOL_GPL(kvm_put_guest_xcr0);
+EXPORT_SYMBOL_GPL(kvm_load_host_xsave_state);
static int __kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr)
{
kvm_mmu_new_cr3(vcpu, cr3, skip_tlb_flush);
vcpu->arch.cr3 = cr3;
- __set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail);
+ kvm_register_mark_available(vcpu, VCPU_EXREG_CR3);
return 0;
}
}
#ifdef CONFIG_X86_64
+struct pvclock_clock { /* extract of a clocksource struct, filled by update_pvclock_gtod() */
+ int vclock_mode; /* selects the read path in vgettsc() (VCLOCK_TSC, VCLOCK_HVCLOCK, ...) */
+ u64 cycle_last; /* subtracted from the current counter reading in vgettsc() */
+ u64 mask; /* applied to the cycle delta in vgettsc() */
+ u32 mult; /* vgettsc() multiplies the masked delta by this */
+ u32 shift; /* callers right-shift the accumulated value by this */
+};
+
struct pvclock_gtod_data {
seqcount_t seq;
- struct { /* extract of a clocksource struct */
- int vclock_mode;
- u64 cycle_last;
- u64 mask;
- u32 mult;
- u32 shift;
- } clock;
+ struct pvclock_clock clock; /* extract of a clocksource struct */
+ struct pvclock_clock raw_clock; /* extract of a clocksource struct */
+ u64 boot_ns_raw;
u64 boot_ns;
u64 nsec_base;
u64 wall_time_sec;
+ u64 monotonic_raw_nsec;
};
static struct pvclock_gtod_data pvclock_gtod_data;
static void update_pvclock_gtod(struct timekeeper *tk)
{
struct pvclock_gtod_data *vdata = &pvclock_gtod_data;
- u64 boot_ns;
+ u64 boot_ns, boot_ns_raw;
boot_ns = ktime_to_ns(ktime_add(tk->tkr_mono.base, tk->offs_boot));
+ boot_ns_raw = ktime_to_ns(ktime_add(tk->tkr_raw.base, tk->offs_boot));
write_seqcount_begin(&vdata->seq);
vdata->clock.mult = tk->tkr_mono.mult;
vdata->clock.shift = tk->tkr_mono.shift;
+ vdata->raw_clock.vclock_mode = tk->tkr_raw.clock->archdata.vclock_mode;
+ vdata->raw_clock.cycle_last = tk->tkr_raw.cycle_last;
+ vdata->raw_clock.mask = tk->tkr_raw.mask;
+ vdata->raw_clock.mult = tk->tkr_raw.mult;
+ vdata->raw_clock.shift = tk->tkr_raw.shift;
+
vdata->boot_ns = boot_ns;
vdata->nsec_base = tk->tkr_mono.xtime_nsec;
vdata->wall_time_sec = tk->xtime_sec;
+ vdata->boot_ns_raw = boot_ns_raw;
+ vdata->monotonic_raw_nsec = tk->tkr_raw.xtime_nsec;
+
write_seqcount_end(&vdata->seq);
}
#endif
return last;
}
-static inline u64 vgettsc(u64 *tsc_timestamp, int *mode)
+static inline u64 vgettsc(struct pvclock_clock *clock, u64 *tsc_timestamp,
+ int *mode)
{
long v;
- struct pvclock_gtod_data *gtod = &pvclock_gtod_data;
u64 tsc_pg_val;
- switch (gtod->clock.vclock_mode) {
+ switch (clock->vclock_mode) {
case VCLOCK_HVCLOCK:
tsc_pg_val = hv_read_tsc_page_tsc(hv_get_tsc_page(),
tsc_timestamp);
if (tsc_pg_val != U64_MAX) {
/* TSC page valid */
*mode = VCLOCK_HVCLOCK;
- v = (tsc_pg_val - gtod->clock.cycle_last) &
- gtod->clock.mask;
+ v = (tsc_pg_val - clock->cycle_last) &
+ clock->mask;
} else {
/* TSC page invalid */
*mode = VCLOCK_NONE;
case VCLOCK_TSC:
*mode = VCLOCK_TSC;
*tsc_timestamp = read_tsc();
- v = (*tsc_timestamp - gtod->clock.cycle_last) &
- gtod->clock.mask;
+ v = (*tsc_timestamp - clock->cycle_last) &
+ clock->mask;
break;
default:
*mode = VCLOCK_NONE;
if (*mode == VCLOCK_NONE)
*tsc_timestamp = v = 0;
- return v * gtod->clock.mult;
+ return v * clock->mult;
}
-static int do_monotonic_boot(s64 *t, u64 *tsc_timestamp)
+/*
+ * Compute a nanosecond timestamp from the raw clock snapshot held in
+ * pvclock_gtod_data and store it in *t.  The snapshot is read inside a
+ * seqcount retry loop so a concurrent update_pvclock_gtod() cannot yield a
+ * torn result.  *tsc_timestamp is filled in by vgettsc().
+ */
+static int do_monotonic_raw(s64 *t, u64 *tsc_timestamp)
 {
 	struct pvclock_gtod_data *gtod = &pvclock_gtod_data;
 	unsigned long seq;
 	do {
 		seq = read_seqcount_begin(&gtod->seq);
-		ns = gtod->nsec_base;
-		ns += vgettsc(tsc_timestamp, &mode);
-		ns >>= gtod->clock.shift;
-		ns += gtod->boot_ns;
+		ns = gtod->monotonic_raw_nsec;
+		ns += vgettsc(&gtod->raw_clock, tsc_timestamp, &mode);
+		/* The delta was scaled by raw_clock.mult; use the matching shift. */
+		ns >>= gtod->raw_clock.shift;
+		ns += gtod->boot_ns_raw;
 	} while (unlikely(read_seqcount_retry(&gtod->seq, seq)));
 	*t = ns;
seq = read_seqcount_begin(&gtod->seq);
ts->tv_sec = gtod->wall_time_sec;
ns = gtod->nsec_base;
- ns += vgettsc(tsc_timestamp, &mode);
+ ns += vgettsc(&gtod->clock, tsc_timestamp, &mode);
ns >>= gtod->clock.shift;
} while (unlikely(read_seqcount_retry(&gtod->seq, seq)));
if (!gtod_is_based_on_tsc(pvclock_gtod_data.clock.vclock_mode))
return false;
- return gtod_is_based_on_tsc(do_monotonic_boot(kernel_ns,
+ return gtod_is_based_on_tsc(do_monotonic_raw(kernel_ns,
tsc_timestamp));
}
static void kvmclock_reset(struct kvm_vcpu *vcpu)
{
vcpu->arch.pv_time_enabled = false;
+ vcpu->arch.time = 0;
}
static void kvm_vcpu_flush_tlb(struct kvm_vcpu *vcpu, bool invalidate_gpa)
case MSR_IA32_TSC:
kvm_write_tsc(vcpu, msr_info);
break;
+ case MSR_IA32_XSS:
+ if (!msr_info->host_initiated &&
+ !guest_cpuid_has(vcpu, X86_FEATURE_XSAVES))
+ return 1;
+ /*
+ * We do support PT if kvm_x86_ops->pt_supported(), but we do
+ * not support IA32_XSS[bit 8]. Guests will have to use
+ * RDMSR/WRMSR rather than XSAVES/XRSTORS to save/restore PT
+ * MSRs.
+ */
+ if (data != 0)
+ return 1;
+ vcpu->arch.ia32_xss = data;
+ break;
case MSR_SMI_COUNT:
if (!msr_info->host_initiated)
return 1;
case MSR_KVM_SYSTEM_TIME: {
struct kvm_arch *ka = &vcpu->kvm->arch;
- kvmclock_reset(vcpu);
-
if (vcpu->vcpu_id == 0 && !msr_info->host_initiated) {
bool tmp = (msr == MSR_KVM_SYSTEM_TIME);
kvm_make_request(KVM_REQ_GLOBAL_CLOCK_UPDATE, vcpu);
/* we verify if the enable bit is set... */
+ vcpu->arch.pv_time_enabled = false;
if (!(data & 1))
break;
- if (kvm_gfn_to_hva_cache_init(vcpu->kvm,
+ if (!kvm_gfn_to_hva_cache_init(vcpu->kvm,
&vcpu->arch.pv_time, data & ~1ULL,
sizeof(struct pvclock_vcpu_time_info)))
- vcpu->arch.pv_time_enabled = false;
- else
vcpu->arch.pv_time_enabled = true;
break;
case MSR_IA32_MC0_CTL ... MSR_IA32_MCx_CTL(KVM_MAX_MCE_BANKS) - 1:
return get_msr_mce(vcpu, msr_info->index, &msr_info->data,
msr_info->host_initiated);
+ case MSR_IA32_XSS:
+ if (!msr_info->host_initiated &&
+ !guest_cpuid_has(vcpu, X86_FEATURE_XSAVES))
+ return 1;
+ msr_info->data = vcpu->arch.ia32_xss;
+ break;
case MSR_K7_CLK_CTL:
/*
* Provide expected ramp-up count for K7. All other
vcpu->arch.hflags |= HF_SMM_INSIDE_NMI_MASK;
else
vcpu->arch.hflags &= ~HF_SMM_INSIDE_NMI_MASK;
- if (lapic_in_kernel(vcpu)) {
- if (events->smi.latched_init)
- set_bit(KVM_APIC_INIT, &vcpu->arch.apic->pending_events);
- else
- clear_bit(KVM_APIC_INIT, &vcpu->arch.apic->pending_events);
- }
+ }
+
+ if (lapic_in_kernel(vcpu)) {
+ if (events->smi.latched_init)
+ set_bit(KVM_APIC_INIT, &vcpu->arch.apic->pending_events);
+ else
+ clear_bit(KVM_APIC_INIT, &vcpu->arch.apic->pending_events);
}
}
if (!irqchip_kernel(kvm))
goto set_irqchip_out;
r = kvm_vm_ioctl_set_irqchip(kvm, chip);
- if (r)
- goto set_irqchip_out;
- r = 0;
set_irqchip_out:
kfree(chip);
break;
+/*
+ * Forward @pmc to kvm_pmu_is_valid_rdpmc_ecx() for the vCPU that owns
+ * @ctxt and return that helper's result unchanged.
+ */
static int emulator_check_pmc(struct x86_emulate_ctxt *ctxt,
u32 pmc)
{
- return kvm_pmu_is_valid_msr_idx(emul_to_vcpu(ctxt), pmc);
+ return kvm_pmu_is_valid_rdpmc_ecx(emul_to_vcpu(ctxt), pmc);
}
static int emulator_read_pmc(struct x86_emulate_ctxt *ctxt,
kvm_make_request(KVM_REQ_EVENT, vcpu);
}
+/*
+ * Raise KVM_REQ_SCAN_IOAPIC through kvm_make_vcpus_request_mask().
+ *
+ * @kvm: VM the request is made for.
+ * @vcpu_bitmap: bitmap passed through unchanged to
+ * kvm_make_vcpus_request_mask().
+ *
+ * A zeroed scratch cpumask is allocated with GFP_ATOMIC for the helper
+ * and freed again before returning.
+ */
+void kvm_make_scan_ioapic_request_mask(struct kvm *kvm,
+ unsigned long *vcpu_bitmap)
+{
+ cpumask_var_t cpus;
+
+ /*
+ * NOTE(review): the allocation result is not checked; presumably
+ * kvm_make_vcpus_request_mask() tolerates a failed allocation -
+ * confirm against its definition.
+ */
+ zalloc_cpumask_var(&cpus, GFP_ATOMIC);
+
+ /* The helper's boolean result is not used here, so it is not stored. */
+ kvm_make_vcpus_request_mask(kvm, KVM_REQ_SCAN_IOAPIC,
+ vcpu_bitmap, cpus);
+
+ free_cpumask_var(cpus);
+}
+
void kvm_make_scan_ioapic_request(struct kvm *kvm)
{
kvm_make_all_cpus_request(kvm, KVM_REQ_SCAN_IOAPIC);
*/
put_page(page);
}
-EXPORT_SYMBOL_GPL(kvm_vcpu_reload_apic_access_page);
void __kvm_request_immediate_exit(struct kvm_vcpu *vcpu)
{
mp_state->mp_state != KVM_MP_STATE_RUNNABLE)
goto out;
- /* INITs are latched while in SMM */
- if ((is_smm(vcpu) || vcpu->arch.smi_pending) &&
+ /*
+ * KVM_MP_STATE_INIT_RECEIVED means the processor is in
+ * INIT state; latched init should be reported using
+ * KVM_SET_VCPU_EVENTS, so reject it here.
+ */
+ if ((kvm_vcpu_latch_init(vcpu) || vcpu->arch.smi_pending) &&
(mp_state->mp_state == KVM_MP_STATE_SIPI_RECEIVED ||
mp_state->mp_state == KVM_MP_STATE_INIT_RECEIVED))
goto out;
vcpu->arch.cr2 = sregs->cr2;
mmu_reset_needed |= kvm_read_cr3(vcpu) != sregs->cr3;
vcpu->arch.cr3 = sregs->cr3;
- __set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail);
+ kvm_register_mark_available(vcpu, VCPU_EXREG_CR3);
kvm_set_cr8(vcpu, sregs->cr8);
kvm_default_tsc_scaling_ratio = 1ULL << kvm_tsc_scaling_ratio_frac_bits;
}
+ if (boot_cpu_has(X86_FEATURE_XSAVES))
+ rdmsrl(MSR_IA32_XSS, host_xss);
+
kvm_init_msr_list();
return 0;
}
goto fail_free_pio_data;
if (irqchip_in_kernel(vcpu->kvm)) {
- vcpu->arch.apicv_active = kvm_x86_ops->get_enable_apicv(vcpu);
+ vcpu->arch.apicv_active = kvm_x86_ops->get_enable_apicv(vcpu->kvm);
r = kvm_create_lapic(vcpu, lapic_timer_advance_ns);
if (r < 0)
goto fail_mmu_destroy;
+/*
+ * Per-vCPU work done before handing off to the vendor sched_in hook:
+ * - flag that an L1D flush is wanted (l1tf_flush_l1d);
+ * - if the vPMU has a non-zero version and event_count, mark it as
+ * needing cleanup and raise KVM_REQ_PMU on the vCPU.
+ */
void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu)
{
+ struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
+
vcpu->arch.l1tf_flush_l1d = true;
+ if (pmu->version && unlikely(pmu->event_count)) {
+ pmu->need_cleanup = true;
+ kvm_make_request(KVM_REQ_PMU, vcpu);
+ }
kvm_x86_ops->sched_in(vcpu, cpu);
}
return false;
}
-static inline unsigned long kvm_register_readl(struct kvm_vcpu *vcpu,
- enum kvm_reg reg)
+static inline unsigned long kvm_register_readl(struct kvm_vcpu *vcpu, int reg)
{
unsigned long val = kvm_register_read(vcpu, reg);
}
static inline void kvm_register_writel(struct kvm_vcpu *vcpu,
- enum kvm_reg reg,
- unsigned long val)
+ int reg, unsigned long val)
{
if (!is_64_bit_mode(vcpu))
val = (u32)val;
return !(kvm->arch.disabled_quirks & quirk);
}
+/*
+ * Returns true when @vcpu is in SMM or when the vendor hook
+ * apic_init_signal_blocked() reports true for it.
+ */
+static inline bool kvm_vcpu_latch_init(struct kvm_vcpu *vcpu)
+{
+ return is_smm(vcpu) || kvm_x86_ops->apic_init_signal_blocked(vcpu);
+}
+
void kvm_set_pending_timer(struct kvm_vcpu *vcpu);
void kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip);
return (data | ((data & 0x0202020202020202ull) << 1)) == data;
}
-void kvm_load_guest_xcr0(struct kvm_vcpu *vcpu);
-void kvm_put_guest_xcr0(struct kvm_vcpu *vcpu);
+void kvm_load_guest_xsave_state(struct kvm_vcpu *vcpu);
+void kvm_load_host_xsave_state(struct kvm_vcpu *vcpu);
#endif
reg = <0xf0100000 0x03f00000>;
// BUS_ADDRESS(3) CPU_PHYSICAL(1) SIZE(2)
- ranges = <0x01000000 0x0 0xf0000000 0xf0000000 0x0 0x00010000>,
+ ranges = <0x01000000 0x0 0x00000000 0xf0000000 0x0 0x00010000>,
<0x02000000 0x0 0xf4000000 0xf4000000 0x0 0x08000000>;
// PCI_DEVICE(3) INT#(1) CONTROLLER(PHANDLE) CONTROLLER_DATA(2)
" getex %0\n"
" beqz %0, 1b\n"
: "=&a" (tmp)
- : "a" (~mask), "a" (p)
+ : "a" (mask), "a" (p)
: "memory");
}
case 4: __put_user_asm(x, ptr, retval, 4, "s32i", __cb); break; \
case 8: { \
__typeof__(*ptr) __v64 = x; \
- retval = __copy_to_user(ptr, &__v64, 8); \
+ retval = __copy_to_user(ptr, &__v64, 8) ? -EFAULT : 0; \
break; \
} \
default: __put_user_bad(); \
#define __check_align_1 ""
#define __check_align_2 \
- " _bbci.l %3, 0, 1f \n" \
- " movi %0, %4 \n" \
+ " _bbci.l %[addr], 0, 1f \n" \
+ " movi %[err], %[efault] \n" \
" _j 2f \n"
#define __check_align_4 \
- " _bbsi.l %3, 0, 0f \n" \
- " _bbci.l %3, 1, 1f \n" \
- "0: movi %0, %4 \n" \
+ " _bbsi.l %[addr], 0, 0f \n" \
+ " _bbci.l %[addr], 1, 1f \n" \
+ "0: movi %[err], %[efault] \n" \
" _j 2f \n"
* WARNING: If you modify this macro at all, verify that the
* __check_align_* macros still work.
*/
-#define __put_user_asm(x, addr, err, align, insn, cb) \
+#define __put_user_asm(x_, addr_, err_, align, insn, cb)\
__asm__ __volatile__( \
__check_align_##align \
- "1: "insn" %2, %3, 0 \n" \
+ "1: "insn" %[x], %[addr], 0 \n" \
"2: \n" \
" .section .fixup,\"ax\" \n" \
" .align 4 \n" \
" .literal_position \n" \
"5: \n" \
- " movi %1, 2b \n" \
- " movi %0, %4 \n" \
- " jx %1 \n" \
+ " movi %[tmp], 2b \n" \
+ " movi %[err], %[efault] \n" \
+ " jx %[tmp] \n" \
" .previous \n" \
" .section __ex_table,\"a\" \n" \
" .long 1b, 5b \n" \
" .previous" \
- :"=r" (err), "=r" (cb) \
- :"r" ((int)(x)), "r" (addr), "i" (-EFAULT), "0" (err))
+ :[err] "+r"(err_), [tmp] "=r"(cb) \
+ :[x] "r"(x_), [addr] "r"(addr_), [efault] "i"(-EFAULT))
#define __get_user_nocheck(x, ptr, size) \
({ \
- long __gu_err, __gu_val; \
- __get_user_size(__gu_val, (ptr), (size), __gu_err); \
- (x) = (__force __typeof__(*(ptr)))__gu_val; \
+ long __gu_err; \
+ __get_user_size((x), (ptr), (size), __gu_err); \
__gu_err; \
})
#define __get_user_check(x, ptr, size) \
({ \
- long __gu_err = -EFAULT, __gu_val = 0; \
+ long __gu_err = -EFAULT; \
const __typeof__(*(ptr)) *__gu_addr = (ptr); \
- if (access_ok(__gu_addr, size)) \
- __get_user_size(__gu_val, __gu_addr, (size), __gu_err); \
- (x) = (__force __typeof__(*(ptr)))__gu_val; \
+ if (access_ok(__gu_addr, size)) \
+ __get_user_size((x), __gu_addr, (size), __gu_err); \
+ else \
+ (x) = 0; \
__gu_err; \
})
case 1: __get_user_asm(x, ptr, retval, 1, "l8ui", __cb); break;\
case 2: __get_user_asm(x, ptr, retval, 2, "l16ui", __cb); break;\
case 4: __get_user_asm(x, ptr, retval, 4, "l32i", __cb); break;\
- case 8: retval = __copy_from_user(&x, ptr, 8); break; \
- default: (x) = __get_user_bad(); \
+ case 8: { \
+ u64 __x; \
+ if (unlikely(__copy_from_user(&__x, ptr, 8))) { \
+ retval = -EFAULT; \
+ (x) = 0; \
+ } else { \
+ (x) = *(__force __typeof__((ptr)))&__x; \
+ } \
+ break; \
+ } \
+ default: (x) = 0; __get_user_bad(); \
} \
} while (0)
* WARNING: If you modify this macro at all, verify that the
* __check_align_* macros still work.
*/
-#define __get_user_asm(x, addr, err, align, insn, cb) \
-__asm__ __volatile__( \
- __check_align_##align \
- "1: "insn" %2, %3, 0 \n" \
- "2: \n" \
- " .section .fixup,\"ax\" \n" \
- " .align 4 \n" \
- " .literal_position \n" \
- "5: \n" \
- " movi %1, 2b \n" \
- " movi %2, 0 \n" \
- " movi %0, %4 \n" \
- " jx %1 \n" \
- " .previous \n" \
- " .section __ex_table,\"a\" \n" \
- " .long 1b, 5b \n" \
- " .previous" \
- :"=r" (err), "=r" (cb), "=r" (x) \
- :"r" (addr), "i" (-EFAULT), "0" (err))
+#define __get_user_asm(x_, addr_, err_, align, insn, cb) \
+do { \
+ u32 __x = 0; \
+ __asm__ __volatile__( \
+ __check_align_##align \
+ "1: "insn" %[x], %[addr], 0 \n" \
+ "2: \n" \
+ " .section .fixup,\"ax\" \n" \
+ " .align 4 \n" \
+ " .literal_position \n" \
+ "5: \n" \
+ " movi %[tmp], 2b \n" \
+ " movi %[err], %[efault] \n" \
+ " jx %[tmp] \n" \
+ " .previous \n" \
+ " .section __ex_table,\"a\" \n" \
+ " .long 1b, 5b \n" \
+ " .previous" \
+ :[err] "+r"(err_), [tmp] "=r"(cb), [x] "+r"(__x) \
+ :[addr] "r"(addr_), [efault] "i"(-EFAULT)); \
+ (x_) = (__force __typeof__(*(addr_)))__x; \
+} while (0)
/*
// FIXME EXPORT_SYMBOL(screen_info);
#endif
-EXPORT_SYMBOL(outsb);
-EXPORT_SYMBOL(outsw);
-EXPORT_SYMBOL(outsl);
-EXPORT_SYMBOL(insb);
-EXPORT_SYMBOL(insw);
-EXPORT_SYMBOL(insl);
-
extern long common_exception_return;
EXPORT_SYMBOL(common_exception_return);
const struct blkcg_policy *pol)
{
struct blkg_policy_data *pd_prealloc = NULL;
- struct blkcg_gq *blkg;
+ struct blkcg_gq *blkg, *pinned_blkg = NULL;
int ret;
if (blkcg_policy_enabled(q, pol))
if (queue_is_mq(q))
blk_mq_freeze_queue(q);
-pd_prealloc:
- if (!pd_prealloc) {
- pd_prealloc = pol->pd_alloc_fn(GFP_KERNEL, q, &blkcg_root);
- if (!pd_prealloc) {
- ret = -ENOMEM;
- goto out_bypass_end;
- }
- }
-
+retry:
spin_lock_irq(&q->queue_lock);
- /* blkg_list is pushed at the head, reverse walk to init parents first */
+ /* blkg_list is pushed at the head, reverse walk to allocate parents first */
list_for_each_entry_reverse(blkg, &q->blkg_list, q_node) {
struct blkg_policy_data *pd;
if (blkg->pd[pol->plid])
continue;
- pd = pol->pd_alloc_fn(GFP_NOWAIT | __GFP_NOWARN, q, &blkcg_root);
- if (!pd)
- swap(pd, pd_prealloc);
+ /* If prealloc matches, use it; otherwise try GFP_NOWAIT */
+ if (blkg == pinned_blkg) {
+ pd = pd_prealloc;
+ pd_prealloc = NULL;
+ } else {
+ pd = pol->pd_alloc_fn(GFP_NOWAIT | __GFP_NOWARN, q,
+ blkg->blkcg);
+ }
+
if (!pd) {
+ /*
+ * GFP_NOWAIT failed. Free the existing one and
+ * prealloc for @blkg w/ GFP_KERNEL.
+ */
+ if (pinned_blkg)
+ blkg_put(pinned_blkg);
+ blkg_get(blkg);
+ pinned_blkg = blkg;
+
spin_unlock_irq(&q->queue_lock);
- goto pd_prealloc;
+
+ if (pd_prealloc)
+ pol->pd_free_fn(pd_prealloc);
+ pd_prealloc = pol->pd_alloc_fn(GFP_KERNEL, q,
+ blkg->blkcg);
+ if (pd_prealloc)
+ goto retry;
+ else
+ goto enomem;
}
blkg->pd[pol->plid] = pd;
pd->blkg = blkg;
pd->plid = pol->plid;
- if (pol->pd_init_fn)
- pol->pd_init_fn(pd);
}
+ /* all allocated, init in the same order */
+ if (pol->pd_init_fn)
+ list_for_each_entry_reverse(blkg, &q->blkg_list, q_node)
+ pol->pd_init_fn(blkg->pd[pol->plid]);
+
__set_bit(pol->plid, q->blkcg_pols);
ret = 0;
spin_unlock_irq(&q->queue_lock);
-out_bypass_end:
+out:
if (queue_is_mq(q))
blk_mq_unfreeze_queue(q);
+ if (pinned_blkg)
+ blkg_put(pinned_blkg);
if (pd_prealloc)
pol->pd_free_fn(pd_prealloc);
return ret;
+
+enomem:
+ /* alloc failed, nothing's initialized yet, free everything */
+ spin_lock_irq(&q->queue_lock);
+ list_for_each_entry(blkg, &q->blkg_list, q_node) {
+ if (blkg->pd[pol->plid]) {
+ pol->pd_free_fn(blkg->pd[pol->plid]);
+ blkg->pd[pol->plid] = NULL;
+ }
+ }
+ spin_unlock_irq(&q->queue_lock);
+ ret = -ENOMEM;
+ goto out;
}
EXPORT_SYMBOL_GPL(blkcg_activate_policy);
static inline void rq_qos_del(struct request_queue *q, struct rq_qos *rqos)
{
- struct rq_qos *cur, *prev = NULL;
- for (cur = q->rq_qos; cur; cur = cur->next) {
- if (cur == rqos) {
- if (prev)
- prev->next = rqos->next;
- else
- q->rq_qos = cur;
+ struct rq_qos **cur;
+
+ for (cur = &q->rq_qos; *cur; cur = &(*cur)->next) {
+ if (*cur == rqos) {
+ *cur = rqos->next;
break;
}
- prev = cur;
}
blk_mq_debugfs_unregister_rqos(rqos);
+/*
+ * Returns false when @q has no mq_ops, or when it has a tag_set whose
+ * flags include BLK_MQ_F_NO_SCHED; returns true otherwise.
+ */
static inline bool elv_support_iosched(struct request_queue *q)
{
- if (q->tag_set && (q->tag_set->flags & BLK_MQ_F_NO_SCHED))
+ if (!q->mq_ops ||
+ (q->tag_set && (q->tag_set->flags & BLK_MQ_F_NO_SCHED)))
return false;
return true;
}
pcc_data[pcc_ss_id]->refcount--;
if (!pcc_data[pcc_ss_id]->refcount) {
pcc_mbox_free_channel(pcc_data[pcc_ss_id]->pcc_channel);
- pcc_data[pcc_ss_id]->pcc_channel_acquired = 0;
kfree(pcc_data[pcc_ss_id]);
+ pcc_data[pcc_ss_id] = NULL;
}
}
}
pr_info("HMAT: Memory Flags:%04x Processor Domain:%d Memory Domain:%d\n",
p->flags, p->processor_PD, p->memory_PD);
- if (p->flags & ACPI_HMAT_MEMORY_PD_VALID) {
+ if (p->flags & ACPI_HMAT_MEMORY_PD_VALID && hmat_revision == 1) {
target = find_mem_target(p->memory_PD);
if (!target) {
pr_debug("HMAT: Memory Domain missing from SRAT\n");
struct acpi_processor *pr = per_cpu(processors, cpu);
int ret;
+ if (!pr)
+ return;
+
ret = dev_pm_qos_add_request(get_cpu_device(cpu),
&pr->perflib_req, DEV_PM_QOS_MAX_FREQUENCY,
INT_MAX);
- if (ret < 0) {
+ if (ret < 0)
pr_err("Failed to add freq constraint for CPU%d (%d)\n", cpu,
ret);
- return;
- }
}
+/*
+ * Remove the perflib_req PM QoS request of the ACPI processor object
+ * registered for @cpu. Does nothing when no processor object exists
+ * for that CPU.
+ */
void acpi_processor_ppc_exit(int cpu)
{
struct acpi_processor *pr = per_cpu(processors, cpu);
- dev_pm_qos_remove_request(&pr->perflib_req);
+ if (pr)
+ dev_pm_qos_remove_request(&pr->perflib_req);
}
static int acpi_processor_get_performance_control(struct acpi_processor *pr)
struct acpi_processor *pr = per_cpu(processors, cpu);
int ret;
+ if (!pr)
+ return;
+
ret = dev_pm_qos_add_request(get_cpu_device(cpu),
&pr->thermal_req, DEV_PM_QOS_MAX_FREQUENCY,
INT_MAX);
- if (ret < 0) {
+ if (ret < 0)
pr_err("Failed to add freq constraint for CPU%d (%d)\n", cpu,
ret);
- return;
- }
}
+/*
+ * Remove the thermal_req PM QoS request of the ACPI processor object
+ * registered for @cpu. Does nothing when no processor object exists
+ * for that CPU.
+ */
void acpi_thermal_cpufreq_exit(int cpu)
{
struct acpi_processor *pr = per_cpu(processors, cpu);
- dev_pm_qos_remove_request(&pr->thermal_req);
+ if (pr)
+ dev_pm_qos_remove_request(&pr->thermal_req);
}
#else /* ! CONFIG_CPU_FREQ */
static int cpufreq_get_max_state(unsigned int cpu)
DMI_MATCH(DMI_PRODUCT_NAME, "80E3"),
},
},
- /*
- * https://bugzilla.kernel.org/show_bug.cgi?id=196907
- * Some Dell XPS13 9360 cannot do suspend-to-idle using the Low Power
- * S0 Idle firmware interface.
- */
- {
- .callback = init_default_s3,
- .ident = "Dell XPS13 9360",
- .matches = {
- DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
- DMI_MATCH(DMI_PRODUCT_NAME, "XPS 13 9360"),
- },
- },
/*
* ThinkPad X1 Tablet(2016) cannot do suspend-to-idle using
* the Low Power S0 Idle firmware interface (see
*/
if (!id || id->vendor != PCI_VENDOR_ID_INTEL)
return;
- if (((enum board_ids) id->driver_data) < board_ahci_pcs7)
+
+ /* Skip applying the quirk on Denverton and beyond */
+ if (((enum board_ids) id->driver_data) >= board_ahci_pcs7)
return;
/*
*/
#include <linux/acpi.h>
+#include <linux/cpufreq.h>
#include <linux/device.h>
#include <linux/err.h>
#include <linux/fwnode.h>
wait_for_device_probe();
device_block_probing();
+ cpufreq_suspend();
+
spin_lock(&devices_kset->list_lock);
/*
* Walk the devices list backward, shutting down each in turn.
pfn >>= PAGE_SHIFT;
if (!pfn_valid(pfn))
return -ENXIO;
+ /* Only online pages can be soft-offlined (esp., not ZONE_DEVICE). */
+ if (!pfn_to_online_page(pfn))
+ return -EIO;
ret = soft_offline_page(pfn_to_page(pfn), 0);
return ret == 0 ? count : ret;
}
queue_delayed_work(rbd_dev->task_wq, &rbd_dev->lock_dwork, 0);
ret = wait_for_completion_killable_timeout(&rbd_dev->acquire_wait,
ceph_timeout_jiffies(rbd_dev->opts->lock_timeout));
- if (ret > 0)
+ if (ret > 0) {
ret = rbd_dev->acquire_err;
- else if (!ret)
- ret = -ETIMEDOUT;
+ } else {
+ cancel_delayed_work_sync(&rbd_dev->lock_dwork);
+ if (!ret)
+ ret = -ETIMEDOUT;
+ }
if (ret) {
rbd_warn(rbd_dev, "failed to acquire exclusive lock: %ld", ret);
static ssize_t backing_dev_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
+ struct file *file;
struct zram *zram = dev_to_zram(dev);
- struct file *file = zram->backing_dev;
char *p;
ssize_t ret;
down_read(&zram->init_lock);
- if (!zram->backing_dev) {
+ file = zram->backing_dev;
+ if (!file) {
memcpy(buf, "none\n", 5);
up_read(&zram->init_lock);
return 5;
}
EXPORT_SYMBOL_GPL(cpufreq_unregister_driver);
-/*
- * Stop cpufreq at shutdown to make sure it isn't holding any locks
- * or mutexes when secondary CPUs are halted.
- */
-static struct syscore_ops cpufreq_syscore_ops = {
- .shutdown = cpufreq_suspend,
-};
-
struct kobject *cpufreq_global_kobject;
EXPORT_SYMBOL(cpufreq_global_kobject);
cpufreq_global_kobject = kobject_create_and_add("cpufreq", &cpu_subsys.dev_root->kobj);
BUG_ON(!cpufreq_global_kobject);
- register_syscore_ops(&cpufreq_syscore_ops);
-
return 0;
}
module_param(off, int, 0444);
#include <linux/ccp.h>
#include <linux/firmware.h>
+#include <asm/smp.h>
+
#include "sp-dev.h"
#include "psp-dev.h"
return rc;
psp->sev_state = SEV_STATE_INIT;
+
+ /* Prepare for first SEV guest launch after INIT */
+ wbinvd_on_all_cpus();
+ rc = __sev_do_cmd_locked(SEV_CMD_DF_FLUSH, NULL, error);
+ if (rc)
+ return rc;
+
dev_dbg(psp->dev, "SEV firmware initialized\n");
return rc;
if (pfence_excl)
*pfence_excl = fence_excl;
else if (fence_excl)
- shared[++shared_count] = fence_excl;
+ shared[shared_count++] = fence_excl;
if (!shared_count) {
kfree(shared);
bytes = ~0ull;
else if (size & 0x8000)
bytes = (u64)(size & 0x7fff) << 10;
- else if (size != 0x7fff)
+ else if (size != 0x7fff || dm->length < 0x20)
bytes = (u64)size << 20;
else
bytes = (u64)get_unaligned((u32 *)&d[0x1C]) << 20;
chip->irq_eoi(data);
}
-static void intel_mid_irq_init_hw(struct intel_mid_gpio *priv)
+static int intel_mid_irq_init_hw(struct gpio_chip *chip)
{
+ struct intel_mid_gpio *priv = gpiochip_get_data(chip);
void __iomem *reg;
unsigned base;
reg = gpio_reg(&priv->chip, base, GEDR);
writel(~0, reg);
}
+
+ return 0;
}
static int __maybe_unused intel_gpio_runtime_idle(struct device *dev)
girq = &priv->chip.irq;
girq->chip = &intel_mid_irqchip;
+ girq->init_hw = intel_mid_irq_init_hw;
girq->parent_handler = intel_mid_irq_handler;
girq->num_parents = 1;
girq->parents = devm_kcalloc(&pdev->dev, girq->num_parents,
girq->default_type = IRQ_TYPE_NONE;
girq->handler = handle_simple_irq;
- intel_mid_irq_init_hw(priv);
-
pci_set_drvdata(pdev, priv);
+
retval = devm_gpiochip_add_data(&pdev->dev, &priv->chip, priv);
if (retval) {
dev_err(&pdev->dev, "gpiochip_add error %d\n", retval);
.flags = IRQCHIP_SKIP_SET_WAKE,
};
-static void lp_gpio_irq_init_hw(struct lp_gpio *lg)
+static int lp_gpio_irq_init_hw(struct gpio_chip *chip)
{
+ struct lp_gpio *lg = gpiochip_get_data(chip);
unsigned long reg;
unsigned base;
reg = lp_gpio_reg(&lg->chip, base, LP_INT_STAT);
outl(0xffffffff, reg);
}
+
+ return 0;
}
static int lp_gpio_probe(struct platform_device *pdev)
girq = &gc->irq;
girq->chip = &lp_irqchip;
+ girq->init_hw = lp_gpio_irq_init_hw;
girq->parent_handler = lp_gpio_irq_handler;
girq->num_parents = 1;
girq->parents = devm_kcalloc(&pdev->dev, girq->num_parents,
return -ENOMEM;
girq->parents[0] = (unsigned)irq_rc->start;
girq->default_type = IRQ_TYPE_NONE;
- girq->handler = handle_simple_irq;
-
- lp_gpio_irq_init_hw(lg);
+ girq->handler = handle_bad_irq;
}
ret = devm_gpiochip_add_data(dev, gc, lg);
chained_irq_exit(irqchip, desc);
}
-static void mrfld_irq_init_hw(struct mrfld_gpio *priv)
+static int mrfld_irq_init_hw(struct gpio_chip *chip)
{
+ struct mrfld_gpio *priv = gpiochip_get_data(chip);
void __iomem *reg;
unsigned int base;
reg = gpio_reg(&priv->chip, base, GFER);
writel(0, reg);
}
+
+ return 0;
}
static const char *mrfld_gpio_get_pinctrl_dev_name(struct mrfld_gpio *priv)
girq = &priv->chip.irq;
girq->chip = &mrfld_irqchip;
+ girq->init_hw = mrfld_irq_init_hw;
girq->parent_handler = mrfld_irq_handler;
girq->num_parents = 1;
girq->parents = devm_kcalloc(&pdev->dev, girq->num_parents,
if (!girq->parents)
return -ENOMEM;
girq->parents[0] = pdev->irq;
+ girq->first = irq_base;
girq->default_type = IRQ_TYPE_NONE;
girq->handler = handle_bad_irq;
- mrfld_irq_init_hw(priv);
-
pci_set_drvdata(pdev, priv);
retval = devm_gpiochip_add_data(&pdev->dev, &priv->chip, priv);
if (retval) {
struct lock_class_key *lock_key,
struct lock_class_key *request_key);
static void gpiochip_irqchip_remove(struct gpio_chip *gpiochip);
+static int gpiochip_irqchip_init_hw(struct gpio_chip *gpiochip);
static int gpiochip_irqchip_init_valid_mask(struct gpio_chip *gpiochip);
static void gpiochip_irqchip_free_valid_mask(struct gpio_chip *gpiochip);
machine_gpiochip_add(chip);
+ ret = gpiochip_irqchip_init_hw(chip);
+ if (ret)
+ goto err_remove_acpi_chip;
+
ret = gpiochip_irqchip_init_valid_mask(chip);
if (ret)
goto err_remove_acpi_chip;
* The following is irqchip helper code for gpiochips.
*/
+/*
+ * Invoke the optional init_hw callback stored in @gc's gpio_irq_chip.
+ *
+ * Returns 0 when no callback is set, otherwise whatever the callback
+ * returns.
+ */
+static int gpiochip_irqchip_init_hw(struct gpio_chip *gc)
+{
+ struct gpio_irq_chip *girq = &gc->irq;
+
+ if (!girq->init_hw)
+ return 0;
+
+ return girq->init_hw(gc);
+}
+
static int gpiochip_irqchip_init_valid_mask(struct gpio_chip *gc)
{
struct gpio_irq_chip *girq = &gc->irq;
{
return 0;
}
-
static void gpiochip_irqchip_remove(struct gpio_chip *gpiochip) {}
+
+/*
+ * No-op variant: always reports success.
+ * NOTE(review): presumably the stub used when the irqchip helpers are
+ * compiled out - the enclosing #ifdef is outside this hunk; confirm.
+ */
+static inline int gpiochip_irqchip_init_hw(struct gpio_chip *gpiochip)
+{
+ return 0;
+}
+
static inline int gpiochip_irqchip_init_valid_mask(struct gpio_chip *gpiochip)
{
return 0;
return -ENODEV;
}
+#ifdef CONFIG_DRM_AMDGPU_SI
+ if (!amdgpu_si_support) {
+ switch (flags & AMD_ASIC_MASK) {
+ case CHIP_TAHITI:
+ case CHIP_PITCAIRN:
+ case CHIP_VERDE:
+ case CHIP_OLAND:
+ case CHIP_HAINAN:
+ dev_info(&pdev->dev,
+ "SI support provided by radeon.\n");
+ dev_info(&pdev->dev,
+ "Use radeon.si_support=0 amdgpu.si_support=1 to override.\n"
+ );
+ return -ENODEV;
+ }
+ }
+#endif
+#ifdef CONFIG_DRM_AMDGPU_CIK
+ if (!amdgpu_cik_support) {
+ switch (flags & AMD_ASIC_MASK) {
+ case CHIP_KAVERI:
+ case CHIP_BONAIRE:
+ case CHIP_HAWAII:
+ case CHIP_KABINI:
+ case CHIP_MULLINS:
+ dev_info(&pdev->dev,
+ "CIK support provided by radeon.\n");
+ dev_info(&pdev->dev,
+ "Use radeon.cik_support=0 amdgpu.cik_support=1 to override.\n"
+ );
+ return -ENODEV;
+ }
+ }
+#endif
+
/* Get rid of things like offb */
ret = drm_fb_helper_remove_conflicting_pci_framebuffers(pdev, 0, "amdgpudrmfb");
if (ret)
struct amdgpu_device *adev;
int r, acpi_status;
-#ifdef CONFIG_DRM_AMDGPU_SI
- if (!amdgpu_si_support) {
- switch (flags & AMD_ASIC_MASK) {
- case CHIP_TAHITI:
- case CHIP_PITCAIRN:
- case CHIP_VERDE:
- case CHIP_OLAND:
- case CHIP_HAINAN:
- dev_info(dev->dev,
- "SI support provided by radeon.\n");
- dev_info(dev->dev,
- "Use radeon.si_support=0 amdgpu.si_support=1 to override.\n"
- );
- return -ENODEV;
- }
- }
-#endif
-#ifdef CONFIG_DRM_AMDGPU_CIK
- if (!amdgpu_cik_support) {
- switch (flags & AMD_ASIC_MASK) {
- case CHIP_KAVERI:
- case CHIP_BONAIRE:
- case CHIP_HAWAII:
- case CHIP_KABINI:
- case CHIP_MULLINS:
- dev_info(dev->dev,
- "CIK support provided by radeon.\n");
- dev_info(dev->dev,
- "Use radeon.cik_support=0 amdgpu.cik_support=1 to override.\n"
- );
- return -ENODEV;
- }
- }
-#endif
-
adev = kzalloc(sizeof(struct amdgpu_device), GFP_KERNEL);
if (adev == NULL) {
return -ENOMEM;
amdgpu_ring_write(ring, addr & 0xfffffffc);
amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
amdgpu_ring_write(ring, seq); /* reference */
- amdgpu_ring_write(ring, 0xfffffff); /* mask */
+ amdgpu_ring_write(ring, 0xffffffff); /* mask */
amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) |
SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(4)); /* retry count, poll interval */
}
count = SMU_MAX_SMIO_LEVELS;
for (level = 0; level < count; level++) {
table->SmioTable2.Pattern[level].Voltage =
- PP_HOST_TO_SMC_US(data->mvdd_voltage_table.entries[count].value * VOLTAGE_SCALE);
+ PP_HOST_TO_SMC_US(data->mvdd_voltage_table.entries[level].value * VOLTAGE_SCALE);
/* Index into DpmTable.Smio. Drive bits from Smio entry to get this voltage level.*/
table->SmioTable2.Pattern[level].Smio =
(uint8_t) level;
count = SMU_MAX_SMIO_LEVELS;
for (level = 0; level < count; level++) {
table->SmioTable2.Pattern[level].Voltage = PP_HOST_TO_SMC_US(
- data->mvdd_voltage_table.entries[count].value * VOLTAGE_SCALE);
+ data->mvdd_voltage_table.entries[level].value * VOLTAGE_SCALE);
/* Index into DpmTable.Smio. Drive bits from Smio entry to get this voltage level.*/
table->SmioTable2.Pattern[level].Smio =
(uint8_t) level;
/* Medion MD 30217 PG */
{ "MED", 0x7b8, EDID_QUIRK_PREFER_LARGE_75 },
+ /* Lenovo G50 */
+ { "SDC", 18514, EDID_QUIRK_FORCE_6BPC },
+
/* Panel in Samsung NP700G7A-S01PL notebook reports 6bpc */
{ "SEC", 0xd033, EDID_QUIRK_FORCE_8BPC },
DRM_DEBUG_KMS("port %c trying to use the same DDC pin (0x%x) as port %c, "
"disabling port %c DVI/HDMI support\n",
port_name(port), info->alternate_ddc_pin,
- port_name(p), port_name(port));
+ port_name(p), port_name(p));
/*
* If we have multiple ports supposedly sharing the
* port. Otherwise they share the same ddc bin and
* system couldn't communicate with them separately.
*
- * Give child device order the priority, first come first
- * served.
+ * Give inverse child device order the priority,
+ * last one wins. Yes, there are real machines
+ * (eg. Asrock B250M-HDV) where VBT has both
+ * port A and port E with the same AUX ch and
+ * we must pick port E :(
*/
+ info = &dev_priv->vbt.ddi_port_info[p];
+
info->supports_dvi = false;
info->supports_hdmi = false;
info->alternate_ddc_pin = 0;
DRM_DEBUG_KMS("port %c trying to use the same AUX CH (0x%x) as port %c, "
"disabling port %c DP support\n",
port_name(port), info->alternate_aux_channel,
- port_name(p), port_name(port));
+ port_name(p), port_name(p));
/*
* If we have multiple ports supposedlt sharing the
* port. Otherwise they share the same aux channel
* and system couldn't communicate with them separately.
*
- * Give child device order the priority, first come first
- * served.
+ * Give inverse child device order the priority,
+ * last one wins. Yes, there are real machines
+ * (eg. Asrock B250M-HDV) where VBT has both
+ * port A and port E with the same AUX ch and
+ * we must pick port E :(
*/
+ info = &dev_priv->vbt.ddi_port_info[p];
+
info->supports_dp = false;
info->alternate_aux_channel = 0;
}
return VM_FAULT_OOM;
case -ENOSPC:
case -EFAULT:
+ case -ENODEV: /* bad object, how did you get here! */
return VM_FAULT_SIGBUS;
default:
WARN_ONCE(ret, "unhandled error in %s: %i\n", __func__, ret);
if (!obj)
return -ENOENT;
+ if (i915_gem_object_never_bind_ggtt(obj)) {
+ ret = -ENODEV;
+ goto out;
+ }
+
ret = create_mmap_offset(obj);
if (ret == 0)
*offset = drm_vma_node_offset_addr(&obj->base.vma_node);
+out:
i915_gem_object_put(obj);
return ret;
}
return obj->ops->flags & I915_GEM_OBJECT_IS_PROXY;
}
+/* Returns true when @obj's ops carry the I915_GEM_OBJECT_NO_GGTT flag. */
+static inline bool
+i915_gem_object_never_bind_ggtt(const struct drm_i915_gem_object *obj)
+{
+ return obj->ops->flags & I915_GEM_OBJECT_NO_GGTT;
+}
+
static inline bool
i915_gem_object_needs_async_cancel(const struct drm_i915_gem_object *obj)
{
#define I915_GEM_OBJECT_HAS_STRUCT_PAGE BIT(0)
#define I915_GEM_OBJECT_IS_SHRINKABLE BIT(1)
#define I915_GEM_OBJECT_IS_PROXY BIT(2)
-#define I915_GEM_OBJECT_ASYNC_CANCEL BIT(3)
+#define I915_GEM_OBJECT_NO_GGTT BIT(3)
+#define I915_GEM_OBJECT_ASYNC_CANCEL BIT(4)
/* Interface between the GEM object and its backing storage.
* get_pages() is called once prior to the use of the associated set
static const struct drm_i915_gem_object_ops i915_gem_userptr_ops = {
.flags = I915_GEM_OBJECT_HAS_STRUCT_PAGE |
I915_GEM_OBJECT_IS_SHRINKABLE |
+ I915_GEM_OBJECT_NO_GGTT |
I915_GEM_OBJECT_ASYNC_CANCEL,
.get_pages = i915_gem_userptr_get_pages,
.put_pages = i915_gem_userptr_put_pages,
struct intel_engine_cs *engine,
struct intel_ring *ring);
+/*
+ * Fail @rq with -EIO and mark it complete. The fence error is only set
+ * when the request has not already been signaled.
+ */
+static void mark_eio(struct i915_request *rq)
+{
+ if (!i915_request_signaled(rq))
+ dma_fence_set_error(&rq->fence, -EIO);
+ i915_request_mark_complete(rq);
+}
+
static inline u32 intel_hws_preempt_address(struct intel_engine_cs *engine)
{
return (i915_ggtt_offset(engine->status_page.vma) +
submit = true;
last = rq;
}
+ i915_request_put(rq);
/*
* Hmm, we have a bunch of virtual engine requests,
__execlists_reset(engine, true);
/* Mark all executing requests as skipped. */
- list_for_each_entry(rq, &engine->active.requests, sched.link) {
- if (!i915_request_signaled(rq))
- dma_fence_set_error(&rq->fence, -EIO);
-
- i915_request_mark_complete(rq);
- }
+ list_for_each_entry(rq, &engine->active.requests, sched.link)
+ mark_eio(rq);
/* Flush the queued requests to the timeline list (for retiring). */
while ((rb = rb_first_cached(&execlists->queue))) {
int i;
priolist_for_each_request_consume(rq, rn, p, i) {
+ mark_eio(rq);
__i915_request_submit(rq);
- dma_fence_set_error(&rq->fence, -EIO);
- i915_request_mark_complete(rq);
}
rb_erase_cached(&p->node, &execlists->queue);
RB_CLEAR_NODE(rb);
spin_lock(&ve->base.active.lock);
- if (ve->request) {
- ve->request->engine = engine;
- __i915_request_submit(ve->request);
- dma_fence_set_error(&ve->request->fence, -EIO);
- i915_request_mark_complete(ve->request);
+ rq = fetch_and_zero(&ve->request);
+ if (rq) {
+ mark_eio(rq);
+
+ rq->engine = engine;
+ __i915_request_submit(rq);
+ i915_request_put(rq);
+
ve->base.execlists.queue_priority_hint = INT_MIN;
- ve->request = NULL;
}
spin_unlock(&ve->base.active.lock);
}
static void virtual_submit_request(struct i915_request *rq)
{
struct virtual_engine *ve = to_virtual_engine(rq->engine);
+ struct i915_request *old;
+ unsigned long flags;
GEM_TRACE("%s: rq=%llx:%lld\n",
ve->base.name,
GEM_BUG_ON(ve->base.submit_request != virtual_submit_request);
- GEM_BUG_ON(ve->request);
- GEM_BUG_ON(!list_empty(virtual_queue(ve)));
+ spin_lock_irqsave(&ve->base.active.lock, flags);
+
+ old = ve->request;
+ if (old) { /* background completion event from preempt-to-busy */
+ GEM_BUG_ON(!i915_request_completed(old));
+ __i915_request_submit(old);
+ i915_request_put(old);
+ }
- ve->base.execlists.queue_priority_hint = rq_prio(rq);
- WRITE_ONCE(ve->request, rq);
+ if (i915_request_completed(rq)) {
+ __i915_request_submit(rq);
- list_move_tail(&rq->sched.link, virtual_queue(ve));
+ ve->base.execlists.queue_priority_hint = INT_MIN;
+ ve->request = NULL;
+ } else {
+ ve->base.execlists.queue_priority_hint = rq_prio(rq);
+ ve->request = i915_request_get(rq);
+
+ GEM_BUG_ON(!list_empty(virtual_queue(ve)));
+ list_move_tail(&rq->sched.link, virtual_queue(ve));
+
+ tasklet_schedule(&ve->base.execlists.tasklet);
+ }
- tasklet_schedule(&ve->base.execlists.tasklet);
+ spin_unlock_irqrestore(&ve->base.active.lock, flags);
}
static struct ve_bond *
lockdep_assert_held(&obj->base.dev->struct_mutex);
+ if (i915_gem_object_never_bind_ggtt(obj))
+ return ERR_PTR(-ENODEV);
+
if (flags & PIN_MAPPABLE &&
(!view || view->type == I915_GGTT_VIEW_NORMAL)) {
/* If the required space is larger than the available
#include "dsi_cfg.h"
#include "msm_kms.h"
+#define DSI_RESET_TOGGLE_DELAY_MS 20
+
static int dsi_get_version(const void __iomem *base, u32 *major, u32 *minor)
{
u32 ver;
wmb(); /* clocks need to be enabled before reset */
dsi_write(msm_host, REG_DSI_RESET, 1);
- wmb(); /* make sure reset happen */
+ msleep(DSI_RESET_TOGGLE_DELAY_MS); /* make sure reset happen */
dsi_write(msm_host, REG_DSI_RESET, 0);
}
/* dsi controller can only be reset while clocks are running */
dsi_write(msm_host, REG_DSI_RESET, 1);
- wmb(); /* make sure reset happen */
+ msleep(DSI_RESET_TOGGLE_DELAY_MS); /* make sure reset happen */
dsi_write(msm_host, REG_DSI_RESET, 0);
wmb(); /* controller out of reset */
dsi_write(msm_host, REG_DSI_CTRL, data0);
pfdev->features.mem_features = gpu_read(pfdev, GPU_MEM_FEATURES);
pfdev->features.mmu_features = gpu_read(pfdev, GPU_MMU_FEATURES);
pfdev->features.thread_features = gpu_read(pfdev, GPU_THREAD_FEATURES);
+ pfdev->features.max_threads = gpu_read(pfdev, GPU_THREAD_MAX_THREADS);
+ pfdev->features.thread_max_workgroup_sz = gpu_read(pfdev, GPU_THREAD_MAX_WORKGROUP_SIZE);
+ pfdev->features.thread_max_barrier_sz = gpu_read(pfdev, GPU_THREAD_MAX_BARRIER_SIZE);
pfdev->features.coherency_features = gpu_read(pfdev, GPU_COHERENCY_FEATURES);
for (i = 0; i < 4; i++)
pfdev->features.texture_features[i] = gpu_read(pfdev, GPU_TEXTURE_FEATURES(i));
job_read(pfdev, JS_TAIL_LO(js)),
sched_job);
- mutex_lock(&pfdev->reset_lock);
+ if (!mutex_trylock(&pfdev->reset_lock))
+ return;
- for (i = 0; i < NUM_JOB_SLOTS; i++)
- drm_sched_stop(&pfdev->js->queue[i].sched, sched_job);
+ for (i = 0; i < NUM_JOB_SLOTS; i++) {
+ struct drm_gpu_scheduler *sched = &pfdev->js->queue[i].sched;
+
+ drm_sched_stop(sched, sched_job);
+ if (js != i)
+ /* Ensure any timeouts on other slots have finished */
+ cancel_delayed_work_sync(&sched->work_tdr);
+ }
- if (sched_job)
- drm_sched_increase_karma(sched_job);
+ drm_sched_increase_karma(sched_job);
spin_lock_irqsave(&pfdev->js->job_lock, flags);
for (i = 0; i < NUM_JOB_SLOTS; i++) {
static void
radeon_pci_shutdown(struct pci_dev *pdev)
{
- struct drm_device *ddev = pci_get_drvdata(pdev);
-
/* if we are running in a VM, make sure the device
* torn down properly on reboot/shutdown
*/
if (radeon_device_is_virtual())
radeon_pci_remove(pdev);
-
- /* Some adapters need to be suspended before a
- * shutdown occurs in order to prevent an error
- * during kexec.
- */
- radeon_suspend_kms(ddev, true, true, false);
}
static int radeon_pmops_suspend(struct device *dev)
depends on DRM && SPI
select DRM_KMS_HELPER
select DRM_KMS_CMA_HELPER
- depends on THERMAL || !THERMAL
help
DRM driver for the following Pervasive Displays panels:
1.44" TFT EPD Panel (E1144CS021)
list_add_tail(&bo->lru, &man->lru[bo->priority]);
kref_get(&bo->list_kref);
- if (bo->ttm && !(bo->ttm->page_flags &
- (TTM_PAGE_FLAG_SG | TTM_PAGE_FLAG_SWAPPED))) {
+ if (!(man->flags & TTM_MEMTYPE_FLAG_FIXED) && bo->ttm &&
+ !(bo->ttm->page_flags & (TTM_PAGE_FLAG_SG |
+ TTM_PAGE_FLAG_SWAPPED))) {
list_add_tail(&bo->swap, &bdev->glob->swap_lru[bo->priority]);
kref_get(&bo->list_kref);
}
if (!bo) {
if (busy_bo)
- ttm_bo_get(busy_bo);
+ kref_get(&busy_bo->list_kref);
spin_unlock(&glob->lru_lock);
ret = ttm_mem_evict_wait_busy(busy_bo, ctx, ticket);
if (busy_bo)
- ttm_bo_put(busy_bo);
+ kref_put(&busy_bo->list_kref, ttm_bo_release_list);
return ret;
}
else
ret = vmf_insert_pfn(&cvma, address, pfn);
- /*
- * Somebody beat us to this PTE or prefaulting to
- * an already populated PTE, or prefaulting error.
- */
-
- if (unlikely((ret == VM_FAULT_NOPAGE && i > 0)))
- break;
- else if (unlikely(ret & VM_FAULT_ERROR))
- goto out_io_unlock;
+ /* Never error on prefaulted PTEs */
+ if (unlikely((ret & VM_FAULT_ERROR))) {
+ if (i == 0)
+ goto out_io_unlock;
+ else
+ break;
+ }
address += PAGE_SIZE;
if (unlikely(++page_offset >= page_last))
onkey->input->phys = onkey->phys;
onkey->input->dev.parent = &pdev->dev;
- if (onkey->key_power)
- input_set_capability(onkey->input, EV_KEY, KEY_POWER);
-
- input_set_capability(onkey->input, EV_KEY, KEY_SLEEP);
+ input_set_capability(onkey->input, EV_KEY, KEY_POWER);
INIT_DELAYED_WORK(&onkey->work, da9063_poll_on);
continue;
gpio = soc_button_lookup_gpio(&pdev->dev, info->acpi_index);
- if (gpio < 0 && gpio != -ENOENT) {
- error = gpio;
- goto err_free_mem;
- } else if (!gpio_is_valid(gpio)) {
- /* Skip GPIO if not present */
+ if (!gpio_is_valid(gpio)) {
+ /*
+ * Skip GPIO if not present. Note we deliberately
+ * ignore -EPROBE_DEFER errors here. On some devices
+ * Intel is using so called virtual GPIOs which are not
+ * GPIOs at all but some way for AML code to check some
+ * random status bits without needing a custom opregion.
+ * In some cases the resources table we parse points to
+ * such a virtual GPIO, since these are not real GPIOs
+ * we do not have a driver for these so they will never
+ * show up, therefore we ignore -EPROBE_DEFER.
+ */
continue;
}
leave_breadcrumbs);
}
-static bool elantech_use_host_notify(struct psmouse *psmouse,
- struct elantech_device_info *info)
-{
- if (ETP_NEW_IC_SMBUS_HOST_NOTIFY(info->fw_version))
- return true;
-
- switch (info->bus) {
- case ETP_BUS_PS2_ONLY:
- /* expected case */
- break;
- case ETP_BUS_SMB_HST_NTFY_ONLY:
- case ETP_BUS_PS2_SMB_HST_NTFY:
- /* SMbus implementation is stable since 2018 */
- if (dmi_get_bios_year() >= 2018)
- return true;
- /* fall through */
- default:
- psmouse_dbg(psmouse,
- "Ignoring SMBus bus provider %d\n", info->bus);
- break;
- }
-
- return false;
-}
-
/**
* elantech_setup_smbus - called once the PS/2 devices are enumerated
* and decides to instantiate a SMBus InterTouch device.
* i2c_blacklist_pnp_ids.
* Old ICs are up to the user to decide.
*/
- if (!elantech_use_host_notify(psmouse, info) ||
+ if (!ETP_NEW_IC_SMBUS_HOST_NOTIFY(info->fw_version) ||
psmouse_matches_pnp_id(psmouse, i2c_blacklist_pnp_ids))
return -ENXIO;
}
return 0;
}
+static bool elantech_use_host_notify(struct psmouse *psmouse,
+ struct elantech_device_info *info)
+{
+ if (ETP_NEW_IC_SMBUS_HOST_NOTIFY(info->fw_version))
+ return true;
+
+ switch (info->bus) {
+ case ETP_BUS_PS2_ONLY:
+ /* expected case */
+ break;
+ case ETP_BUS_SMB_ALERT_ONLY:
+ /* fall-through */
+ case ETP_BUS_PS2_SMB_ALERT:
+ psmouse_dbg(psmouse, "Ignoring SMBus provider through alert protocol.\n");
+ break;
+ case ETP_BUS_SMB_HST_NTFY_ONLY:
+ /* fall-through */
+ case ETP_BUS_PS2_SMB_HST_NTFY:
+ return true;
+ default:
+ psmouse_dbg(psmouse,
+ "Ignoring SMBus bus provider %d.\n",
+ info->bus);
+ }
+
+ return false;
+}
+
int elantech_init_smbus(struct psmouse *psmouse)
{
struct elantech_device_info info;
}
mutex_lock(&data->irq_mutex);
- bitmap_and(data->irq_status, data->irq_status, data->current_irq_mask,
+ bitmap_and(data->irq_status, data->irq_status, data->fn_irq_bits,
data->irq_count);
/*
* At this point, irq_status has all bits that are set in the
bitmap_copy(data->current_irq_mask, data->new_irq_mask,
data->num_of_irq_regs);
+ bitmap_or(data->fn_irq_bits, data->fn_irq_bits, mask, data->irq_count);
+
error_unlock:
mutex_unlock(&data->irq_mutex);
return error;
struct device *dev = &rmi_dev->dev;
mutex_lock(&data->irq_mutex);
+ bitmap_andnot(data->fn_irq_bits,
+ data->fn_irq_bits, mask, data->irq_count);
bitmap_andnot(data->new_irq_mask,
data->current_irq_mask, mask, data->irq_count);
const char *cfg_name;
struct completion firmware_loading_complete;
unsigned long irq_flags;
+ unsigned int contact_size;
};
#define GOODIX_GPIO_INT_NAME "irq"
#define GOODIX_MAX_WIDTH 4096
#define GOODIX_INT_TRIGGER 1
#define GOODIX_CONTACT_SIZE 8
+#define GOODIX_MAX_CONTACT_SIZE 9
#define GOODIX_MAX_CONTACTS 10
#define GOODIX_CONFIG_MAX_LENGTH 240
{}
};
+static const struct dmi_system_id nine_bytes_report[] = {
+#if defined(CONFIG_DMI) && defined(CONFIG_X86)
+ {
+ .ident = "Lenovo YogaBook",
+ /* YB1-X91L/F and YB1-X90L/F */
+ .matches = {
+ DMI_MATCH(DMI_PRODUCT_NAME, "Lenovo YB1-X9")
+ }
+ },
+#endif
+ {}
+};
+
/**
* goodix_i2c_read - read data from a register of the i2c slave device.
*
max_timeout = jiffies + msecs_to_jiffies(GOODIX_BUFFER_STATUS_TIMEOUT);
do {
error = goodix_i2c_read(ts->client, GOODIX_READ_COOR_ADDR,
- data, GOODIX_CONTACT_SIZE + 1);
+ data, ts->contact_size + 1);
if (error) {
dev_err(&ts->client->dev, "I2C transfer error: %d\n",
error);
return -EPROTO;
if (touch_num > 1) {
- data += 1 + GOODIX_CONTACT_SIZE;
+ data += 1 + ts->contact_size;
error = goodix_i2c_read(ts->client,
GOODIX_READ_COOR_ADDR +
- 1 + GOODIX_CONTACT_SIZE,
+ 1 + ts->contact_size,
data,
- GOODIX_CONTACT_SIZE *
+ ts->contact_size *
(touch_num - 1));
if (error)
return error;
return 0;
}
-static void goodix_ts_report_touch(struct goodix_ts_data *ts, u8 *coor_data)
+static void goodix_ts_report_touch_8b(struct goodix_ts_data *ts, u8 *coor_data)
{
int id = coor_data[0] & 0x0F;
int input_x = get_unaligned_le16(&coor_data[1]);
input_report_abs(ts->input_dev, ABS_MT_WIDTH_MAJOR, input_w);
}
+static void goodix_ts_report_touch_9b(struct goodix_ts_data *ts, u8 *coor_data)
+{
+ int id = coor_data[1] & 0x0F;
+ int input_x = get_unaligned_le16(&coor_data[3]);
+ int input_y = get_unaligned_le16(&coor_data[5]);
+ int input_w = get_unaligned_le16(&coor_data[7]);
+
+ input_mt_slot(ts->input_dev, id);
+ input_mt_report_slot_state(ts->input_dev, MT_TOOL_FINGER, true);
+ touchscreen_report_pos(ts->input_dev, &ts->prop,
+ input_x, input_y, true);
+ input_report_abs(ts->input_dev, ABS_MT_TOUCH_MAJOR, input_w);
+ input_report_abs(ts->input_dev, ABS_MT_WIDTH_MAJOR, input_w);
+}
+
/**
* goodix_process_events - Process incoming events
*
*/
static void goodix_process_events(struct goodix_ts_data *ts)
{
- u8 point_data[1 + GOODIX_CONTACT_SIZE * GOODIX_MAX_CONTACTS];
+ u8 point_data[1 + GOODIX_MAX_CONTACT_SIZE * GOODIX_MAX_CONTACTS];
int touch_num;
int i;
input_report_key(ts->input_dev, KEY_LEFTMETA, point_data[0] & BIT(4));
for (i = 0; i < touch_num; i++)
- goodix_ts_report_touch(ts,
- &point_data[1 + GOODIX_CONTACT_SIZE * i]);
+ if (ts->contact_size == 9)
+ goodix_ts_report_touch_9b(ts,
+ &point_data[1 + ts->contact_size * i]);
+ else
+ goodix_ts_report_touch_8b(ts,
+ &point_data[1 + ts->contact_size * i]);
input_mt_sync_frame(ts->input_dev);
input_sync(ts->input_dev);
"Applying '180 degrees rotated screen' quirk\n");
}
+ if (dmi_check_system(nine_bytes_report)) {
+ ts->contact_size = 9;
+
+ dev_dbg(&ts->client->dev,
+ "Non-standard 9-bytes report format quirk\n");
+ }
+
error = input_mt_init_slots(ts->input_dev, ts->max_touch_num,
INPUT_MT_DIRECT | INPUT_MT_DROP_UNUSED);
if (error) {
ts->client = client;
i2c_set_clientdata(client, ts);
init_completion(&ts->firmware_loading_complete);
+ ts->contact_size = GOODIX_CONTACT_SIZE;
error = goodix_get_gpio_config(ts);
if (error)
retry:
type = (event[1] >> EVENT_TYPE_SHIFT) & EVENT_TYPE_MASK;
devid = (event[0] >> EVENT_DEVID_SHIFT) & EVENT_DEVID_MASK;
- pasid = PPR_PASID(*(u64 *)&event[0]);
+ pasid = (event[0] & EVENT_DOMID_MASK_HI) |
+ (event[1] & EVENT_DOMID_MASK_LO);
flags = (event[1] >> EVENT_FLAGS_SHIFT) & EVENT_FLAGS_MASK;
address = (u64)(((u64)event[3]) << 32) | event[2];
address, flags);
break;
case EVENT_TYPE_PAGE_TAB_ERR:
- dev_err(dev, "Event logged [PAGE_TAB_HARDWARE_ERROR device=%02x:%02x.%x domain=0x%04x address=0x%llx flags=0x%04x]\n",
+ dev_err(dev, "Event logged [PAGE_TAB_HARDWARE_ERROR device=%02x:%02x.%x pasid=0x%04x address=0x%llx flags=0x%04x]\n",
PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid),
pasid, address, flags);
break;
* to 64 bits.
*/
static bool increase_address_space(struct protection_domain *domain,
+ unsigned long address,
gfp_t gfp)
{
unsigned long flags;
spin_lock_irqsave(&domain->lock, flags);
- if (WARN_ON_ONCE(domain->mode == PAGE_MODE_6_LEVEL))
- /* address space already 64 bit large */
+ if (address <= PM_LEVEL_SIZE(domain->mode) ||
+ WARN_ON_ONCE(domain->mode == PAGE_MODE_6_LEVEL))
goto out;
pte = (void *)get_zeroed_page(gfp);
BUG_ON(!is_power_of_2(page_size));
while (address > PM_LEVEL_SIZE(domain->mode))
- *updated = increase_address_space(domain, gfp) || *updated;
+ *updated = increase_address_space(domain, address, gfp) || *updated;
level = domain->mode - 1;
pte = &domain->pt_root[PM_LEVEL_INDEX(level, address)];
#define EVENT_TYPE_INV_PPR_REQ 0x9
#define EVENT_DEVID_MASK 0xffff
#define EVENT_DEVID_SHIFT 0
-#define EVENT_DOMID_MASK 0xffff
-#define EVENT_DOMID_SHIFT 0
+#define EVENT_DOMID_MASK_LO 0xffff
+#define EVENT_DOMID_MASK_HI 0xf0000
#define EVENT_FLAGS_MASK 0xfff
#define EVENT_FLAGS_SHIFT 0x10
return 0;
out_clear_smmu:
+ __arm_smmu_free_bitmap(smmu->context_map, cfg->cbndx);
smmu_domain->smmu = NULL;
out_unlock:
mutex_unlock(&smmu_domain->init_mutex);
#define ARM_MALI_LPAE_TTBR_READ_INNER BIT(2)
#define ARM_MALI_LPAE_TTBR_SHARE_OUTER BIT(4)
+#define ARM_MALI_LPAE_MEMATTR_IMP_DEF 0x88ULL
+#define ARM_MALI_LPAE_MEMATTR_WRITE_ALLOC 0x8DULL
+
/* IOPTE accessors */
#define iopte_deref(pte,d) __va(iopte_to_paddr(pte, d))
static struct io_pgtable *
arm_mali_lpae_alloc_pgtable(struct io_pgtable_cfg *cfg, void *cookie)
{
- struct io_pgtable *iop;
+ struct arm_lpae_io_pgtable *data;
- if (cfg->ias != 48 || cfg->oas > 40)
+ /* No quirks for Mali (hopefully) */
+ if (cfg->quirks)
+ return NULL;
+
+ if (cfg->ias > 48 || cfg->oas > 40)
return NULL;
cfg->pgsize_bitmap &= (SZ_4K | SZ_2M | SZ_1G);
- iop = arm_64_lpae_alloc_pgtable_s1(cfg, cookie);
- if (iop) {
- u64 mair, ttbr;
- /* Copy values as union fields overlap */
- mair = cfg->arm_lpae_s1_cfg.mair[0];
- ttbr = cfg->arm_lpae_s1_cfg.ttbr[0];
+ data = arm_lpae_alloc_pgtable(cfg);
+ if (!data)
+ return NULL;
- cfg->arm_mali_lpae_cfg.memattr = mair;
- cfg->arm_mali_lpae_cfg.transtab = ttbr |
- ARM_MALI_LPAE_TTBR_READ_INNER |
- ARM_MALI_LPAE_TTBR_ADRMODE_TABLE;
+ /* Mali seems to need a full 4-level table regardless of IAS */
+ if (data->levels < ARM_LPAE_MAX_LEVELS) {
+ data->levels = ARM_LPAE_MAX_LEVELS;
+ data->pgd_size = sizeof(arm_lpae_iopte);
}
+ /*
+ * MEMATTR: Mali has no actual notion of a non-cacheable type, so the
+ * best we can do is mimic the out-of-tree driver and hope that the
+ * "implementation-defined caching policy" is good enough. Similarly,
+ * we'll use it for the sake of a valid attribute for our 'device'
+ * index, although callers should never request that in practice.
+ */
+ cfg->arm_mali_lpae_cfg.memattr =
+ (ARM_MALI_LPAE_MEMATTR_IMP_DEF
+ << ARM_LPAE_MAIR_ATTR_SHIFT(ARM_LPAE_MAIR_ATTR_IDX_NC)) |
+ (ARM_MALI_LPAE_MEMATTR_WRITE_ALLOC
+ << ARM_LPAE_MAIR_ATTR_SHIFT(ARM_LPAE_MAIR_ATTR_IDX_CACHE)) |
+ (ARM_MALI_LPAE_MEMATTR_IMP_DEF
+ << ARM_LPAE_MAIR_ATTR_SHIFT(ARM_LPAE_MAIR_ATTR_IDX_DEV));
- return iop;
+ data->pgd = __arm_lpae_alloc_pages(data->pgd_size, GFP_KERNEL, cfg);
+ if (!data->pgd)
+ goto out_free_data;
+
+ /* Ensure the empty pgd is visible before TRANSTAB can be written */
+ wmb();
+
+ cfg->arm_mali_lpae_cfg.transtab = virt_to_phys(data->pgd) |
+ ARM_MALI_LPAE_TTBR_READ_INNER |
+ ARM_MALI_LPAE_TTBR_ADRMODE_TABLE;
+ return &data->iop;
+
+out_free_data:
+ kfree(data);
+ return NULL;
}
struct io_pgtable_init_fns io_pgtable_arm_64_lpae_s1_init_fns = {
mmu->num_ctx = min(IPMMU_CTX_MAX, mmu->features->number_of_contexts);
- irq = platform_get_irq(pdev, 0);
-
/*
* Determine if this IPMMU instance is a root device by checking for
* the lack of has_cache_leaf_nodes flag or renesas,ipmmu-main property.
/* Root devices have mandatory IRQs */
if (ipmmu_is_root(mmu)) {
+ irq = platform_get_irq(pdev, 0);
if (irq < 0) {
dev_err(&pdev->dev, "no IRQ found\n");
return irq;
struct device *dev;
void __iomem **bases;
int num_mmu;
+ int num_irq;
struct clk_bulk_data *clocks;
int num_clocks;
bool reset_disabled;
struct rk_iommu *iommu;
struct resource *res;
int num_res = pdev->num_resources;
- int err, i, irq;
+ int err, i;
iommu = devm_kzalloc(dev, sizeof(*iommu), GFP_KERNEL);
if (!iommu)
if (iommu->num_mmu == 0)
return PTR_ERR(iommu->bases[0]);
+ iommu->num_irq = platform_irq_count(pdev);
+ if (iommu->num_irq < 0)
+ return iommu->num_irq;
+
iommu->reset_disabled = device_property_read_bool(dev,
"rockchip,disable-mmu-reset");
pm_runtime_enable(dev);
- i = 0;
- while ((irq = platform_get_irq(pdev, i++)) != -ENXIO) {
+ for (i = 0; i < iommu->num_irq; i++) {
+ int irq = platform_get_irq(pdev, i);
+
if (irq < 0)
return irq;
static void rk_iommu_shutdown(struct platform_device *pdev)
{
struct rk_iommu *iommu = platform_get_drvdata(pdev);
- int i = 0, irq;
+ int i;
+
+ for (i = 0; i < iommu->num_irq; i++) {
+ int irq = platform_get_irq(pdev, i);
- while ((irq = platform_get_irq(pdev, i++)) != -ENXIO)
devm_free_irq(iommu->dev, irq, iommu);
+ }
pm_runtime_force_suspend(&pdev->dev);
}
/* FIC Registers */
#define AL_FIC_CAUSE 0x00
+#define AL_FIC_SET_CAUSE 0x08
#define AL_FIC_MASK 0x10
#define AL_FIC_CONTROL 0x28
chained_irq_exit(irqchip, desc);
}
+static int al_fic_irq_retrigger(struct irq_data *data)
+{
+ struct irq_chip_generic *gc = irq_data_get_irq_chip_data(data);
+ struct al_fic *fic = gc->private;
+
+ writel_relaxed(BIT(data->hwirq), fic->base + AL_FIC_SET_CAUSE);
+
+ return 1;
+}
+
static int al_fic_register(struct device_node *node,
struct al_fic *fic)
{
gc->chip_types->chip.irq_unmask = irq_gc_mask_clr_bit;
gc->chip_types->chip.irq_ack = irq_gc_ack_clr_bit;
gc->chip_types->chip.irq_set_type = al_fic_irq_set_type;
+ gc->chip_types->chip.irq_retrigger = al_fic_irq_retrigger;
gc->chip_types->chip.flags = IRQCHIP_SKIP_SET_WAKE;
gc->private = fic;
static const struct of_device_id aic5_irq_fixups[] __initconst = {
{ .compatible = "atmel,sama5d3", .data = sama5d3_aic_irq_fixup },
{ .compatible = "atmel,sama5d4", .data = sama5d3_aic_irq_fixup },
+ { .compatible = "microchip,sam9x60", .data = sama5d3_aic_irq_fixup },
{ /* sentinel */ },
};
return aic5_of_init(node, parent, NR_SAMA5D4_IRQS);
}
IRQCHIP_DECLARE(sama5d4_aic5, "atmel,sama5d4-aic", sama5d4_aic5_of_init);
+
+#define NR_SAM9X60_IRQS 50
+
+static int __init sam9x60_aic5_of_init(struct device_node *node,
+ struct device_node *parent)
+{
+ return aic5_of_init(node, parent, NR_SAM9X60_IRQS);
+}
+IRQCHIP_DECLARE(sam9x60_aic5, "microchip,sam9x60-aic", sam9x60_aic5_of_init);
static DEFINE_STATIC_KEY_TRUE(supports_deactivate_key);
#define GIC_ID_NR (1U << GICD_TYPER_ID_BITS(gic_data.rdists.gicd_typer))
-#define GIC_LINE_NR max(GICD_TYPER_SPIS(gic_data.rdists.gicd_typer), 1020U)
+#define GIC_LINE_NR min(GICD_TYPER_SPIS(gic_data.rdists.gicd_typer), 1020U)
#define GIC_ESPI_NR GICD_TYPER_ESPIS(gic_data.rdists.gicd_typer)
/*
}
}
-static void plic_irq_enable(struct irq_data *d)
+static void plic_irq_unmask(struct irq_data *d)
{
unsigned int cpu = cpumask_any_and(irq_data_get_affinity_mask(d),
cpu_online_mask);
plic_irq_toggle(cpumask_of(cpu), d->hwirq, 1);
}
-static void plic_irq_disable(struct irq_data *d)
+static void plic_irq_mask(struct irq_data *d)
{
plic_irq_toggle(cpu_possible_mask, d->hwirq, 0);
}
if (cpu >= nr_cpu_ids)
return -EINVAL;
- if (!irqd_irq_disabled(d)) {
- plic_irq_toggle(cpu_possible_mask, d->hwirq, 0);
- plic_irq_toggle(cpumask_of(cpu), d->hwirq, 1);
- }
+ plic_irq_toggle(cpu_possible_mask, d->hwirq, 0);
+ plic_irq_toggle(cpumask_of(cpu), d->hwirq, 1);
irq_data_update_effective_affinity(d, cpumask_of(cpu));
}
#endif
+static void plic_irq_eoi(struct irq_data *d)
+{
+ struct plic_handler *handler = this_cpu_ptr(&plic_handlers);
+
+ writel(d->hwirq, handler->hart_base + CONTEXT_CLAIM);
+}
+
static struct irq_chip plic_chip = {
.name = "SiFive PLIC",
- /*
- * There is no need to mask/unmask PLIC interrupts. They are "masked"
- * by reading claim and "unmasked" when writing it back.
- */
- .irq_enable = plic_irq_enable,
- .irq_disable = plic_irq_disable,
+ .irq_mask = plic_irq_mask,
+ .irq_unmask = plic_irq_unmask,
+ .irq_eoi = plic_irq_eoi,
#ifdef CONFIG_SMP
.irq_set_affinity = plic_set_affinity,
#endif
static int plic_irqdomain_map(struct irq_domain *d, unsigned int irq,
irq_hw_number_t hwirq)
{
- irq_set_chip_and_handler(irq, &plic_chip, handle_simple_irq);
+ irq_set_chip_and_handler(irq, &plic_chip, handle_fasteoi_irq);
irq_set_chip_data(irq, NULL);
irq_set_noprobe(irq);
return 0;
hwirq);
else
generic_handle_irq(irq);
- writel(hwirq, claim);
}
csr_set(sie, SIE_SEIE);
}
static struct dm_bio_prison_cell_v2 *alloc_prison_cell(struct cache *cache)
{
- return dm_bio_prison_alloc_cell_v2(cache->prison, GFP_NOWAIT);
+ return dm_bio_prison_alloc_cell_v2(cache->prison, GFP_NOIO);
}
static void free_prison_cell(struct cache *cache, struct dm_bio_prison_cell_v2 *cell)
{
struct dm_cache_migration *mg;
- mg = mempool_alloc(&cache->migration_pool, GFP_NOWAIT);
- if (!mg)
- return NULL;
+ mg = mempool_alloc(&cache->migration_pool, GFP_NOIO);
memset(mg, 0, sizeof(*mg));
struct dm_bio_prison_cell_v2 *cell_prealloc, *cell;
cell_prealloc = alloc_prison_cell(cache); /* FIXME: allow wait if calling from worker */
- if (!cell_prealloc) {
- defer_bio(cache, bio);
- return false;
- }
build_key(oblock, end, &key);
r = dm_cell_get_v2(cache->prison, &key, lock_level(bio), bio, cell_prealloc, &cell);
struct dm_bio_prison_cell_v2 *prealloc;
prealloc = alloc_prison_cell(cache);
- if (!prealloc) {
- DMERR_LIMIT("%s: alloc_prison_cell failed", cache_device_name(cache));
- mg_complete(mg, false);
- return -ENOMEM;
- }
/*
* Prevent writes to the block, but allow reads to continue.
}
mg = alloc_migration(cache);
- if (!mg) {
- policy_complete_background_work(cache->policy, op, false);
- background_work_end(cache);
- return -ENOMEM;
- }
mg->op = op;
mg->overwrite_bio = bio;
struct dm_bio_prison_cell_v2 *prealloc;
prealloc = alloc_prison_cell(cache);
- if (!prealloc) {
- invalidate_complete(mg, false);
- return -ENOMEM;
- }
build_key(mg->invalidate_oblock, oblock_succ(mg->invalidate_oblock), &key);
r = dm_cell_lock_v2(cache->prison, &key,
return -EPERM;
mg = alloc_migration(cache);
- if (!mg) {
- background_work_end(cache);
- return -ENOMEM;
- }
mg->overwrite_bio = bio;
mg->invalidate_cblock = cblock;
*
* NOTE: Must be called with the bucket lock held
*/
-struct dm_clone_region_hydration *__hash_find(struct hash_table_bucket *bucket,
- unsigned long region_nr)
+static struct dm_clone_region_hydration *__hash_find(struct hash_table_bucket *bucket,
+ unsigned long region_nr)
{
struct dm_clone_region_hydration *hd;
#include <linux/vmalloc.h>
#include <linux/log2.h>
#include <linux/dm-kcopyd.h>
-#include <linux/semaphore.h>
#include "dm.h"
/* The on disk metadata handler */
struct dm_exception_store *store;
- /* Maximum number of in-flight COW jobs. */
- struct semaphore cow_count;
+ unsigned in_progress;
+ struct wait_queue_head in_progress_wait;
struct dm_kcopyd_client *kcopyd_client;
*/
#define DEFAULT_COW_THRESHOLD 2048
-static int cow_threshold = DEFAULT_COW_THRESHOLD;
-module_param_named(snapshot_cow_threshold, cow_threshold, int, 0644);
+static unsigned cow_threshold = DEFAULT_COW_THRESHOLD;
+module_param_named(snapshot_cow_threshold, cow_threshold, uint, 0644);
MODULE_PARM_DESC(snapshot_cow_threshold, "Maximum number of chunks being copied on write");
DECLARE_DM_KCOPYD_THROTTLE_WITH_MODULE_PARM(snapshot_copy_throttle,
goto bad_hash_tables;
}
- sema_init(&s->cow_count, (cow_threshold > 0) ? cow_threshold : INT_MAX);
+ init_waitqueue_head(&s->in_progress_wait);
s->kcopyd_client = dm_kcopyd_client_create(&dm_kcopyd_throttle);
if (IS_ERR(s->kcopyd_client)) {
dm_put_device(ti, s->origin);
+ WARN_ON(s->in_progress);
+
kfree(s);
}
+static void account_start_copy(struct dm_snapshot *s)
+{
+ spin_lock(&s->in_progress_wait.lock);
+ s->in_progress++;
+ spin_unlock(&s->in_progress_wait.lock);
+}
+
+static void account_end_copy(struct dm_snapshot *s)
+{
+ spin_lock(&s->in_progress_wait.lock);
+ BUG_ON(!s->in_progress);
+ s->in_progress--;
+ if (likely(s->in_progress <= cow_threshold) &&
+ unlikely(waitqueue_active(&s->in_progress_wait)))
+ wake_up_locked(&s->in_progress_wait);
+ spin_unlock(&s->in_progress_wait.lock);
+}
+
+static bool wait_for_in_progress(struct dm_snapshot *s, bool unlock_origins)
+{
+ if (unlikely(s->in_progress > cow_threshold)) {
+ spin_lock(&s->in_progress_wait.lock);
+ if (likely(s->in_progress > cow_threshold)) {
+ /*
+ * NOTE: this throttle doesn't account for whether
+ * the caller is servicing an IO that will trigger a COW
+ * so excess throttling may result for chunks not required
+ * to be COW'd. But if cow_threshold was reached, extra
+ * throttling is unlikely to negatively impact performance.
+ */
+ DECLARE_WAITQUEUE(wait, current);
+ __add_wait_queue(&s->in_progress_wait, &wait);
+ __set_current_state(TASK_UNINTERRUPTIBLE);
+ spin_unlock(&s->in_progress_wait.lock);
+ if (unlock_origins)
+ up_read(&_origins_lock);
+ io_schedule();
+ remove_wait_queue(&s->in_progress_wait, &wait);
+ return false;
+ }
+ spin_unlock(&s->in_progress_wait.lock);
+ }
+ return true;
+}
+
/*
* Flush a list of buffers.
*/
}
}
-static int do_origin(struct dm_dev *origin, struct bio *bio);
+static int do_origin(struct dm_dev *origin, struct bio *bio, bool limit);
/*
* Flush a list of buffers.
while (bio) {
n = bio->bi_next;
bio->bi_next = NULL;
- r = do_origin(s->origin, bio);
+ r = do_origin(s->origin, bio, false);
if (r == DM_MAPIO_REMAPPED)
generic_make_request(bio);
bio = n;
rb_link_node(&pe->out_of_order_node, parent, p);
rb_insert_color(&pe->out_of_order_node, &s->out_of_order_tree);
}
- up(&s->cow_count);
+ account_end_copy(s);
}
/*
dest.count = src.count;
/* Hand over to kcopyd */
- down(&s->cow_count);
+ account_start_copy(s);
dm_kcopyd_copy(s->kcopyd_client, &src, 1, &dest, 0, copy_callback, pe);
}
pe->full_bio = bio;
pe->full_bio_end_io = bio->bi_end_io;
- down(&s->cow_count);
+ account_start_copy(s);
callback_data = dm_kcopyd_prepare_callback(s->kcopyd_client,
copy_callback, pe);
struct bio *bio = context;
struct dm_snapshot *s = bio->bi_private;
- up(&s->cow_count);
+ account_end_copy(s);
bio->bi_status = write_err ? BLK_STS_IOERR : 0;
bio_endio(bio);
}
dest.sector = bio->bi_iter.bi_sector;
dest.count = s->store->chunk_size;
- down(&s->cow_count);
+ account_start_copy(s);
WARN_ON_ONCE(bio->bi_private);
bio->bi_private = s;
dm_kcopyd_zero(s->kcopyd_client, 1, &dest, 0, zero_callback, bio);
if (!s->valid)
return DM_MAPIO_KILL;
+ if (bio_data_dir(bio) == WRITE) {
+ while (unlikely(!wait_for_in_progress(s, false)))
+ ; /* wait_for_in_progress() has slept */
+ }
+
down_read(&s->lock);
dm_exception_table_lock(&lock);
if (bio_data_dir(bio) == WRITE) {
up_write(&s->lock);
- return do_origin(s->origin, bio);
+ return do_origin(s->origin, bio, false);
}
out_unlock:
/*
* Called on a write from the origin driver.
*/
-static int do_origin(struct dm_dev *origin, struct bio *bio)
+static int do_origin(struct dm_dev *origin, struct bio *bio, bool limit)
{
struct origin *o;
int r = DM_MAPIO_REMAPPED;
+again:
down_read(&_origins_lock);
o = __lookup_origin(origin->bdev);
- if (o)
+ if (o) {
+ if (limit) {
+ struct dm_snapshot *s;
+ list_for_each_entry(s, &o->snapshots, list)
+ if (unlikely(!wait_for_in_progress(s, true)))
+ goto again;
+ }
+
r = __origin_write(&o->snapshots, bio->bi_iter.bi_sector, bio);
+ }
up_read(&_origins_lock);
return r;
dm_accept_partial_bio(bio, available_sectors);
/* Only tell snapshots if this is a write */
- return do_origin(o->dev, bio);
+ return do_origin(o->dev, bio, true);
}
/*
} else {
pr_err("md/raid0:%s: cannot assemble multi-zone RAID0 with default_layout setting\n",
mdname(mddev));
- pr_err("md/raid0: please set raid.default_layout to 1 or 2\n");
+ pr_err("md/raid0: please set raid0.default_layout to 1 or 2\n");
err = -ENOTSUPP;
goto abort;
}
if (!cnt) {
rc = -ENODEV;
pci_dev_busy = 1;
- goto err_out;
+ goto err_out_int;
}
jm = kzalloc(sizeof(struct jmb38x_ms)
struct tmio_mmc_dma *dma_priv;
struct tmio_mmc_host *host;
struct renesas_sdhi *priv;
+ int num_irqs, irq, ret, i;
struct resource *res;
- int irq, ret, i;
u16 ver;
of_data = of_device_get_match_data(&pdev->dev);
host->hs400_complete = renesas_sdhi_hs400_complete;
}
- i = 0;
- while (1) {
+ num_irqs = platform_irq_count(pdev);
+ if (num_irqs < 0) {
+ ret = num_irqs;
+ goto eirq;
+ }
+
+ /* There must be at least one IRQ source */
+ if (!num_irqs) {
+ ret = -ENXIO;
+ goto eirq;
+ }
+
+ for (i = 0; i < num_irqs; i++) {
irq = platform_get_irq(pdev, i);
- if (irq < 0)
- break;
- i++;
+ if (irq < 0) {
+ ret = irq;
+ goto eirq;
+ }
+
ret = devm_request_irq(&pdev->dev, irq, tmio_mmc_irq, 0,
dev_name(&pdev->dev), host);
if (ret)
goto eirq;
}
- /* There must be at least one IRQ source */
- if (!i) {
- ret = irq;
- goto eirq;
- }
-
dev_info(&pdev->dev, "%s base at 0x%08lx max clock rate %u MHz\n",
mmc_hostname(host->mmc), (unsigned long)
(platform_get_resource(pdev, IORESOURCE_MEM, 0)->start),
};
static const struct sdhci_pltfm_data sdhci_bcm2711_pltfm_data = {
+ .quirks = SDHCI_QUIRK_MULTIBLOCK_READ_ACMD12,
.ops = &sdhci_iproc_32only_ops,
};
const char *name;
irq[0] = platform_get_irq(pdev, 0);
- irq[1] = platform_get_irq(pdev, 1);
- if (irq[0] < 0) {
- dev_err(dev, "Get irq error\n");
+ irq[1] = platform_get_irq_optional(pdev, 1);
+ if (irq[0] < 0)
return -ENXIO;
- }
res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
reg = devm_ioremap_resource(dev, res);
* this to-be-skipped slave to send a packet out.
*/
old_arr = rtnl_dereference(bond->slave_arr);
- for (idx = 0; idx < old_arr->count; idx++) {
+ for (idx = 0; old_arr != NULL && idx < old_arr->count; idx++) {
if (skipslave == old_arr->arr[idx]) {
old_arr->arr[idx] =
old_arr->arr[old_arr->count-1];
loc = B53_EG_MIR_CTL;
b53_read16(dev, B53_MGMT_PAGE, loc, &reg);
- reg &= ~MIRROR_MASK;
reg |= BIT(port);
b53_write16(dev, B53_MGMT_PAGE, loc, reg);
{
int i;
- mutex_init(&dev->stats_mutex);
- mutex_init(&dev->alu_mutex);
- mutex_init(&dev->vlan_mutex);
-
dev->ds->ops = &ksz8795_switch_ops;
for (i = 0; i < ARRAY_SIZE(ksz8795_switch_chips); i++) {
static int ksz8795_spi_probe(struct spi_device *spi)
{
+ struct regmap_config rc;
struct ksz_device *dev;
int i, ret;
return -ENOMEM;
for (i = 0; i < ARRAY_SIZE(ksz8795_regmap_config); i++) {
- dev->regmap[i] = devm_regmap_init_spi(spi,
- &ksz8795_regmap_config
- [i]);
+ rc = ksz8795_regmap_config[i];
+ rc.lock_arg = &dev->regmap_mutex;
+ dev->regmap[i] = devm_regmap_init_spi(spi, &rc);
if (IS_ERR(dev->regmap[i])) {
ret = PTR_ERR(dev->regmap[i]);
dev_err(&spi->dev,
static int ksz9477_i2c_probe(struct i2c_client *i2c,
const struct i2c_device_id *i2c_id)
{
+ struct regmap_config rc;
struct ksz_device *dev;
int i, ret;
return -ENOMEM;
for (i = 0; i < ARRAY_SIZE(ksz9477_regmap_config); i++) {
- dev->regmap[i] = devm_regmap_init_i2c(i2c,
- &ksz9477_regmap_config[i]);
+ rc = ksz9477_regmap_config[i];
+ rc.lock_arg = &dev->regmap_mutex;
+ dev->regmap[i] = devm_regmap_init_i2c(i2c, &rc);
if (IS_ERR(dev->regmap[i])) {
ret = PTR_ERR(dev->regmap[i]);
dev_err(&i2c->dev,
-/* SPDX-License-Identifier: GPL-2.0
- *
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
* Microchip KSZ9477 register definitions
*
* Copyright (C) 2017-2018 Microchip Technology Inc.
static int ksz9477_spi_probe(struct spi_device *spi)
{
+ struct regmap_config rc;
struct ksz_device *dev;
int i, ret;
return -ENOMEM;
for (i = 0; i < ARRAY_SIZE(ksz9477_regmap_config); i++) {
- dev->regmap[i] = devm_regmap_init_spi(spi,
- &ksz9477_regmap_config[i]);
+ rc = ksz9477_regmap_config[i];
+ rc.lock_arg = &dev->regmap_mutex;
+ dev->regmap[i] = devm_regmap_init_spi(spi, &rc);
if (IS_ERR(dev->regmap[i])) {
ret = PTR_ERR(dev->regmap[i]);
dev_err(&spi->dev,
}
mutex_init(&dev->dev_mutex);
- mutex_init(&dev->stats_mutex);
+ mutex_init(&dev->regmap_mutex);
mutex_init(&dev->alu_mutex);
mutex_init(&dev->vlan_mutex);
-/* SPDX-License-Identifier: GPL-2.0
- * Microchip switch driver common header
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Microchip switch driver common header
*
* Copyright (C) 2017-2019 Microchip Technology Inc.
*/
const char *name;
struct mutex dev_mutex; /* device access */
- struct mutex stats_mutex; /* status access */
+ struct mutex regmap_mutex; /* regmap access */
struct mutex alu_mutex; /* ALU access */
struct mutex vlan_mutex; /* vlan access */
const struct ksz_dev_ops *dev_ops;
ksz_write32(dev, dev->dev_ops->get_port_addr(port, offset), data);
}
+/* regmap .lock hook: @__mtx is the mutex the probe code stored in lock_arg. */
+static inline void ksz_regmap_lock(void *__mtx)
+{
+	mutex_lock((struct mutex *)__mtx);
+}
+
+/* regmap .unlock hook: @__mtx is the mutex the probe code stored in lock_arg. */
+static inline void ksz_regmap_unlock(void *__mtx)
+{
+	mutex_unlock((struct mutex *)__mtx);
+}
+
/* Regmap tables generation */
#define KSZ_SPI_OP_RD 3
#define KSZ_SPI_OP_WR 2
.write_flag_mask = \
KSZ_SPI_OP_FLAG_MASK(KSZ_SPI_OP_WR, swp, \
regbits, regpad), \
+ .lock = ksz_regmap_lock, \
+ .unlock = ksz_regmap_unlock, \
.reg_format_endian = REGMAP_ENDIAN_BIG, \
.val_format_endian = REGMAP_ENDIAN_BIG \
}
-/* SPDX-License-Identifier: GPL-2.0
- * Copyright (c) 2018, Sensor-Technik Wiedemann GmbH
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2018, Sensor-Technik Wiedemann GmbH
* Copyright (c) 2018-2019, Vladimir Oltean <olteanv@gmail.com>
*/
#ifndef _SJA1105_H
-/* SPDX-License-Identifier: GPL-2.0
- * Copyright (c) 2019, Vladimir Oltean <olteanv@gmail.com>
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2019, Vladimir Oltean <olteanv@gmail.com>
*/
#ifndef _SJA1105_DYNAMIC_CONFIG_H
#define _SJA1105_DYNAMIC_CONFIG_H
-/* SPDX-License-Identifier: GPL-2.0
- * Copyright (c) 2019, Vladimir Oltean <olteanv@gmail.com>
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2019, Vladimir Oltean <olteanv@gmail.com>
*/
#ifndef _SJA1105_PTP_H
#define _SJA1105_PTP_H
-/* SPDX-License-Identifier: BSD-3-Clause
- * Copyright (c) 2016-2018, NXP Semiconductors
+/* SPDX-License-Identifier: BSD-3-Clause */
+/* Copyright (c) 2016-2018, NXP Semiconductors
* Copyright (c) 2018-2019, Vladimir Oltean <olteanv@gmail.com>
*/
#ifndef _SJA1105_STATIC_CONFIG_H
-/* SPDX-License-Identifier: GPL-2.0
- * Copyright (c) 2019, Vladimir Oltean <olteanv@gmail.com>
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2019, Vladimir Oltean <olteanv@gmail.com>
*/
#ifndef _SJA1105_TAS_H
#define _SJA1105_TAS_H
{
struct aq_nic_s *aq_nic = netdev_priv(ndev);
- aq_nic_set_packet_filter(aq_nic, ndev->flags);
-
- aq_nic_set_multicast_list(aq_nic, ndev);
+ (void)aq_nic_set_multicast_list(aq_nic, ndev);
}
static int aq_ndo_vlan_rx_add_vid(struct net_device *ndev, __be16 proto,
int aq_nic_set_multicast_list(struct aq_nic_s *self, struct net_device *ndev)
{
- unsigned int packet_filter = self->packet_filter;
+ const struct aq_hw_ops *hw_ops = self->aq_hw_ops;
+ struct aq_nic_cfg_s *cfg = &self->aq_nic_cfg;
+ unsigned int packet_filter = ndev->flags;
struct netdev_hw_addr *ha = NULL;
unsigned int i = 0U;
+ int err = 0;
self->mc_list.count = 0;
if (netdev_uc_count(ndev) > AQ_HW_MULTICAST_ADDRESS_MAX) {
} else {
netdev_for_each_uc_addr(ha, ndev) {
ether_addr_copy(self->mc_list.ar[i++], ha->addr);
-
- if (i >= AQ_HW_MULTICAST_ADDRESS_MAX)
- break;
}
}
- if (i + netdev_mc_count(ndev) > AQ_HW_MULTICAST_ADDRESS_MAX) {
- packet_filter |= IFF_ALLMULTI;
- } else {
- netdev_for_each_mc_addr(ha, ndev) {
- ether_addr_copy(self->mc_list.ar[i++], ha->addr);
-
- if (i >= AQ_HW_MULTICAST_ADDRESS_MAX)
- break;
+ cfg->is_mc_list_enabled = !!(packet_filter & IFF_MULTICAST);
+ if (cfg->is_mc_list_enabled) {
+ if (i + netdev_mc_count(ndev) > AQ_HW_MULTICAST_ADDRESS_MAX) {
+ packet_filter |= IFF_ALLMULTI;
+ } else {
+ netdev_for_each_mc_addr(ha, ndev) {
+ ether_addr_copy(self->mc_list.ar[i++],
+ ha->addr);
+ }
}
}
if (i > 0 && i <= AQ_HW_MULTICAST_ADDRESS_MAX) {
- packet_filter |= IFF_MULTICAST;
self->mc_list.count = i;
- self->aq_hw_ops->hw_multicast_list_set(self->aq_hw,
- self->mc_list.ar,
- self->mc_list.count);
+ err = hw_ops->hw_multicast_list_set(self->aq_hw,
+ self->mc_list.ar,
+ self->mc_list.count);
+ if (err < 0)
+ return err;
}
return aq_nic_set_packet_filter(self, packet_filter);
}
break;
buff->is_error |= buff_->is_error;
+ buff->is_cso_err |= buff_->is_cso_err;
} while (!buff_->is_eop);
err = 0;
goto err_exit;
}
- if (buff->is_error) {
+ if (buff->is_error || buff->is_cso_err) {
buff_ = buff;
do {
next_ = buff_->next,
cfg->is_vlan_force_promisc);
hw_atl_rpfl2multicast_flr_en_set(self,
- IS_FILTER_ENABLED(IFF_ALLMULTI), 0);
+ IS_FILTER_ENABLED(IFF_ALLMULTI) &&
+ IS_FILTER_ENABLED(IFF_MULTICAST), 0);
hw_atl_rpfl2_accept_all_mc_packets_set(self,
- IS_FILTER_ENABLED(IFF_ALLMULTI));
+ IS_FILTER_ENABLED(IFF_ALLMULTI) &&
+ IS_FILTER_ENABLED(IFF_MULTICAST));
hw_atl_rpfl2broadcast_en_set(self, IS_FILTER_ENABLED(IFF_BROADCAST));
- cfg->is_mc_list_enabled = IS_FILTER_ENABLED(IFF_MULTICAST);
for (i = HW_ATL_B0_MAC_MIN; i < HW_ATL_B0_MAC_MAX; ++i)
hw_atl_rpfl2_uc_flr_en_set(self,
static int hw_atl_b0_hw_stop(struct aq_hw_s *self)
{
+ int err;
+ u32 val;
+
hw_atl_b0_hw_irq_disable(self, HW_ATL_B0_INT_MASK);
/* Invalidate Descriptor Cache to prevent writing to the cached
* descriptors and to the data pointer of those descriptors
*/
- hw_atl_rdm_rx_dma_desc_cache_init_set(self, 1);
+ hw_atl_rdm_rx_dma_desc_cache_init_tgl(self);
- return aq_hw_err_from_flags(self);
+ err = aq_hw_err_from_flags(self);
+
+ if (err)
+ goto err_exit;
+
+ /* Wait for the cache-init-done bit to read back as 1. The poll result
+ * is not checked, so a timeout here does not change the value
+ * returned below.
+ */
+ readx_poll_timeout_atomic(hw_atl_rdm_rx_dma_desc_cache_init_done_get,
+ self, val, val == 1, 1000U, 10000U);
+
+err_exit:
+ return err;
}
static int hw_atl_b0_hw_ring_tx_stop(struct aq_hw_s *self,
HW_ATL_RPB_RX_FC_MODE_SHIFT, rx_flow_ctl_mode);
}
-void hw_atl_rdm_rx_dma_desc_cache_init_set(struct aq_hw_s *aq_hw, u32 init)
+void hw_atl_rdm_rx_dma_desc_cache_init_tgl(struct aq_hw_s *aq_hw)
{
+ u32 val;
+
+ /* Read the current state of the cache-init bitfield ... */
+ val = aq_hw_read_reg_bit(aq_hw, HW_ATL_RDM_RX_DMA_DESC_CACHE_INIT_ADR,
+ HW_ATL_RDM_RX_DMA_DESC_CACHE_INIT_MSK,
+ HW_ATL_RDM_RX_DMA_DESC_CACHE_INIT_SHIFT);
+
+ /* ... and write it back with the lowest bit inverted (val ^ 1). */
aq_hw_write_reg_bit(aq_hw, HW_ATL_RDM_RX_DMA_DESC_CACHE_INIT_ADR,
HW_ATL_RDM_RX_DMA_DESC_CACHE_INIT_MSK,
HW_ATL_RDM_RX_DMA_DESC_CACHE_INIT_SHIFT,
- init);
+ val ^ 1);
+}
+
+/* Read the rdm_desc_init_done_i bitfield (RX DMA descriptor cache init done). */
+u32 hw_atl_rdm_rx_dma_desc_cache_init_done_get(struct aq_hw_s *aq_hw)
+{
+	u32 init_done;
+
+	init_done = aq_hw_read_reg_bit(aq_hw,
+				       RDM_RX_DMA_DESC_CACHE_INIT_DONE_ADR,
+				       RDM_RX_DMA_DESC_CACHE_INIT_DONE_MSK,
+				       RDM_RX_DMA_DESC_CACHE_INIT_DONE_SHIFT);
+
+	return init_done;
}
void hw_atl_rpb_rx_pkt_buff_size_per_tc_set(struct aq_hw_s *aq_hw,
u32 rx_pkt_buff_size_per_tc,
u32 buffer);
-/* set rdm rx dma descriptor cache init */
-void hw_atl_rdm_rx_dma_desc_cache_init_set(struct aq_hw_s *aq_hw, u32 init);
+/* toggle rdm rx dma descriptor cache init */
+void hw_atl_rdm_rx_dma_desc_cache_init_tgl(struct aq_hw_s *aq_hw);
+
+/* get rdm rx dma descriptor cache init done */
+u32 hw_atl_rdm_rx_dma_desc_cache_init_done_get(struct aq_hw_s *aq_hw);
/* set rx xoff enable (per tc) */
void hw_atl_rpb_rx_xoff_en_per_tc_set(struct aq_hw_s *aq_hw, u32 rx_xoff_en_per_tc,
/* default value of bitfield rdm_desc_init_i */
#define HW_ATL_RDM_RX_DMA_DESC_CACHE_INIT_DEFAULT 0x0
+/* rdm_desc_init_done_i bitfield definitions
+ * preprocessor definitions for the bitfield rdm_desc_init_done_i.
+ * port="pif_rdm_desc_init_done_i"
+ */
+
+/* register address for bitfield rdm_desc_init_done_i */
+#define RDM_RX_DMA_DESC_CACHE_INIT_DONE_ADR 0x00005a10
+/* bitmask for bitfield rdm_desc_init_done_i */
+#define RDM_RX_DMA_DESC_CACHE_INIT_DONE_MSK 0x00000001U
+/* inverted bitmask for bitfield rdm_desc_init_done_i */
+#define RDM_RX_DMA_DESC_CACHE_INIT_DONE_MSKN 0xfffffffe
+/* lower bit position of bitfield rdm_desc_init_done_i */
+#define RDM_RX_DMA_DESC_CACHE_INIT_DONE_SHIFT 0U
+/* width of bitfield rdm_desc_init_done_i */
+#define RDM_RX_DMA_DESC_CACHE_INIT_DONE_WIDTH 1
+/* default value of bitfield rdm_desc_init_done_i */
+#define RDM_RX_DMA_DESC_CACHE_INIT_DONE_DEFAULT 0x0
+
+
/* rx int_desc_wrb_en bitfield definitions
* preprocessor definitions for the bitfield "int_desc_wrb_en".
* port="pif_rdm_int_desc_wrb_en_i"
/* Convert PHY temperature from 1/256 degree Celsius
* to 1/1000 degree Celsius.
*/
- *temp = temp_res * 1000 / 256;
+ *temp = (temp_res & 0xFFFF) * 1000 / 256;
return 0;
}
default y
depends on (SSB_POSSIBLE && HAS_DMA) || PCI || BCM63XX || \
SIBYTE_SB1xxx_SOC
- select DIMLIB
---help---
If you have a network (Ethernet) chipset belonging to this class,
say Y.
select FIXED_PHY
select BCM7XXX_PHY
select MDIO_BCM_UNIMAC
+ select DIMLIB
help
This driver supports the built-in Ethernet MACs found in the
Broadcom BCM7xxx Set Top Box family chipset.
select MII
select PHYLIB
select FIXED_PHY
+ select DIMLIB
help
This driver supports the built-in Ethernet MACs found in the
Broadcom BCM7xxx Set Top Box family chipset using an internal
select LIBCRC32C
select NET_DEVLINK
select PAGE_POOL
+ select DIMLIB
---help---
This driver supports Broadcom NetXtreme-C/E 10/25/40/50 gigabit
Ethernet cards. To compile this driver as a module, choose M here:
*/
if (priv->internal_phy) {
int0_enable |= UMAC_IRQ_LINK_EVENT;
+ if (GENET_IS_V1(priv) || GENET_IS_V2(priv) || GENET_IS_V3(priv))
+ int0_enable |= UMAC_IRQ_PHY_DET_R;
} else if (priv->ext_phy) {
int0_enable |= UMAC_IRQ_LINK_EVENT;
} else if (priv->phy_interface == PHY_INTERFACE_MODE_MOCA) {
priv->irq0_stat = 0;
spin_unlock_irq(&priv->lock);
+ if (status & UMAC_IRQ_PHY_DET_R &&
+ priv->dev->phydev->autoneg != AUTONEG_ENABLE)
+ phy_init_hw(priv->dev->phydev);
+
/* Link UP/DOWN event */
- if (status & UMAC_IRQ_LINK_EVENT) {
- priv->dev->phydev->link = !!(status & UMAC_IRQ_LINK_UP);
+ if (status & UMAC_IRQ_LINK_EVENT)
phy_mac_interrupt(priv->dev->phydev);
- }
+
}
/* bcmgenet_isr1: handle Rx and Tx priority queues */
}
/* all other interested interrupts handled in bottom half */
- status &= UMAC_IRQ_LINK_EVENT;
+ status &= (UMAC_IRQ_LINK_EVENT | UMAC_IRQ_PHY_DET_R);
if (status) {
/* Save irq status for bottom-half processing. */
spin_lock_irqsave(&priv->lock, flags);
if (priv->internal_phy)
bcmgenet_power_up(priv, GENET_POWER_PASSIVE);
+ ret = bcmgenet_mii_connect(dev);
+ if (ret) {
+ netdev_err(dev, "failed to connect to PHY\n");
+ goto err_clk_disable;
+ }
+
/* take MAC out of reset */
bcmgenet_umac_reset(priv);
reg = bcmgenet_umac_readl(priv, UMAC_CMD);
priv->crc_fwd_en = !!(reg & CMD_CRC_FWD);
+ ret = bcmgenet_mii_config(dev, true);
+ if (ret) {
+ netdev_err(dev, "unsupported PHY\n");
+ goto err_disconnect_phy;
+ }
+
bcmgenet_set_hw_addr(priv, dev->dev_addr);
if (priv->internal_phy) {
ret = bcmgenet_init_dma(priv);
if (ret) {
netdev_err(dev, "failed to initialize DMA\n");
- goto err_clk_disable;
+ goto err_disconnect_phy;
}
/* Always enable ring 16 - descriptor ring */
goto err_irq0;
}
- ret = bcmgenet_mii_probe(dev);
- if (ret) {
- netdev_err(dev, "failed to connect to PHY\n");
- goto err_irq1;
- }
-
bcmgenet_netif_start(dev);
netif_tx_start_all_queues(dev);
return 0;
-err_irq1:
- free_irq(priv->irq1, priv);
err_irq0:
free_irq(priv->irq0, priv);
err_fini_dma:
bcmgenet_dma_teardown(priv);
bcmgenet_fini_dma(priv);
+err_disconnect_phy:
+ phy_disconnect(dev->phydev);
err_clk_disable:
if (priv->internal_phy)
bcmgenet_power_down(priv, GENET_POWER_PASSIVE);
if (priv->internal_phy)
bcmgenet_power_up(priv, GENET_POWER_PASSIVE);
+ phy_init_hw(dev->phydev);
+
bcmgenet_umac_reset(priv);
init_umac(priv);
if (priv->wolopts)
clk_disable_unprepare(priv->clk_wol);
- phy_init_hw(dev->phydev);
-
/* Speed settings must be restored */
bcmgenet_mii_config(priv->dev, false);
#define EXT_PWR_DOWN_PHY_EN (1 << 20)
#define EXT_RGMII_OOB_CTRL 0x0C
+#define RGMII_MODE_EN_V123 (1 << 0)
#define RGMII_LINK (1 << 4)
#define OOB_DISABLE (1 << 5)
#define RGMII_MODE_EN (1 << 6)
/* MDIO routines */
int bcmgenet_mii_init(struct net_device *dev);
+int bcmgenet_mii_connect(struct net_device *dev);
int bcmgenet_mii_config(struct net_device *dev, bool init);
-int bcmgenet_mii_probe(struct net_device *dev);
void bcmgenet_mii_exit(struct net_device *dev);
void bcmgenet_phy_power_set(struct net_device *dev, bool enable);
void bcmgenet_mii_setup(struct net_device *dev);
bcmgenet_fixed_phy_link_update);
}
+/* Attach @dev to its PHY and install bcmgenet_mii_setup() as the link handler.
+ *
+ * With a device-tree node the PHY is looked up through priv->phy_dn;
+ * otherwise the PHY already recorded in dev->phydev is connected directly.
+ * The cached link state (old_link/old_speed/old_duplex/old_pause) is reset
+ * to -1 before attaching.
+ *
+ * Return: 0 on success, -ENODEV when of_phy_connect() finds no PHY, or the
+ * error code reported by phy_connect_direct().
+ */
+int bcmgenet_mii_connect(struct net_device *dev)
+{
+	struct bcmgenet_priv *priv = netdev_priv(dev);
+	struct device_node *dn = priv->pdev->dev.of_node;
+	struct phy_device *phydev;
+	u32 phy_flags = 0;
+	int ret;
+
+	/* Communicate the integrated PHY revision */
+	if (priv->internal_phy)
+		phy_flags = priv->gphy_rev;
+
+	/* Initialize link state variables that bcmgenet_mii_setup() uses */
+	priv->old_link = -1;
+	priv->old_speed = -1;
+	priv->old_duplex = -1;
+	priv->old_pause = -1;
+
+	if (dn) {
+		phydev = of_phy_connect(dev, priv->phy_dn, bcmgenet_mii_setup,
+					phy_flags, priv->phy_interface);
+		if (!phydev) {
+			netdev_err(dev, "could not attach to PHY\n");
+			return -ENODEV;
+		}
+	} else {
+		phydev = dev->phydev;
+		phydev->dev_flags = phy_flags;
+
+		ret = phy_connect_direct(dev, phydev, bcmgenet_mii_setup,
+					 priv->phy_interface);
+		if (ret) {
+			/* Keep the real failure reason for the caller */
+			netdev_err(dev, "could not attach to PHY\n");
+			return ret;
+		}
+	}
+
+	return 0;
+}
+
int bcmgenet_mii_config(struct net_device *dev, bool init)
{
struct bcmgenet_priv *priv = netdev_priv(dev);
*/
if (priv->ext_phy) {
reg = bcmgenet_ext_readl(priv, EXT_RGMII_OOB_CTRL);
- reg |= RGMII_MODE_EN | id_mode_dis;
+ reg |= id_mode_dis;
+ if (GENET_IS_V1(priv) || GENET_IS_V2(priv) || GENET_IS_V3(priv))
+ reg |= RGMII_MODE_EN_V123;
+ else
+ reg |= RGMII_MODE_EN;
bcmgenet_ext_writel(priv, reg, EXT_RGMII_OOB_CTRL);
}
- if (init)
- dev_info(kdev, "configuring instance for %s\n", phy_name);
-
- return 0;
-}
-
-int bcmgenet_mii_probe(struct net_device *dev)
-{
- struct bcmgenet_priv *priv = netdev_priv(dev);
- struct device_node *dn = priv->pdev->dev.of_node;
- struct phy_device *phydev;
- u32 phy_flags;
- int ret;
-
- /* Communicate the integrated PHY revision */
- phy_flags = priv->gphy_rev;
-
- /* Initialize link state variables that bcmgenet_mii_setup() uses */
- priv->old_link = -1;
- priv->old_speed = -1;
- priv->old_duplex = -1;
- priv->old_pause = -1;
-
- if (dn) {
- phydev = of_phy_connect(dev, priv->phy_dn, bcmgenet_mii_setup,
- phy_flags, priv->phy_interface);
- if (!phydev) {
- pr_err("could not attach to PHY\n");
- return -ENODEV;
- }
- } else {
- phydev = dev->phydev;
- phydev->dev_flags = phy_flags;
+ if (init) {
+ linkmode_copy(phydev->advertising, phydev->supported);
- ret = phy_connect_direct(dev, phydev, bcmgenet_mii_setup,
- priv->phy_interface);
- if (ret) {
- pr_err("could not attach to PHY\n");
- return -ENODEV;
- }
- }
+ /* The internal PHY has its link interrupts routed to the
+ * Ethernet MAC ISRs. On GENETv5 there is a hardware issue
+ * that prevents the signaling of link UP interrupts when
+ * the link operates at 10Mbps, so fallback to polling for
+ * those versions of GENET.
+ */
+ if (priv->internal_phy && !GENET_IS_V5(priv))
+ phydev->irq = PHY_IGNORE_INTERRUPT;
- /* Configure port multiplexer based on what the probed PHY device since
- * reading the 'max-speed' property determines the maximum supported
- * PHY speed which is needed for bcmgenet_mii_config() to configure
- * things appropriately.
- */
- ret = bcmgenet_mii_config(dev, true);
- if (ret) {
- phy_disconnect(dev->phydev);
- return ret;
+ dev_info(kdev, "configuring instance for %s\n", phy_name);
}
- linkmode_copy(phydev->advertising, phydev->supported);
-
- /* The internal PHY has its link interrupts routed to the
- * Ethernet MAC ISRs. On GENETv5 there is a hardware issue
- * that prevents the signaling of link UP interrupts when
- * the link operates at 10Mbps, so fallback to polling for
- * those versions of GENET.
- */
- if (priv->internal_phy && !GENET_IS_V5(priv))
- dev->phydev->irq = PHY_IGNORE_INTERRUPT;
-
return 0;
}
return err;
}
- *tx_clk = devm_clk_get(&pdev->dev, "tx_clk");
+ *tx_clk = devm_clk_get_optional(&pdev->dev, "tx_clk");
if (IS_ERR(*tx_clk))
- *tx_clk = NULL;
+ return PTR_ERR(*tx_clk);
- *rx_clk = devm_clk_get(&pdev->dev, "rx_clk");
+ *rx_clk = devm_clk_get_optional(&pdev->dev, "rx_clk");
if (IS_ERR(*rx_clk))
- *rx_clk = NULL;
+ return PTR_ERR(*rx_clk);
- *tsu_clk = devm_clk_get(&pdev->dev, "tsu_clk");
+ *tsu_clk = devm_clk_get_optional(&pdev->dev, "tsu_clk");
if (IS_ERR(*tsu_clk))
- *tsu_clk = NULL;
+ return PTR_ERR(*tsu_clk);
err = clk_prepare_enable(*pclk);
if (err) {
-// SPDX-License-Identifier: GPL-2.0
+/* SPDX-License-Identifier: GPL-2.0 */
/* cavium_ptp.h - PTP 1588 clock on Cavium hardware
* Copyright (c) 2003-2015, 2017 Cavium, Inc.
*/
priv->rx_td_enabled = enable;
}
+static void update_tx_fqids(struct dpaa2_eth_priv *priv);
+
static int link_state_update(struct dpaa2_eth_priv *priv)
{
struct dpni_link_state state = {0};
goto out;
if (state.up) {
+ update_tx_fqids(priv);
netif_carrier_on(priv->net_dev);
netif_tx_start_all_queues(priv->net_dev);
} else {
return 0;
}
+/* Refresh the per-traffic-class Tx FQIDs of every Tx confirmation FQ and
+ * pick the enqueue callback: FQID-based when every FQID could be read and
+ * is non-zero, QDID-based otherwise.
+ */
+static void update_tx_fqids(struct dpaa2_eth_priv *priv)
+{
+	struct dpni_queue_id qid = {0};
+	struct dpni_queue queue;
+	struct dpaa2_eth_fq *fq;
+	int idx, tc, err;
+
+	/* We only use Tx FQIDs for FQID-based enqueue, so check
+	 * if DPNI version supports it before updating FQIDs
+	 */
+	if (dpaa2_eth_cmp_dpni_ver(priv, DPNI_ENQUEUE_FQID_VER_MAJOR,
+				   DPNI_ENQUEUE_FQID_VER_MINOR) < 0)
+		return;
+
+	for (idx = 0; idx < priv->num_fqs; idx++) {
+		fq = &priv->fq[idx];
+		if (fq->type != DPAA2_TX_CONF_FQ)
+			continue;
+
+		for (tc = 0; tc < dpaa2_eth_tc_count(priv); tc++) {
+			err = dpni_get_queue(priv->mc_io, 0, priv->mc_token,
+					     DPNI_QUEUE_TX, tc, fq->flowid,
+					     &queue, &qid);
+			if (err)
+				goto fallback_qd;
+
+			/* A zero FQID is stored but treated as a failure */
+			fq->tx_fqid[tc] = qid.fqid;
+			if (!fq->tx_fqid[tc])
+				goto fallback_qd;
+		}
+	}
+
+	priv->enqueue = dpaa2_eth_enqueue_fq;
+
+	return;
+
+fallback_qd:
+	netdev_info(priv->net_dev,
+		    "Error reading Tx FQID, fallback to QDID-based enqueue\n");
+	priv->enqueue = dpaa2_eth_enqueue_qd;
+}
+
/* Configure the DPNI object this interface is associated with */
static int setup_dpni(struct fsl_mc_device *ls_dev)
{
if (status & DPNI_IRQ_EVENT_LINK_CHANGED)
link_state_update(netdev_priv(net_dev));
+ if (status & DPNI_IRQ_EVENT_ENDPOINT_CHANGED)
+ set_mac_addr(netdev_priv(net_dev));
+
return IRQ_HANDLED;
}
}
err = dpni_set_irq_mask(ls_dev->mc_io, 0, ls_dev->mc_handle,
- DPNI_IRQ_INDEX, DPNI_IRQ_EVENT_LINK_CHANGED);
+ DPNI_IRQ_INDEX, DPNI_IRQ_EVENT_LINK_CHANGED |
+ DPNI_IRQ_EVENT_ENDPOINT_CHANGED);
if (err < 0) {
dev_err(&ls_dev->dev, "dpni_set_irq_mask(): %d\n", err);
goto free_irq;
*/
#define DPNI_IRQ_INDEX 0
/**
- * IRQ event - indicates a change in link state
+ * IRQ events:
+ * indicates a change in link state
+ * indicates a change in endpoint
*/
#define DPNI_IRQ_EVENT_LINK_CHANGED 0x00000001
+#define DPNI_IRQ_EVENT_ENDPOINT_CHANGED 0x00000002
int dpni_set_irq_enable(struct fsl_mc_io *mc_io,
u32 cmd_flags,
#define HNAE3_MOD_VERSION "1.0"
+#define HNAE3_MIN_VECTOR_NUM 2 /* first one for misc, another for IO */
+
/* Device IDs */
#define HNAE3_DEV_ID_GE 0xA220
#define HNAE3_DEV_ID_25GE 0xA221
hnae3_get_field(__le16_to_cpu(req->pf_intr_vector_number),
HCLGE_PF_VEC_NUM_M, HCLGE_PF_VEC_NUM_S);
+ /* The NIC's MSI-X vector count always equals the RoCE one. */
+ hdev->num_nic_msi = hdev->num_roce_msi;
+
/* PF should have NIC vectors and Roce vectors,
* NIC vectors are queued before Roce vectors.
*/
hdev->num_msi =
hnae3_get_field(__le16_to_cpu(req->pf_intr_vector_number),
HCLGE_PF_VEC_NUM_M, HCLGE_PF_VEC_NUM_S);
+
+ hdev->num_nic_msi = hdev->num_msi;
+ }
+
+ if (hdev->num_nic_msi < HNAE3_MIN_VECTOR_NUM) {
+ dev_err(&hdev->pdev->dev,
+ "Just %u msi resources, not enough for pf(min:2).\n",
+ hdev->num_nic_msi);
+ return -EINVAL;
}
return 0;
kinfo->rss_size = min_t(u16, hdev->rss_size_max,
vport->alloc_tqps / hdev->tm_info.num_tc);
+ /* ensure one to one mapping between irq and queue by default */
+ kinfo->rss_size = min_t(u16, kinfo->rss_size,
+ (hdev->num_nic_msi - 1) / hdev->tm_info.num_tc);
+
return 0;
}
int vectors;
int i;
- vectors = pci_alloc_irq_vectors(pdev, 1, hdev->num_msi,
+ vectors = pci_alloc_irq_vectors(pdev, HNAE3_MIN_VECTOR_NUM,
+ hdev->num_msi,
PCI_IRQ_MSI | PCI_IRQ_MSIX);
if (vectors < 0) {
dev_err(&pdev->dev,
hdev->num_msi = vectors;
hdev->num_msi_left = vectors;
+
hdev->base_msi_vector = pdev->irq;
hdev->roce_base_vector = hdev->base_msi_vector +
hdev->roce_base_msix_offset;
int alloc = 0;
int i, j;
+ vector_num = min_t(u16, hdev->num_nic_msi - 1, vector_num);
vector_num = min(hdev->num_msi_left, vector_num);
for (j = 0; j < vector_num; j++) {
u32 base_msi_vector;
u16 *vector_status;
int *vector_irq;
+ u16 num_nic_msi; /* Num of nic vectors for this PF */
u16 num_roce_msi; /* Num of roce vectors for this PF */
int roce_base_vector;
kinfo->rss_size = kinfo->req_rss_size;
} else if (kinfo->rss_size > max_rss_size ||
(!kinfo->req_rss_size && kinfo->rss_size < max_rss_size)) {
+ /* If the user has not set an RSS size, also cap rss_size by the
+ * number of usable MSI vectors so that TQPs and IRQs map one to
+ * one by default.
+ */
+ if (!kinfo->req_rss_size)
+ max_rss_size = min_t(u16, max_rss_size,
+ (hdev->num_nic_msi - 1) /
+ kinfo->num_tc);
+
/* Set to the maximum specification value (max_rss_size). */
- dev_info(&hdev->pdev->dev, "rss changes from %d to %d\n",
- kinfo->rss_size, max_rss_size);
kinfo->rss_size = max_rss_size;
}
kinfo->tqp[i] = &hdev->htqp[i].q;
}
+ /* after init the max rss_size and tqps, adjust the default tqp numbers
+ * and rss size with the actual vector numbers
+ */
+ kinfo->num_tqps = min_t(u16, hdev->num_nic_msix - 1, kinfo->num_tqps);
+ kinfo->rss_size = min_t(u16, kinfo->num_tqps / kinfo->num_tc,
+ kinfo->rss_size);
+
return 0;
}
int alloc = 0;
int i, j;
+ vector_num = min_t(u16, hdev->num_nic_msix - 1, vector_num);
vector_num = min(hdev->num_msi_left, vector_num);
for (j = 0; j < vector_num; j++) {
int vectors;
int i;
- if (hnae3_get_bit(hdev->ae_dev->flag, HNAE3_DEV_SUPPORT_ROCE_B))
+ if (hnae3_dev_roce_supported(hdev))
vectors = pci_alloc_irq_vectors(pdev,
hdev->roce_base_msix_offset + 1,
hdev->num_msi,
PCI_IRQ_MSIX);
else
- vectors = pci_alloc_irq_vectors(pdev, 1, hdev->num_msi,
+ vectors = pci_alloc_irq_vectors(pdev, HNAE3_MIN_VECTOR_NUM,
+ hdev->num_msi,
PCI_IRQ_MSI | PCI_IRQ_MSIX);
if (vectors < 0) {
hdev->num_msi = vectors;
hdev->num_msi_left = vectors;
+
hdev->base_msi_vector = pdev->irq;
hdev->roce_base_vector = pdev->irq + hdev->roce_base_msix_offset;
req = (struct hclgevf_query_res_cmd *)desc.data;
- if (hnae3_get_bit(hdev->ae_dev->flag, HNAE3_DEV_SUPPORT_ROCE_B)) {
+ if (hnae3_dev_roce_supported(hdev)) {
hdev->roce_base_msix_offset =
hnae3_get_field(__le16_to_cpu(req->msixcap_localid_ba_rocee),
HCLGEVF_MSIX_OFT_ROCEE_M,
hnae3_get_field(__le16_to_cpu(req->vf_intr_vector_number),
HCLGEVF_VEC_NUM_M, HCLGEVF_VEC_NUM_S);
+ /* The NIC's MSI-X vector count always equals the RoCE one. */
+ hdev->num_nic_msix = hdev->num_roce_msix;
+
/* VF should have NIC vectors and Roce vectors, NIC vectors
* are queued before Roce vectors. The offset is fixed to 64.
*/
hdev->num_msi =
hnae3_get_field(__le16_to_cpu(req->vf_intr_vector_number),
HCLGEVF_VEC_NUM_M, HCLGEVF_VEC_NUM_S);
+
+ hdev->num_nic_msix = hdev->num_msi;
+ }
+
+ if (hdev->num_nic_msix < HNAE3_MIN_VECTOR_NUM) {
+ dev_err(&hdev->pdev->dev,
+ "Just %u msi resources, not enough for vf(min:2).\n",
+ hdev->num_nic_msix);
+ return -EINVAL;
}
return 0;
u16 num_msi;
u16 num_msi_left;
u16 num_msi_used;
+ u16 num_nic_msix; /* Num of nic vectors for this VF */
u16 num_roce_msix; /* Num of roce vectors for this VF */
u16 roce_base_msix_offset;
int roce_base_vector;
#define OPT_SWAP_PORT 0x0001 /* Need to wordswp on the MPU port */
+#define LIB82596_DMA_ATTR DMA_ATTR_NON_CONSISTENT
+
#define DMA_WBACK(ndev, addr, len) \
do { dma_cache_sync((ndev)->dev.parent, (void *)addr, len, DMA_TO_DEVICE); } while (0)
unregister_netdev (dev);
dma_free_attrs(&pdev->dev, sizeof(struct i596_private), lp->dma,
- lp->dma_addr, DMA_ATTR_NON_CONSISTENT);
+ lp->dma_addr, LIB82596_DMA_ATTR);
free_netdev (dev);
return 0;
}
dma = dma_alloc_attrs(dev->dev.parent, sizeof(struct i596_dma),
&lp->dma_addr, GFP_KERNEL,
- DMA_ATTR_NON_CONSISTENT);
+ LIB82596_DMA_ATTR);
if (!dma) {
printk(KERN_ERR "%s: Couldn't get shared memory\n", __FILE__);
return -ENOMEM;
i = register_netdev(dev);
if (i) {
dma_free_attrs(dev->dev.parent, sizeof(struct i596_dma),
- dma, lp->dma_addr, DMA_ATTR_NON_CONSISTENT);
+ dma, lp->dma_addr, LIB82596_DMA_ATTR);
return i;
}
static const char sni_82596_string[] = "snirm_82596";
+#define LIB82596_DMA_ATTR 0
+
#define DMA_WBACK(priv, addr, len) do { } while (0)
#define DMA_INV(priv, addr, len) do { } while (0)
#define DMA_WBACK_INV(priv, addr, len) do { } while (0)
unregister_netdev(dev);
dma_free_attrs(dev->dev.parent, sizeof(struct i596_private), lp->dma,
- lp->dma_addr, DMA_ATTR_NON_CONSISTENT);
+ lp->dma_addr, LIB82596_DMA_ATTR);
iounmap(lp->ca);
iounmap(lp->mpu_port);
free_netdev (dev);
if (test_bit(0, &adapter->resetting) &&
adapter->reset_reason == VNIC_RESET_MOBILITY) {
- u64 val = (0xff000000) | scrq->hw_irq;
+ struct irq_desc *desc = irq_to_desc(scrq->irq);
+ struct irq_chip *chip = irq_desc_get_chip(desc);
- rc = plpar_hcall_norets(H_EOI, val);
- if (rc)
- dev_err(dev, "H_EOI FAILED irq 0x%llx. rc=%ld\n",
- val, rc);
+ chip->irq_eoi(&desc->irq_data);
}
rc = plpar_hcall_norets(H_VIOCTL, adapter->vdev->unit_address,
ge_mode = 0;
switch (state->interface) {
case PHY_INTERFACE_MODE_MII:
+ case PHY_INTERFACE_MODE_GMII:
ge_mode = 1;
break;
case PHY_INTERFACE_MODE_REVMII:
* it means that all the previous stes are the same,
* if so, this rule is duplicated.
*/
- if (mlx5dr_ste_is_last_in_rule(nic_matcher,
- matched_ste->ste_chain_location)) {
- mlx5dr_info(dmn, "Duplicate rule inserted, aborting!!\n");
- return NULL;
- }
- return matched_ste;
+ if (!mlx5dr_ste_is_last_in_rule(nic_matcher, ste_location))
+ return matched_ste;
+
+ mlx5dr_dbg(dmn, "Duplicate rule inserted\n");
}
if (!skip_rehash && dr_rule_need_enlarge_hash(cur_htbl, dmn, nic_dmn)) {
devlink = priv_to_devlink(mlxsw_sp->core);
in_devlink_port = mlxsw_core_port_devlink_port_get(mlxsw_sp->core,
local_port);
+ skb_push(skb, ETH_HLEN);
devlink_trap_report(devlink, skb, trap_ctx, in_devlink_port);
consume_skb(skb);
}
if (!is_valid_ether_addr(ndev->dev_addr))
eth_hw_addr_random(ndev);
- /* Reset the ethernet controller */
- __lpc_eth_reset(pldat);
-
/* then shut everything down to save power */
__lpc_eth_shutdown(pldat);
#define lif_to_txqcq(lif, i) ((lif)->txqcqs[i].qcq)
#define lif_to_rxqcq(lif, i) ((lif)->rxqcqs[i].qcq)
+#define lif_to_txstats(lif, i) ((lif)->txqcqs[i].stats->tx)
+#define lif_to_rxstats(lif, i) ((lif)->rxqcqs[i].stats->rx)
#define lif_to_txq(lif, i) (&lif_to_txqcq((lif), i)->q)
#define lif_to_rxq(lif, i) (&lif_to_txqcq((lif), i)->q)
/* rx stats */
total += MAX_Q(lif) * IONIC_NUM_RX_STATS;
- if (test_bit(IONIC_LIF_SW_DEBUG_STATS, lif->state)) {
+ if (test_bit(IONIC_LIF_UP, lif->state) &&
+ test_bit(IONIC_LIF_SW_DEBUG_STATS, lif->state)) {
/* tx debug stats */
total += MAX_Q(lif) * (IONIC_NUM_DBG_CQ_STATS +
IONIC_NUM_TX_Q_STATS +
*buf += ETH_GSTRING_LEN;
}
- if (test_bit(IONIC_LIF_SW_DEBUG_STATS, lif->state)) {
+ if (test_bit(IONIC_LIF_UP, lif->state) &&
+ test_bit(IONIC_LIF_SW_DEBUG_STATS, lif->state)) {
for (i = 0; i < IONIC_NUM_TX_Q_STATS; i++) {
snprintf(*buf, ETH_GSTRING_LEN,
"txq_%d_%s",
*buf += ETH_GSTRING_LEN;
}
- if (test_bit(IONIC_LIF_SW_DEBUG_STATS, lif->state)) {
+ if (test_bit(IONIC_LIF_UP, lif->state) &&
+ test_bit(IONIC_LIF_SW_DEBUG_STATS, lif->state)) {
for (i = 0; i < IONIC_NUM_DBG_CQ_STATS; i++) {
snprintf(*buf, ETH_GSTRING_LEN,
"rxq_%d_cq_%s",
{
struct ionic_lif_sw_stats lif_stats;
struct ionic_qcq *txqcq, *rxqcq;
+ struct ionic_tx_stats *txstats;
+ struct ionic_rx_stats *rxstats;
int i, q_num;
ionic_get_lif_stats(lif, &lif_stats);
}
for (q_num = 0; q_num < MAX_Q(lif); q_num++) {
- txqcq = lif_to_txqcq(lif, q_num);
+ txstats = &lif_to_txstats(lif, q_num);
for (i = 0; i < IONIC_NUM_TX_STATS; i++) {
- **buf = IONIC_READ_STAT64(&txqcq->stats->tx,
+ **buf = IONIC_READ_STAT64(txstats,
&ionic_tx_stats_desc[i]);
(*buf)++;
}
- if (test_bit(IONIC_LIF_SW_DEBUG_STATS, lif->state)) {
+ if (test_bit(IONIC_LIF_UP, lif->state) &&
+ test_bit(IONIC_LIF_SW_DEBUG_STATS, lif->state)) {
+ txqcq = lif_to_txqcq(lif, q_num);
for (i = 0; i < IONIC_NUM_TX_Q_STATS; i++) {
**buf = IONIC_READ_STAT64(&txqcq->q,
&ionic_txq_stats_desc[i]);
(*buf)++;
}
for (i = 0; i < IONIC_MAX_NUM_SG_CNTR; i++) {
- **buf = txqcq->stats->tx.sg_cntr[i];
+ **buf = txstats->sg_cntr[i];
(*buf)++;
}
}
}
for (q_num = 0; q_num < MAX_Q(lif); q_num++) {
- rxqcq = lif_to_rxqcq(lif, q_num);
+ rxstats = &lif_to_rxstats(lif, q_num);
for (i = 0; i < IONIC_NUM_RX_STATS; i++) {
- **buf = IONIC_READ_STAT64(&rxqcq->stats->rx,
+ **buf = IONIC_READ_STAT64(rxstats,
&ionic_rx_stats_desc[i]);
(*buf)++;
}
- if (test_bit(IONIC_LIF_SW_DEBUG_STATS, lif->state)) {
+ if (test_bit(IONIC_LIF_UP, lif->state) &&
+ test_bit(IONIC_LIF_SW_DEBUG_STATS, lif->state)) {
+ rxqcq = lif_to_rxqcq(lif, q_num);
for (i = 0; i < IONIC_NUM_DBG_CQ_STATS; i++) {
**buf = IONIC_READ_STAT64(&rxqcq->cq,
&ionic_dbg_cq_stats_desc[i]);
rtl_lock_config_regs(tp);
}
+/* Switch jumbo-frame handling on or off depending on whether @mtu exceeds
+ * the standard Ethernet payload size (ETH_DATA_LEN).
+ */
+static void rtl_jumbo_config(struct rtl8169_private *tp, int mtu)
+{
+	if (mtu <= ETH_DATA_LEN) {
+		rtl_hw_jumbo_disable(tp);
+		return;
+	}
+
+	rtl_hw_jumbo_enable(tp);
+}
+
DECLARE_RTL_COND(rtl_chipcmd_cond)
{
return RTL_R8(tp, ChipCmd) & CmdReset;
static void rtl_hw_start_8168bb(struct rtl8169_private *tp)
{
RTL_W8(tp, Config3, RTL_R8(tp, Config3) & ~Beacon_en);
-
- if (tp->dev->mtu <= ETH_DATA_LEN) {
- rtl_tx_performance_tweak(tp, PCI_EXP_DEVCTL_READRQ_4096B |
- PCI_EXP_DEVCTL_NOSNOOP_EN);
- }
}
static void rtl_hw_start_8168bef(struct rtl8169_private *tp)
RTL_W8(tp, Config3, RTL_R8(tp, Config3) & ~Beacon_en);
- if (tp->dev->mtu <= ETH_DATA_LEN)
- rtl_tx_performance_tweak(tp, PCI_EXP_DEVCTL_READRQ_4096B);
-
rtl_disable_clock_request(tp);
}
rtl_set_def_aspm_entry_latency(tp);
RTL_W8(tp, Config3, RTL_R8(tp, Config3) & ~Beacon_en);
-
- if (tp->dev->mtu <= ETH_DATA_LEN)
- rtl_tx_performance_tweak(tp, PCI_EXP_DEVCTL_READRQ_4096B);
}
static void rtl_hw_start_8168cp_3(struct rtl8169_private *tp)
/* Magic. */
RTL_W8(tp, DBG_REG, 0x20);
-
- if (tp->dev->mtu <= ETH_DATA_LEN)
- rtl_tx_performance_tweak(tp, PCI_EXP_DEVCTL_READRQ_4096B);
}
static void rtl_hw_start_8168c_1(struct rtl8169_private *tp)
rtl_ephy_init(tp, e_info_8168e_1);
- if (tp->dev->mtu <= ETH_DATA_LEN)
- rtl_tx_performance_tweak(tp, PCI_EXP_DEVCTL_READRQ_4096B);
-
rtl_disable_clock_request(tp);
/* Reset tx FIFO pointer */
rtl_ephy_init(tp, e_info_8168e_2);
- if (tp->dev->mtu <= ETH_DATA_LEN)
- rtl_tx_performance_tweak(tp, PCI_EXP_DEVCTL_READRQ_4096B);
-
rtl_eri_write(tp, 0xc0, ERIAR_MASK_0011, 0x0000);
rtl_eri_write(tp, 0xb8, ERIAR_MASK_0011, 0x0000);
rtl_set_fifo_size(tp, 0x10, 0x10, 0x02, 0x06);
rtl_set_rx_tx_desc_registers(tp);
rtl_lock_config_regs(tp);
+ rtl_jumbo_config(tp, tp->dev->mtu);
+
/* Initially a 10 us delay. Turned it into a PCI commit. - FR */
RTL_R16(tp, CPlusCmd);
RTL_W8(tp, ChipCmd, CmdTxEnb | CmdRxEnb);
{
struct rtl8169_private *tp = netdev_priv(dev);
- if (new_mtu > ETH_DATA_LEN)
- rtl_hw_jumbo_enable(tp);
- else
- rtl_hw_jumbo_disable(tp);
+ rtl_jumbo_config(tp, new_mtu);
dev->mtu = new_mtu;
netdev_update_features(dev);
}
}
} else {
- netdev_info(dev, "Too many address, switching to promiscuous\n");
+ if (!(readl(ioaddr + EMAC_RX_FRM_FLT) & EMAC_FRM_FLT_RXALL))
+ netdev_info(dev, "Too many address, switching to promiscuous\n");
v = EMAC_FRM_FLT_RXALL;
}
value |= GMAC_PACKET_FILTER_HPF;
/* Handle multiple unicast addresses */
- if (netdev_uc_count(dev) > GMAC_MAX_PERFECT_ADDRESSES) {
+ if (netdev_uc_count(dev) > hw->unicast_filter_entries) {
/* Switch to promiscuous mode if more than 128 addrs
* are required
*/
if (!enable) {
val |= PPSCMDx(index, 0x5);
+ val |= PPSEN0;
writel(val, ioaddr + MAC_PPS_CONTROL);
return 0;
}
}
if (priv->hw->pcs)
- stmmac_pcs_ctrl_ane(priv, priv->hw, 1, priv->hw->ps, 0);
+ stmmac_pcs_ctrl_ane(priv, priv->ioaddr, 1, priv->hw->ps, 0);
/* set TX and RX rings length */
stmmac_set_rings_length(priv);
stmmac_mac_set(priv, priv->ioaddr, false);
pinctrl_pm_select_sleep_state(priv->device);
/* Disable clock in case of PWM is off */
- clk_disable(priv->plat->pclk);
- clk_disable(priv->plat->stmmac_clk);
+ if (priv->plat->clk_ptp_ref)
+ clk_disable_unprepare(priv->plat->clk_ptp_ref);
+ clk_disable_unprepare(priv->plat->pclk);
+ clk_disable_unprepare(priv->plat->stmmac_clk);
}
mutex_unlock(&priv->lock);
} else {
pinctrl_pm_select_default_state(priv->device);
/* enable the clk previously disabled */
- clk_enable(priv->plat->stmmac_clk);
- clk_enable(priv->plat->pclk);
+ clk_prepare_enable(priv->plat->stmmac_clk);
+ clk_prepare_enable(priv->plat->pclk);
+ if (priv->plat->clk_ptp_ref)
+ clk_prepare_enable(priv->plat->clk_ptp_ref);
/* reset the phy so that it's ready */
if (priv->mii)
stmmac_mdio_reset(priv->mii);
/* structure describing a PTP hardware clock */
static struct ptp_clock_info stmmac_ptp_clock_ops = {
.owner = THIS_MODULE,
- .name = "stmmac_ptp_clock",
+ .name = "stmmac ptp",
.max_adj = 62500000,
.n_alarm = 0,
.n_ext_ts = 0,
static int stmmac_test_hfilt(struct stmmac_priv *priv)
{
- unsigned char gd_addr[ETH_ALEN] = {0x01, 0x00, 0xcc, 0xcc, 0xdd, 0xdd};
- unsigned char bd_addr[ETH_ALEN] = {0x09, 0x00, 0xaa, 0xaa, 0xbb, 0xbb};
+ unsigned char gd_addr[ETH_ALEN] = {0x01, 0xee, 0xdd, 0xcc, 0xbb, 0xaa};
+ unsigned char bd_addr[ETH_ALEN] = {0x01, 0x01, 0x02, 0x03, 0x04, 0x05};
struct stmmac_packet_attrs attr = { };
int ret;
if (ret)
return ret;
+ if (netdev_mc_count(priv->dev) >= priv->hw->multicast_filter_bins)
+ return -EOPNOTSUPP;
+
ret = dev_mc_add(priv->dev, gd_addr);
if (ret)
return ret;
if (stmmac_filter_check(priv))
return -EOPNOTSUPP;
+ if (!priv->hw->multicast_filter_bins)
+ return -EOPNOTSUPP;
/* Remove all MC addresses */
__dev_mc_unsync(priv->dev, NULL);
if (stmmac_filter_check(priv))
return -EOPNOTSUPP;
+ if (!priv->hw->multicast_filter_bins)
+ return -EOPNOTSUPP;
/* Remove all UC addresses */
__dev_uc_unsync(priv->dev, NULL);
return NULL;
}
-struct {
+static struct {
int (*fn)(struct stmmac_priv *priv, struct flow_cls_offload *cls,
struct stmmac_flow_entry *entry);
} tc_flow_parsers[] = {
* cpdma_chan_split_pool - Splits ctrl pool between all channels.
* Has to be called under ctlr lock
*/
-int cpdma_chan_split_pool(struct cpdma_ctlr *ctlr)
+static int cpdma_chan_split_pool(struct cpdma_ctlr *ctlr)
{
int tx_per_ch_desc = 0, rx_per_ch_desc = 0;
int free_rx_num = 0, free_tx_num = 0;
void nsim_fib_exit(void)
{
- unregister_pernet_subsys(&nsim_fib_net_ops);
unregister_fib_notifier(&nsim_fib_nb);
+ unregister_pernet_subsys(&nsim_fib_net_ops);
}
int nsim_fib_init(void)
err = register_fib_notifier(&nsim_fib_nb, nsim_fib_dump_inconsistent);
if (err < 0) {
pr_err("Failed to register fib notifier\n");
+ unregister_pernet_subsys(&nsim_fib_net_ops);
goto err_out;
}
.name = _name, \
/* PHY_BASIC_FEATURES */ \
.flags = PHY_IS_INTERNAL, \
+ .soft_reset = genphy_soft_reset, \
.config_init = bcm7xxx_config_init, \
.suspend = bcm7xxx_suspend, \
.resume = bcm7xxx_config_init, \
return genphy_config_aneg(phydev);
}
+static int ksz8051_ksz8795_match_phy_device(struct phy_device *phydev,
+ const u32 ksz_phy_id)
+{
+ int ret;
+
+ if ((phydev->phy_id & MICREL_PHY_ID_MASK) != ksz_phy_id)
+ return 0;
+
+ ret = phy_read(phydev, MII_BMSR);
+ if (ret < 0)
+ return ret;
+
+ /* KSZ8051 PHY and KSZ8794/KSZ8795/KSZ8765 switch share the same
+ * exact PHY ID. However, they can be told apart by the extended
+ * capability registers presence. The KSZ8051 PHY has them while
+ * the switch does not.
+ */
+ ret &= BMSR_ERCAP;
+ if (ksz_phy_id == PHY_ID_KSZ8051)
+ return ret;
+ else
+ return !ret;
+}
+
+static int ksz8051_match_phy_device(struct phy_device *phydev)
+{
+ return ksz8051_ksz8795_match_phy_device(phydev, PHY_ID_KSZ8051);
+}
+
static int ksz8081_config_init(struct phy_device *phydev)
{
/* KSZPHY_OMSO_FACTORY_TEST is set at de-assertion of the reset line
return kszphy_config_init(phydev);
}
+static int ksz8795_match_phy_device(struct phy_device *phydev)
+{
+ return ksz8051_ksz8795_match_phy_device(phydev, PHY_ID_KSZ87XX);
+}
+
static int ksz9021_load_values_from_of(struct phy_device *phydev,
const struct device_node *of_node,
u16 reg,
.suspend = genphy_suspend,
.resume = genphy_resume,
}, {
- .phy_id = PHY_ID_KSZ8051,
- .phy_id_mask = MICREL_PHY_ID_MASK,
.name = "Micrel KSZ8051",
/* PHY_BASIC_FEATURES */
.driver_data = &ksz8051_type,
.get_sset_count = kszphy_get_sset_count,
.get_strings = kszphy_get_strings,
.get_stats = kszphy_get_stats,
+ .match_phy_device = ksz8051_match_phy_device,
.suspend = genphy_suspend,
.resume = genphy_resume,
}, {
.suspend = genphy_suspend,
.resume = genphy_resume,
}, {
- .phy_id = PHY_ID_KSZ8795,
- .phy_id_mask = MICREL_PHY_ID_MASK,
- .name = "Micrel KSZ8795",
+ .name = "Micrel KSZ87XX Switch",
/* PHY_BASIC_FEATURES */
.config_init = kszphy_config_init,
.config_aneg = ksz8873mll_config_aneg,
.read_status = ksz8873mll_read_status,
+ .match_phy_device = ksz8795_match_phy_device,
.suspend = genphy_suspend,
.resume = genphy_resume,
}, {
{
int val;
+ linkmode_zero(phydev->lp_advertising);
+
val = phy_read_mmd(phydev, MDIO_MMD_PMAPMD, MDIO_CTRL1);
if (val < 0)
return val;
if (AUTONEG_DISABLE == phydev->autoneg)
phy_sanitize_settings(phydev);
- /* Invalidate LP advertising flags */
- linkmode_zero(phydev->lp_advertising);
-
err = phy_config_aneg(phydev);
if (err < 0)
goto out_unlock;
{
int lpa, lpagb;
- if (phydev->autoneg == AUTONEG_ENABLE && phydev->autoneg_complete) {
+ if (phydev->autoneg == AUTONEG_ENABLE) {
+ if (!phydev->autoneg_complete) {
+ mii_stat1000_mod_linkmode_lpa_t(phydev->lp_advertising,
+ 0);
+ mii_lpa_mod_linkmode_lpa_t(phydev->lp_advertising, 0);
+ return 0;
+ }
+
if (phydev->is_gigabit_capable) {
lpagb = phy_read(phydev, MII_STAT1000);
if (lpagb < 0)
return lpa;
mii_lpa_mod_linkmode_lpa_t(phydev->lp_advertising, lpa);
+ } else {
+ linkmode_zero(phydev->lp_advertising);
}
return 0;
/**
* phylink_create() - create a phylink instance
- * @ndev: a pointer to the &struct net_device
+ * @config: a pointer to the target &struct phylink_config
* @fwnode: a pointer to a &struct fwnode_handle describing the network
* interface
* @iface: the desired link mode defined by &typedef phy_interface_t
e = tun_flow_find(head, rxhash);
if (likely(e)) {
/* TODO: keep queueing to old queue until it's empty? */
- if (e->queue_index != queue_index)
- e->queue_index = queue_index;
+ if (READ_ONCE(e->queue_index) != queue_index)
+ WRITE_ONCE(e->queue_index, queue_index);
if (e->updated != jiffies)
e->updated = jiffies;
sock_rps_record_flow_hash(e->rps_rxhash);
int intr_completed;
struct usb_endpoint_descriptor *endp;
struct urb *urb;
- struct hso_serial_state_notification serial_state_notification;
+ struct hso_serial_state_notification *serial_state_notification;
u16 prev_UART_state_bitmap;
struct uart_icount icount;
};
usb_rcvintpipe(usb,
tiocmget->endp->
bEndpointAddress & 0x7F),
- &tiocmget->serial_state_notification,
+ tiocmget->serial_state_notification,
sizeof(struct hso_serial_state_notification),
tiocmget_intr_callback, serial,
tiocmget->endp->bInterval);
/* wIndex should be the USB interface number of the port to which the
* notification applies, which should always be the Modem port.
*/
- serial_state_notification = &tiocmget->serial_state_notification;
+ serial_state_notification = tiocmget->serial_state_notification;
if (serial_state_notification->bmRequestType != BM_REQUEST_TYPE ||
serial_state_notification->bNotification != B_NOTIFICATION ||
le16_to_cpu(serial_state_notification->wValue) != W_VALUE ||
usb_free_urb(tiocmget->urb);
tiocmget->urb = NULL;
serial->tiocmget = NULL;
+ kfree(tiocmget->serial_state_notification);
+ tiocmget->serial_state_notification = NULL;
kfree(tiocmget);
}
}
num_urbs = 2;
serial->tiocmget = kzalloc(sizeof(struct hso_tiocmget),
GFP_KERNEL);
+ serial->tiocmget->serial_state_notification
+ = kzalloc(sizeof(struct hso_serial_state_notification),
+ GFP_KERNEL);
/* it isn't going to break our heart if serial->tiocmget
* allocation fails don't bother checking this.
*/
- if (serial->tiocmget) {
+ if (serial->tiocmget && serial->tiocmget->serial_state_notification) {
tiocmget = serial->tiocmget;
tiocmget->endp = hso_get_ep(interface,
USB_ENDPOINT_XFER_INT,
/* driver requires remote-wakeup capability during autosuspend. */
intf->needs_remote_wakeup = 1;
+ ret = lan78xx_phy_init(dev);
+ if (ret < 0)
+ goto out4;
+
ret = register_netdev(netdev);
if (ret != 0) {
netif_err(dev, probe, netdev, "couldn't register the device\n");
- goto out4;
+ goto out5;
}
usb_set_intfdata(intf, dev);
pm_runtime_set_autosuspend_delay(&udev->dev,
DEFAULT_AUTOSUSPEND_DELAY);
- ret = lan78xx_phy_init(dev);
- if (ret < 0)
- goto out5;
-
return 0;
out5:
- unregister_netdev(netdev);
+ phy_disconnect(netdev->phydev);
out4:
usb_free_urb(dev->urb_intr);
out3:
{QMI_FIXED_INTF(0x2357, 0x0201, 4)}, /* TP-LINK HSUPA Modem MA180 */
{QMI_FIXED_INTF(0x2357, 0x9000, 4)}, /* TP-LINK MA260 */
{QMI_QUIRK_SET_DTR(0x1bc7, 0x1040, 2)}, /* Telit LE922A */
+ {QMI_QUIRK_SET_DTR(0x1bc7, 0x1050, 2)}, /* Telit FN980 */
{QMI_FIXED_INTF(0x1bc7, 0x1100, 3)}, /* Telit ME910 */
{QMI_FIXED_INTF(0x1bc7, 0x1101, 3)}, /* Telit ME910 dual modem */
{QMI_FIXED_INTF(0x1bc7, 0x1200, 5)}, /* Telit LE920 */
static int sr_mdio_read(struct net_device *net, int phy_id, int loc)
{
struct usbnet *dev = netdev_priv(net);
- __le16 res;
+ __le16 res = 0;
mutex_lock(&dev->phy_mutex);
sr_set_sw_mii(dev);
return ret;
}
- if (!uart_print && ar->hw_params.uart_pin_workaround) {
- ret = ath10k_bmi_write32(ar, hi_dbg_uart_txpin,
- ar->hw_params.uart_pin);
- if (ret) {
- ath10k_warn(ar, "failed to set UART TX pin: %d", ret);
- return ret;
+ if (!uart_print) {
+ if (ar->hw_params.uart_pin_workaround) {
+ ret = ath10k_bmi_write32(ar, hi_dbg_uart_txpin,
+ ar->hw_params.uart_pin);
+ if (ret) {
+ ath10k_warn(ar, "failed to set UART TX pin: %d",
+ ret);
+ return ret;
+ }
}
return 0;
wifi_pkg = iwl_acpi_get_wifi_pkg(dev, data, ACPI_WRDD_WIFI_DATA_SIZE,
&tbl_rev);
- if (IS_ERR(wifi_pkg) || tbl_rev != 0) {
+ if (IS_ERR(wifi_pkg)) {
ret = PTR_ERR(wifi_pkg);
goto out_free;
}
- if (wifi_pkg->package.elements[1].type != ACPI_TYPE_INTEGER) {
+ if (wifi_pkg->package.elements[1].type != ACPI_TYPE_INTEGER ||
+ tbl_rev != 0) {
ret = -EINVAL;
goto out_free;
}
wifi_pkg = iwl_acpi_get_wifi_pkg(dev, data, ACPI_ECKV_WIFI_DATA_SIZE,
&tbl_rev);
- if (IS_ERR(wifi_pkg) || tbl_rev != 0) {
+ if (IS_ERR(wifi_pkg)) {
ret = PTR_ERR(wifi_pkg);
goto out_free;
}
- if (wifi_pkg->package.elements[1].type != ACPI_TYPE_INTEGER) {
+ if (wifi_pkg->package.elements[1].type != ACPI_TYPE_INTEGER ||
+ tbl_rev != 0) {
ret = -EINVAL;
goto out_free;
}
if (new_page)
__free_page(new_page);
}
+ kfree(table);
return NULL;
}
alloc_size = min_t(int, size, PAGE_SIZE);
*/
static inline u32 iwl_umac_prph(struct iwl_trans *trans, u32 ofs)
{
- return ofs + trans->cfg->trans.umac_prph_offset;
+ return ofs + trans->trans_cfg->umac_prph_offset;
}
static inline u32 iwl_read_umac_prph_no_grab(struct iwl_trans *trans, u32 ofs)
{
return iwl_read_prph_no_grab(trans, ofs +
- trans->cfg->trans.umac_prph_offset);
+ trans->trans_cfg->umac_prph_offset);
}
static inline u32 iwl_read_umac_prph(struct iwl_trans *trans, u32 ofs)
{
- return iwl_read_prph(trans, ofs + trans->cfg->trans.umac_prph_offset);
+ return iwl_read_prph(trans, ofs + trans->trans_cfg->umac_prph_offset);
}
static inline void iwl_write_umac_prph_no_grab(struct iwl_trans *trans, u32 ofs,
u32 val)
{
- iwl_write_prph_no_grab(trans, ofs + trans->cfg->trans.umac_prph_offset,
+ iwl_write_prph_no_grab(trans, ofs + trans->trans_cfg->umac_prph_offset,
val);
}
static inline void iwl_write_umac_prph(struct iwl_trans *trans, u32 ofs,
u32 val)
{
- iwl_write_prph(trans, ofs + trans->cfg->trans.umac_prph_offset, val);
+ iwl_write_prph(trans, ofs + trans->trans_cfg->umac_prph_offset, val);
}
static inline int iwl_poll_umac_prph_bit(struct iwl_trans *trans, u32 addr,
u32 bits, u32 mask, int timeout)
{
return iwl_poll_prph_bit(trans, addr +
- trans->cfg->trans.umac_prph_offset,
+ trans->trans_cfg->umac_prph_offset,
bits, mask, timeout);
}
};
int ret;
+ if (mvm->trans->cfg->tx_with_siso_diversity)
+ init_cfg.init_flags |= cpu_to_le32(BIT(IWL_INIT_PHY));
+
lockdep_assert_held(&mvm->mutex);
mvm->rfkill_safe_init_done = false;
wifi_pkg = iwl_acpi_get_wifi_pkg(mvm->dev, data,
ACPI_WRDS_WIFI_DATA_SIZE, &tbl_rev);
- if (IS_ERR(wifi_pkg) || tbl_rev != 0) {
+ if (IS_ERR(wifi_pkg)) {
ret = PTR_ERR(wifi_pkg);
goto out_free;
}
- if (wifi_pkg->package.elements[1].type != ACPI_TYPE_INTEGER) {
+ if (wifi_pkg->package.elements[1].type != ACPI_TYPE_INTEGER ||
+ tbl_rev != 0) {
ret = -EINVAL;
goto out_free;
}
wifi_pkg = iwl_acpi_get_wifi_pkg(mvm->dev, data,
ACPI_EWRD_WIFI_DATA_SIZE, &tbl_rev);
- if (IS_ERR(wifi_pkg) || tbl_rev != 0) {
+ if (IS_ERR(wifi_pkg)) {
ret = PTR_ERR(wifi_pkg);
goto out_free;
}
if ((wifi_pkg->package.elements[1].type != ACPI_TYPE_INTEGER) ||
- (wifi_pkg->package.elements[2].type != ACPI_TYPE_INTEGER)) {
+ (wifi_pkg->package.elements[2].type != ACPI_TYPE_INTEGER) ||
+ tbl_rev != 0) {
ret = -EINVAL;
goto out_free;
}
wifi_pkg = iwl_acpi_get_wifi_pkg(mvm->dev, data,
ACPI_WGDS_WIFI_DATA_SIZE, &tbl_rev);
- if (IS_ERR(wifi_pkg) || tbl_rev > 1) {
+ if (IS_ERR(wifi_pkg)) {
ret = PTR_ERR(wifi_pkg);
goto out_free;
}
+ if (tbl_rev != 0) {
+ ret = -EINVAL;
+ goto out_free;
+ }
+
mvm->geo_rev = tbl_rev;
for (i = 0; i < ACPI_NUM_GEO_PROFILES; i++) {
for (j = 0; j < ACPI_GEO_TABLE_SIZE; j++) {
* firmware versions. Unfortunately, we don't have a TLV API
* flag to rely on, so rely on the major version which is in
* the first byte of ucode_ver. This was implemented
- * initially on version 38 and then backported to29 and 17.
- * The intention was to have it in 36 as well, but not all
- * 8000 family got this feature enabled. The 8000 family is
- * the only one using version 36, so skip this version
- * entirely.
+ * initially on version 38 and then backported to 17. It was
+ * also backported to 29, but only for 7265D devices. The
+ * intention was to have it in 36 as well, but not all 8000
+ * family got this feature enabled. The 8000 family is the
+ * only one using version 36, so skip this version entirely.
*/
return IWL_UCODE_SERIAL(mvm->fw->ucode_ver) >= 38 ||
- IWL_UCODE_SERIAL(mvm->fw->ucode_ver) == 29 ||
- IWL_UCODE_SERIAL(mvm->fw->ucode_ver) == 17;
+ IWL_UCODE_SERIAL(mvm->fw->ucode_ver) == 17 ||
+ (IWL_UCODE_SERIAL(mvm->fw->ucode_ver) == 29 &&
+ ((mvm->trans->hw_rev & CSR_HW_REV_TYPE_MSK) ==
+ CSR_HW_REV_TYPE_7265D));
}
int iwl_mvm_get_sar_geo_profile(struct iwl_mvm *mvm)
wifi_pkg = iwl_acpi_get_wifi_pkg(mvm->dev, data,
ACPI_PPAG_WIFI_DATA_SIZE, &tbl_rev);
- if (IS_ERR(wifi_pkg) || tbl_rev != 0) {
+ if (IS_ERR(wifi_pkg)) {
ret = PTR_ERR(wifi_pkg);
goto out_free;
}
+ if (tbl_rev != 0) {
+ ret = -EINVAL;
+ goto out_free;
+ }
+
enabled = &wifi_pkg->package.elements[1];
if (enabled->type != ACPI_TYPE_INTEGER ||
(enabled->integer.value != 0 && enabled->integer.value != 1)) {
if (!iwl_mvm_has_new_rx_api(mvm))
return;
- notif->cookie = mvm->queue_sync_cookie;
-
- if (notif->sync)
+ if (notif->sync) {
+ notif->cookie = mvm->queue_sync_cookie;
atomic_set(&mvm->queue_sync_counter,
mvm->trans->num_rx_queues);
+ }
ret = iwl_mvm_notify_rx_queue(mvm, qmask, (u8 *)notif,
size, !notif->sync);
out:
atomic_set(&mvm->queue_sync_counter, 0);
- mvm->queue_sync_cookie++;
+ if (notif->sync)
+ mvm->queue_sync_cookie++;
}
static void iwl_mvm_sync_rx_queues(struct ieee80211_hw *hw)
/* allocate ucode sections in dram and set addresses */
ret = iwl_pcie_init_fw_sec(trans, fw, &prph_scratch->dram);
- if (ret) {
- dma_free_coherent(trans->dev,
- sizeof(*prph_scratch),
- prph_scratch,
- trans_pcie->prph_scratch_dma_addr);
- return ret;
- }
+ if (ret)
+ goto err_free_prph_scratch;
+
/* Allocate prph information
* currently we don't assign to the prph info anything, but it would get
prph_info = dma_alloc_coherent(trans->dev, sizeof(*prph_info),
&trans_pcie->prph_info_dma_addr,
GFP_KERNEL);
- if (!prph_info)
- return -ENOMEM;
+ if (!prph_info) {
+ ret = -ENOMEM;
+ goto err_free_prph_scratch;
+ }
/* Allocate context info */
ctxt_info_gen3 = dma_alloc_coherent(trans->dev,
sizeof(*ctxt_info_gen3),
&trans_pcie->ctxt_info_dma_addr,
GFP_KERNEL);
- if (!ctxt_info_gen3)
- return -ENOMEM;
+ if (!ctxt_info_gen3) {
+ ret = -ENOMEM;
+ goto err_free_prph_info;
+ }
ctxt_info_gen3->prph_info_base_addr =
cpu_to_le64(trans_pcie->prph_info_dma_addr);
iwl_set_bit(trans, CSR_GP_CNTRL, CSR_AUTO_FUNC_INIT);
return 0;
+
+err_free_prph_info:
+ dma_free_coherent(trans->dev,
+ sizeof(*prph_info),
+ prph_info,
+ trans_pcie->prph_info_dma_addr);
+
+err_free_prph_scratch:
+ dma_free_coherent(trans->dev,
+ sizeof(*prph_scratch),
+ prph_scratch,
+ trans_pcie->prph_scratch_dma_addr);
+ return ret;
+
}
void iwl_pcie_ctxt_info_gen3_free(struct iwl_trans *trans)
{IWL_PCI_DEVICE(0x24FD, 0x9074, iwl8265_2ac_cfg)},
/* 9000 Series */
- {IWL_PCI_DEVICE(0x02F0, 0x0030, iwl9560_2ac_160_cfg_quz_a0_jf_b0_soc)},
- {IWL_PCI_DEVICE(0x02F0, 0x0034, iwl9560_2ac_cfg_quz_a0_jf_b0_soc)},
- {IWL_PCI_DEVICE(0x02F0, 0x0038, iwl9560_2ac_160_cfg_quz_a0_jf_b0_soc)},
- {IWL_PCI_DEVICE(0x02F0, 0x003C, iwl9560_2ac_160_cfg_quz_a0_jf_b0_soc)},
- {IWL_PCI_DEVICE(0x02F0, 0x0060, iwl9461_2ac_cfg_quz_a0_jf_b0_soc)},
- {IWL_PCI_DEVICE(0x02F0, 0x0064, iwl9461_2ac_cfg_quz_a0_jf_b0_soc)},
- {IWL_PCI_DEVICE(0x02F0, 0x00A0, iwl9462_2ac_cfg_quz_a0_jf_b0_soc)},
- {IWL_PCI_DEVICE(0x02F0, 0x00A4, iwl9462_2ac_cfg_quz_a0_jf_b0_soc)},
- {IWL_PCI_DEVICE(0x02F0, 0x0230, iwl9560_2ac_cfg_quz_a0_jf_b0_soc)},
- {IWL_PCI_DEVICE(0x02F0, 0x0234, iwl9560_2ac_cfg_quz_a0_jf_b0_soc)},
- {IWL_PCI_DEVICE(0x02F0, 0x0238, iwl9560_2ac_cfg_quz_a0_jf_b0_soc)},
- {IWL_PCI_DEVICE(0x02F0, 0x023C, iwl9560_2ac_cfg_quz_a0_jf_b0_soc)},
- {IWL_PCI_DEVICE(0x02F0, 0x0260, iwl9461_2ac_cfg_quz_a0_jf_b0_soc)},
- {IWL_PCI_DEVICE(0x02F0, 0x0264, iwl9461_2ac_cfg_quz_a0_jf_b0_soc)},
- {IWL_PCI_DEVICE(0x02F0, 0x02A0, iwl9462_2ac_cfg_quz_a0_jf_b0_soc)},
- {IWL_PCI_DEVICE(0x02F0, 0x02A4, iwl9462_2ac_cfg_quz_a0_jf_b0_soc)},
- {IWL_PCI_DEVICE(0x02F0, 0x1551, iwl9560_killer_s_2ac_cfg_quz_a0_jf_b0_soc)},
- {IWL_PCI_DEVICE(0x02F0, 0x1552, iwl9560_killer_i_2ac_cfg_quz_a0_jf_b0_soc)},
- {IWL_PCI_DEVICE(0x02F0, 0x2030, iwl9560_2ac_160_cfg_quz_a0_jf_b0_soc)},
- {IWL_PCI_DEVICE(0x02F0, 0x2034, iwl9560_2ac_160_cfg_quz_a0_jf_b0_soc)},
- {IWL_PCI_DEVICE(0x02F0, 0x4030, iwl9560_2ac_160_cfg_quz_a0_jf_b0_soc)},
- {IWL_PCI_DEVICE(0x02F0, 0x4034, iwl9560_2ac_160_cfg_quz_a0_jf_b0_soc)},
- {IWL_PCI_DEVICE(0x02F0, 0x40A4, iwl9462_2ac_cfg_quz_a0_jf_b0_soc)},
- {IWL_PCI_DEVICE(0x02F0, 0x4234, iwl9560_2ac_cfg_quz_a0_jf_b0_soc)},
- {IWL_PCI_DEVICE(0x02F0, 0x42A4, iwl9462_2ac_cfg_quz_a0_jf_b0_soc)},
+ {IWL_PCI_DEVICE(0x02F0, 0x0030, iwl9560_2ac_160_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0x02F0, 0x0034, iwl9560_2ac_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0x02F0, 0x0038, iwl9560_2ac_160_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0x02F0, 0x003C, iwl9560_2ac_160_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0x02F0, 0x0060, iwl9461_2ac_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0x02F0, 0x0064, iwl9461_2ac_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0x02F0, 0x00A0, iwl9462_2ac_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0x02F0, 0x00A4, iwl9462_2ac_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0x02F0, 0x0230, iwl9560_2ac_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0x02F0, 0x0234, iwl9560_2ac_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0x02F0, 0x0238, iwl9560_2ac_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0x02F0, 0x023C, iwl9560_2ac_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0x02F0, 0x0260, iwl9461_2ac_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0x02F0, 0x0264, iwl9461_2ac_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0x02F0, 0x02A0, iwl9462_2ac_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0x02F0, 0x02A4, iwl9462_2ac_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0x02F0, 0x1030, iwl9560_2ac_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0x02F0, 0x1551, killer1550s_2ac_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0x02F0, 0x1552, killer1550i_2ac_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0x02F0, 0x2030, iwl9560_2ac_160_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0x02F0, 0x2034, iwl9560_2ac_160_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0x02F0, 0x4030, iwl9560_2ac_160_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0x02F0, 0x4034, iwl9560_2ac_160_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0x02F0, 0x40A4, iwl9462_2ac_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0x02F0, 0x4234, iwl9560_2ac_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0x02F0, 0x42A4, iwl9462_2ac_cfg_qu_b0_jf_b0)},
+
{IWL_PCI_DEVICE(0x06F0, 0x0030, iwl9560_2ac_160_cfg_quz_a0_jf_b0_soc)},
{IWL_PCI_DEVICE(0x06F0, 0x0034, iwl9560_2ac_cfg_quz_a0_jf_b0_soc)},
{IWL_PCI_DEVICE(0x06F0, 0x0038, iwl9560_2ac_160_cfg_quz_a0_jf_b0_soc)},
{IWL_PCI_DEVICE(0x2720, 0x40A4, iwl9462_2ac_cfg_soc)},
{IWL_PCI_DEVICE(0x2720, 0x4234, iwl9560_2ac_cfg_soc)},
{IWL_PCI_DEVICE(0x2720, 0x42A4, iwl9462_2ac_cfg_soc)},
- {IWL_PCI_DEVICE(0x30DC, 0x0030, iwl9560_2ac_160_cfg_soc)},
- {IWL_PCI_DEVICE(0x30DC, 0x0034, iwl9560_2ac_cfg_soc)},
- {IWL_PCI_DEVICE(0x30DC, 0x0038, iwl9560_2ac_160_cfg_soc)},
- {IWL_PCI_DEVICE(0x30DC, 0x003C, iwl9560_2ac_160_cfg_soc)},
- {IWL_PCI_DEVICE(0x30DC, 0x0060, iwl9460_2ac_cfg_soc)},
- {IWL_PCI_DEVICE(0x30DC, 0x0064, iwl9461_2ac_cfg_soc)},
- {IWL_PCI_DEVICE(0x30DC, 0x00A0, iwl9462_2ac_cfg_soc)},
- {IWL_PCI_DEVICE(0x30DC, 0x00A4, iwl9462_2ac_cfg_soc)},
- {IWL_PCI_DEVICE(0x30DC, 0x0230, iwl9560_2ac_cfg_soc)},
- {IWL_PCI_DEVICE(0x30DC, 0x0234, iwl9560_2ac_cfg_soc)},
- {IWL_PCI_DEVICE(0x30DC, 0x0238, iwl9560_2ac_cfg_soc)},
- {IWL_PCI_DEVICE(0x30DC, 0x023C, iwl9560_2ac_cfg_soc)},
- {IWL_PCI_DEVICE(0x30DC, 0x0260, iwl9461_2ac_cfg_soc)},
- {IWL_PCI_DEVICE(0x30DC, 0x0264, iwl9461_2ac_cfg_soc)},
- {IWL_PCI_DEVICE(0x30DC, 0x02A0, iwl9462_2ac_cfg_soc)},
- {IWL_PCI_DEVICE(0x30DC, 0x02A4, iwl9462_2ac_cfg_soc)},
- {IWL_PCI_DEVICE(0x30DC, 0x1010, iwl9260_2ac_cfg)},
- {IWL_PCI_DEVICE(0x30DC, 0x1030, iwl9560_2ac_cfg_soc)},
- {IWL_PCI_DEVICE(0x30DC, 0x1210, iwl9260_2ac_cfg)},
- {IWL_PCI_DEVICE(0x30DC, 0x1551, iwl9560_killer_s_2ac_cfg_soc)},
- {IWL_PCI_DEVICE(0x30DC, 0x1552, iwl9560_killer_2ac_cfg_soc)},
- {IWL_PCI_DEVICE(0x30DC, 0x2030, iwl9560_2ac_160_cfg_soc)},
- {IWL_PCI_DEVICE(0x30DC, 0x2034, iwl9560_2ac_160_cfg_soc)},
- {IWL_PCI_DEVICE(0x30DC, 0x4030, iwl9560_2ac_160_cfg_soc)},
- {IWL_PCI_DEVICE(0x30DC, 0x4034, iwl9560_2ac_160_cfg_soc)},
- {IWL_PCI_DEVICE(0x30DC, 0x40A4, iwl9462_2ac_cfg_soc)},
- {IWL_PCI_DEVICE(0x30DC, 0x4234, iwl9560_2ac_cfg_soc)},
- {IWL_PCI_DEVICE(0x30DC, 0x42A4, iwl9462_2ac_cfg_soc)},
+
+ {IWL_PCI_DEVICE(0x30DC, 0x0030, iwl9560_2ac_160_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0x30DC, 0x0034, iwl9560_2ac_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0x30DC, 0x0038, iwl9560_2ac_160_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0x30DC, 0x003C, iwl9560_2ac_160_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0x30DC, 0x0060, iwl9461_2ac_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0x30DC, 0x0064, iwl9461_2ac_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0x30DC, 0x00A0, iwl9462_2ac_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0x30DC, 0x00A4, iwl9462_2ac_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0x30DC, 0x0230, iwl9560_2ac_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0x30DC, 0x0234, iwl9560_2ac_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0x30DC, 0x0238, iwl9560_2ac_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0x30DC, 0x023C, iwl9560_2ac_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0x30DC, 0x0260, iwl9461_2ac_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0x30DC, 0x0264, iwl9461_2ac_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0x30DC, 0x02A0, iwl9462_2ac_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0x30DC, 0x02A4, iwl9462_2ac_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0x30DC, 0x1030, iwl9560_2ac_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0x30DC, 0x1551, killer1550s_2ac_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0x30DC, 0x1552, killer1550i_2ac_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0x30DC, 0x2030, iwl9560_2ac_160_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0x30DC, 0x2034, iwl9560_2ac_160_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0x30DC, 0x4030, iwl9560_2ac_160_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0x30DC, 0x4034, iwl9560_2ac_160_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0x30DC, 0x40A4, iwl9462_2ac_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0x30DC, 0x4234, iwl9560_2ac_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0x30DC, 0x42A4, iwl9462_2ac_cfg_qu_b0_jf_b0)},
+
{IWL_PCI_DEVICE(0x31DC, 0x0030, iwl9560_2ac_160_cfg_shared_clk)},
{IWL_PCI_DEVICE(0x31DC, 0x0034, iwl9560_2ac_cfg_shared_clk)},
{IWL_PCI_DEVICE(0x31DC, 0x0038, iwl9560_2ac_160_cfg_shared_clk)},
{IWL_PCI_DEVICE(0x34F0, 0x4234, iwl9560_2ac_cfg_qu_b0_jf_b0)},
{IWL_PCI_DEVICE(0x34F0, 0x42A4, iwl9462_2ac_cfg_qu_b0_jf_b0)},
- {IWL_PCI_DEVICE(0x3DF0, 0x0030, iwl9560_2ac_160_cfg_soc)},
- {IWL_PCI_DEVICE(0x3DF0, 0x0034, iwl9560_2ac_cfg_soc)},
- {IWL_PCI_DEVICE(0x3DF0, 0x0038, iwl9560_2ac_160_cfg_soc)},
- {IWL_PCI_DEVICE(0x3DF0, 0x003C, iwl9560_2ac_160_cfg_soc)},
- {IWL_PCI_DEVICE(0x3DF0, 0x0060, iwl9461_2ac_cfg_soc)},
- {IWL_PCI_DEVICE(0x3DF0, 0x0064, iwl9461_2ac_cfg_soc)},
- {IWL_PCI_DEVICE(0x3DF0, 0x00A0, iwl9462_2ac_cfg_soc)},
- {IWL_PCI_DEVICE(0x3DF0, 0x00A4, iwl9462_2ac_cfg_soc)},
- {IWL_PCI_DEVICE(0x3DF0, 0x0230, iwl9560_2ac_cfg_soc)},
- {IWL_PCI_DEVICE(0x3DF0, 0x0234, iwl9560_2ac_cfg_soc)},
- {IWL_PCI_DEVICE(0x3DF0, 0x0238, iwl9560_2ac_cfg_soc)},
- {IWL_PCI_DEVICE(0x3DF0, 0x023C, iwl9560_2ac_cfg_soc)},
- {IWL_PCI_DEVICE(0x3DF0, 0x0260, iwl9461_2ac_cfg_soc)},
- {IWL_PCI_DEVICE(0x3DF0, 0x0264, iwl9461_2ac_cfg_soc)},
- {IWL_PCI_DEVICE(0x3DF0, 0x02A0, iwl9462_2ac_cfg_soc)},
- {IWL_PCI_DEVICE(0x3DF0, 0x02A4, iwl9462_2ac_cfg_soc)},
- {IWL_PCI_DEVICE(0x3DF0, 0x1010, iwl9260_2ac_cfg)},
- {IWL_PCI_DEVICE(0x3DF0, 0x1030, iwl9560_2ac_cfg_soc)},
- {IWL_PCI_DEVICE(0x3DF0, 0x1210, iwl9260_2ac_cfg)},
- {IWL_PCI_DEVICE(0x3DF0, 0x1551, iwl9560_killer_s_2ac_cfg_soc)},
- {IWL_PCI_DEVICE(0x3DF0, 0x1552, iwl9560_killer_2ac_cfg_soc)},
- {IWL_PCI_DEVICE(0x3DF0, 0x2030, iwl9560_2ac_160_cfg_soc)},
- {IWL_PCI_DEVICE(0x3DF0, 0x2034, iwl9560_2ac_160_cfg_soc)},
- {IWL_PCI_DEVICE(0x3DF0, 0x4030, iwl9560_2ac_160_cfg_soc)},
- {IWL_PCI_DEVICE(0x3DF0, 0x4034, iwl9560_2ac_160_cfg_soc)},
- {IWL_PCI_DEVICE(0x3DF0, 0x40A4, iwl9462_2ac_cfg_soc)},
- {IWL_PCI_DEVICE(0x3DF0, 0x4234, iwl9560_2ac_cfg_soc)},
- {IWL_PCI_DEVICE(0x3DF0, 0x42A4, iwl9462_2ac_cfg_soc)},
- {IWL_PCI_DEVICE(0x43F0, 0x0030, iwl9560_2ac_160_cfg_soc)},
- {IWL_PCI_DEVICE(0x43F0, 0x0034, iwl9560_2ac_cfg_soc)},
- {IWL_PCI_DEVICE(0x43F0, 0x0038, iwl9560_2ac_160_cfg_soc)},
- {IWL_PCI_DEVICE(0x43F0, 0x003C, iwl9560_2ac_160_cfg_soc)},
- {IWL_PCI_DEVICE(0x43F0, 0x0060, iwl9461_2ac_cfg_soc)},
- {IWL_PCI_DEVICE(0x43F0, 0x0064, iwl9461_2ac_cfg_soc)},
- {IWL_PCI_DEVICE(0x43F0, 0x00A0, iwl9462_2ac_cfg_soc)},
- {IWL_PCI_DEVICE(0x43F0, 0x00A4, iwl9462_2ac_cfg_soc)},
- {IWL_PCI_DEVICE(0x43F0, 0x0230, iwl9560_2ac_cfg_soc)},
- {IWL_PCI_DEVICE(0x43F0, 0x0234, iwl9560_2ac_cfg_soc)},
- {IWL_PCI_DEVICE(0x43F0, 0x0238, iwl9560_2ac_cfg_soc)},
- {IWL_PCI_DEVICE(0x43F0, 0x023C, iwl9560_2ac_cfg_soc)},
- {IWL_PCI_DEVICE(0x43F0, 0x0260, iwl9461_2ac_cfg_soc)},
- {IWL_PCI_DEVICE(0x43F0, 0x0264, iwl9461_2ac_cfg_soc)},
- {IWL_PCI_DEVICE(0x43F0, 0x02A0, iwl9462_2ac_cfg_soc)},
- {IWL_PCI_DEVICE(0x43F0, 0x02A4, iwl9462_2ac_cfg_soc)},
- {IWL_PCI_DEVICE(0x43F0, 0x1010, iwl9260_2ac_cfg)},
- {IWL_PCI_DEVICE(0x43F0, 0x1030, iwl9560_2ac_cfg_soc)},
- {IWL_PCI_DEVICE(0x43F0, 0x1210, iwl9260_2ac_cfg)},
- {IWL_PCI_DEVICE(0x43F0, 0x1551, iwl9560_killer_s_2ac_cfg_soc)},
- {IWL_PCI_DEVICE(0x43F0, 0x1552, iwl9560_killer_2ac_cfg_soc)},
- {IWL_PCI_DEVICE(0x43F0, 0x2030, iwl9560_2ac_160_cfg_soc)},
- {IWL_PCI_DEVICE(0x43F0, 0x2034, iwl9560_2ac_160_cfg_soc)},
- {IWL_PCI_DEVICE(0x43F0, 0x4030, iwl9560_2ac_160_cfg_soc)},
- {IWL_PCI_DEVICE(0x43F0, 0x4034, iwl9560_2ac_160_cfg_soc)},
- {IWL_PCI_DEVICE(0x43F0, 0x40A4, iwl9462_2ac_cfg_soc)},
- {IWL_PCI_DEVICE(0x43F0, 0x4234, iwl9560_2ac_cfg_soc)},
- {IWL_PCI_DEVICE(0x43F0, 0x42A4, iwl9462_2ac_cfg_soc)},
+ {IWL_PCI_DEVICE(0x3DF0, 0x0030, iwl9560_2ac_160_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0x3DF0, 0x0034, iwl9560_2ac_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0x3DF0, 0x0038, iwl9560_2ac_160_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0x3DF0, 0x003C, iwl9560_2ac_160_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0x3DF0, 0x0060, iwl9461_2ac_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0x3DF0, 0x0064, iwl9461_2ac_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0x3DF0, 0x00A0, iwl9462_2ac_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0x3DF0, 0x00A4, iwl9462_2ac_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0x3DF0, 0x0230, iwl9560_2ac_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0x3DF0, 0x0234, iwl9560_2ac_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0x3DF0, 0x0238, iwl9560_2ac_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0x3DF0, 0x023C, iwl9560_2ac_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0x3DF0, 0x0260, iwl9461_2ac_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0x3DF0, 0x0264, iwl9461_2ac_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0x3DF0, 0x02A0, iwl9462_2ac_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0x3DF0, 0x02A4, iwl9462_2ac_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0x3DF0, 0x1030, iwl9560_2ac_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0x3DF0, 0x1551, killer1550s_2ac_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0x3DF0, 0x1552, killer1550i_2ac_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0x3DF0, 0x2030, iwl9560_2ac_160_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0x3DF0, 0x2034, iwl9560_2ac_160_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0x3DF0, 0x4030, iwl9560_2ac_160_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0x3DF0, 0x4034, iwl9560_2ac_160_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0x3DF0, 0x40A4, iwl9462_2ac_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0x3DF0, 0x4234, iwl9560_2ac_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0x3DF0, 0x42A4, iwl9462_2ac_cfg_qu_b0_jf_b0)},
+
+ {IWL_PCI_DEVICE(0x43F0, 0x0030, iwl9560_2ac_160_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0x43F0, 0x0034, iwl9560_2ac_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0x43F0, 0x0038, iwl9560_2ac_160_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0x43F0, 0x003C, iwl9560_2ac_160_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0x43F0, 0x0060, iwl9461_2ac_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0x43F0, 0x0064, iwl9461_2ac_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0x43F0, 0x00A0, iwl9462_2ac_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0x43F0, 0x00A4, iwl9462_2ac_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0x43F0, 0x0230, iwl9560_2ac_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0x43F0, 0x0234, iwl9560_2ac_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0x43F0, 0x0238, iwl9560_2ac_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0x43F0, 0x023C, iwl9560_2ac_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0x43F0, 0x0260, iwl9461_2ac_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0x43F0, 0x0264, iwl9461_2ac_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0x43F0, 0x02A0, iwl9462_2ac_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0x43F0, 0x02A4, iwl9462_2ac_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0x43F0, 0x1030, iwl9560_2ac_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0x43F0, 0x1551, killer1550s_2ac_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0x43F0, 0x1552, killer1550i_2ac_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0x43F0, 0x2030, iwl9560_2ac_160_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0x43F0, 0x2034, iwl9560_2ac_160_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0x43F0, 0x4030, iwl9560_2ac_160_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0x43F0, 0x4034, iwl9560_2ac_160_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0x43F0, 0x40A4, iwl9462_2ac_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0x43F0, 0x4234, iwl9560_2ac_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0x43F0, 0x42A4, iwl9462_2ac_cfg_qu_b0_jf_b0)},
+
{IWL_PCI_DEVICE(0x9DF0, 0x0000, iwl9460_2ac_cfg_soc)},
{IWL_PCI_DEVICE(0x9DF0, 0x0010, iwl9460_2ac_cfg_soc)},
{IWL_PCI_DEVICE(0x9DF0, 0x0030, iwl9560_2ac_160_cfg_soc)},
{IWL_PCI_DEVICE(0x9DF0, 0x40A4, iwl9462_2ac_cfg_soc)},
{IWL_PCI_DEVICE(0x9DF0, 0x4234, iwl9560_2ac_cfg_soc)},
{IWL_PCI_DEVICE(0x9DF0, 0x42A4, iwl9462_2ac_cfg_soc)},
- {IWL_PCI_DEVICE(0xA0F0, 0x0030, iwl9560_2ac_160_cfg_soc)},
- {IWL_PCI_DEVICE(0xA0F0, 0x0034, iwl9560_2ac_cfg_soc)},
- {IWL_PCI_DEVICE(0xA0F0, 0x0038, iwl9560_2ac_160_cfg_soc)},
- {IWL_PCI_DEVICE(0xA0F0, 0x003C, iwl9560_2ac_160_cfg_soc)},
- {IWL_PCI_DEVICE(0xA0F0, 0x0060, iwl9461_2ac_cfg_soc)},
- {IWL_PCI_DEVICE(0xA0F0, 0x0064, iwl9461_2ac_cfg_soc)},
- {IWL_PCI_DEVICE(0xA0F0, 0x00A0, iwl9462_2ac_cfg_soc)},
- {IWL_PCI_DEVICE(0xA0F0, 0x00A4, iwl9462_2ac_cfg_soc)},
- {IWL_PCI_DEVICE(0xA0F0, 0x0230, iwl9560_2ac_cfg_soc)},
- {IWL_PCI_DEVICE(0xA0F0, 0x0234, iwl9560_2ac_cfg_soc)},
- {IWL_PCI_DEVICE(0xA0F0, 0x0238, iwl9560_2ac_cfg_soc)},
- {IWL_PCI_DEVICE(0xA0F0, 0x023C, iwl9560_2ac_cfg_soc)},
- {IWL_PCI_DEVICE(0xA0F0, 0x0260, iwl9461_2ac_cfg_soc)},
- {IWL_PCI_DEVICE(0xA0F0, 0x0264, iwl9461_2ac_cfg_soc)},
- {IWL_PCI_DEVICE(0xA0F0, 0x02A0, iwl9462_2ac_cfg_soc)},
- {IWL_PCI_DEVICE(0xA0F0, 0x02A4, iwl9462_2ac_cfg_soc)},
- {IWL_PCI_DEVICE(0xA0F0, 0x1010, iwl9260_2ac_cfg)},
- {IWL_PCI_DEVICE(0xA0F0, 0x1030, iwl9560_2ac_cfg_soc)},
- {IWL_PCI_DEVICE(0xA0F0, 0x1210, iwl9260_2ac_cfg)},
- {IWL_PCI_DEVICE(0xA0F0, 0x1551, iwl9560_killer_s_2ac_cfg_soc)},
- {IWL_PCI_DEVICE(0xA0F0, 0x1552, iwl9560_killer_2ac_cfg_soc)},
- {IWL_PCI_DEVICE(0xA0F0, 0x2030, iwl9560_2ac_160_cfg_soc)},
- {IWL_PCI_DEVICE(0xA0F0, 0x2034, iwl9560_2ac_160_cfg_soc)},
- {IWL_PCI_DEVICE(0xA0F0, 0x4030, iwl9560_2ac_160_cfg_soc)},
- {IWL_PCI_DEVICE(0xA0F0, 0x4034, iwl9560_2ac_160_cfg_soc)},
- {IWL_PCI_DEVICE(0xA0F0, 0x40A4, iwl9462_2ac_cfg_soc)},
- {IWL_PCI_DEVICE(0xA0F0, 0x4234, iwl9560_2ac_cfg_soc)},
- {IWL_PCI_DEVICE(0xA0F0, 0x42A4, iwl9462_2ac_cfg_soc)},
+
+ {IWL_PCI_DEVICE(0xA0F0, 0x0030, iwl9560_2ac_160_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0xA0F0, 0x0034, iwl9560_2ac_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0xA0F0, 0x0038, iwl9560_2ac_160_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0xA0F0, 0x003C, iwl9560_2ac_160_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0xA0F0, 0x0060, iwl9461_2ac_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0xA0F0, 0x0064, iwl9461_2ac_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0xA0F0, 0x00A0, iwl9462_2ac_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0xA0F0, 0x00A4, iwl9462_2ac_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0xA0F0, 0x0230, iwl9560_2ac_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0xA0F0, 0x0234, iwl9560_2ac_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0xA0F0, 0x0238, iwl9560_2ac_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0xA0F0, 0x023C, iwl9560_2ac_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0xA0F0, 0x0260, iwl9461_2ac_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0xA0F0, 0x0264, iwl9461_2ac_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0xA0F0, 0x02A0, iwl9462_2ac_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0xA0F0, 0x02A4, iwl9462_2ac_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0xA0F0, 0x1030, iwl9560_2ac_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0xA0F0, 0x1551, killer1550s_2ac_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0xA0F0, 0x1552, killer1550i_2ac_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0xA0F0, 0x2030, iwl9560_2ac_160_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0xA0F0, 0x2034, iwl9560_2ac_160_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0xA0F0, 0x4030, iwl9560_2ac_160_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0xA0F0, 0x4034, iwl9560_2ac_160_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0xA0F0, 0x40A4, iwl9462_2ac_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0xA0F0, 0x4234, iwl9560_2ac_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0xA0F0, 0x42A4, iwl9462_2ac_cfg_qu_b0_jf_b0)},
+
{IWL_PCI_DEVICE(0xA370, 0x0030, iwl9560_2ac_160_cfg_soc)},
{IWL_PCI_DEVICE(0xA370, 0x0034, iwl9560_2ac_cfg_soc)},
{IWL_PCI_DEVICE(0xA370, 0x0038, iwl9560_2ac_160_cfg_soc)},
ptr = cmdq->write_ptr;
for (i = 0; i < cmdq->n_window; i++) {
u8 idx = iwl_pcie_get_cmd_index(cmdq, ptr);
+ u8 tfdidx;
u32 caplen, cmdlen;
+ if (trans->trans_cfg->use_tfh)
+ tfdidx = idx;
+ else
+ tfdidx = ptr;
+
cmdlen = iwl_trans_pcie_get_cmdlen(trans,
- cmdq->tfds +
- tfd_size * ptr);
+ (u8 *)cmdq->tfds +
+ tfd_size * tfdidx);
caplen = min_t(u32, TFD_MAX_PAYLOAD_SIZE, cmdlen);
if (cmdlen) {
spin_lock_init(&trans_pcie->reg_lock);
mutex_init(&trans_pcie->mutex);
init_waitqueue_head(&trans_pcie->ucode_write_waitq);
+
+ trans_pcie->rba.alloc_wq = alloc_workqueue("rb_allocator",
+ WQ_HIGHPRI | WQ_UNBOUND, 1);
+ if (!trans_pcie->rba.alloc_wq) {
+ ret = -ENOMEM;
+ goto out_free_trans;
+ }
+ INIT_WORK(&trans_pcie->rba.rx_alloc, iwl_pcie_rx_allocator_work);
+
trans_pcie->tso_hdr_page = alloc_percpu(struct iwl_tso_hdr_page);
if (!trans_pcie->tso_hdr_page) {
ret = -ENOMEM;
trans_pcie->inta_mask = CSR_INI_SET_MASK;
}
- trans_pcie->rba.alloc_wq = alloc_workqueue("rb_allocator",
- WQ_HIGHPRI | WQ_UNBOUND, 1);
- INIT_WORK(&trans_pcie->rba.rx_alloc, iwl_pcie_rx_allocator_work);
-
#ifdef CONFIG_IWLWIFI_DEBUGFS
trans_pcie->fw_mon_data.state = IWL_FW_MON_DBGFS_STATE_CLOSED;
mutex_init(&trans_pcie->fw_mon_data.mutex);
iwl_pcie_free_ict(trans);
out_no_pci:
free_percpu(trans_pcie->tso_hdr_page);
+ destroy_workqueue(trans_pcie->rba.alloc_wq);
+out_free_trans:
iwl_trans_free(trans);
return ERR_PTR(ret);
}
err = dev_alloc_name(hwsim_mon, hwsim_mon->name);
if (err < 0) {
rtnl_unlock();
- goto out_free_radios;
+ goto out_free_mon;
}
err = register_netdevice(hwsim_mon);
#include <linux/leds.h>
#include <linux/mutex.h>
#include <linux/etherdevice.h>
-#include <linux/input-polldev.h>
#include <linux/kfifo.h>
#include <linux/hrtimer.h>
#include <linux/average.h>
{
struct rt2x00debug_intf *intf = file->private_data;
struct rt2x00_dev *rt2x00dev = intf->rt2x00dev;
- static unsigned long last_reset;
+ static unsigned long last_reset = INITIAL_JIFFIES;
if (!rt2x00_has_cap_restart_hw(rt2x00dev))
return -EOPNOTSUPP;
xenvif_unmap_frontend_data_rings(queue);
netif_napi_del(&queue->napi);
err:
- module_put(THIS_MODULE);
return err;
}
rc = pn533_finalize_setup(priv);
if (rc)
- goto error;
+ goto err_deregister;
usb_set_intfdata(interface, phy);
return 0;
+err_deregister:
+ pn533_unregister_device(phy->priv);
error:
+ usb_kill_urb(phy->in_urb);
+ usb_kill_urb(phy->out_urb);
+ usb_kill_urb(phy->ack_urb);
+
usb_free_urb(phy->in_urb);
usb_free_urb(phy->out_urb);
usb_free_urb(phy->ack_urb);
usb_put_dev(phy->udev);
kfree(in_buf);
+ kfree(phy->ack_buffer);
return rc;
}
/*
* Only new queue scan work when admin and IO queues are both alive
*/
- if (ctrl->state == NVME_CTRL_LIVE)
+ if (ctrl->state == NVME_CTRL_LIVE && ctrl->tagset)
queue_work(nvme_wq, &ctrl->scan_work);
}
+/*
+ * Use this function to proceed with scheduling reset_work for a controller
+ * that had previously been set to the resetting state. This is intended for
+ * code paths that can't be interrupted by other reset attempts. A hot removal
+ * may prevent this from succeeding.
+ */
+int nvme_try_sched_reset(struct nvme_ctrl *ctrl)
+{
+ if (ctrl->state != NVME_CTRL_RESETTING)
+ return -EBUSY;
+ if (!queue_work(nvme_reset_wq, &ctrl->reset_work))
+ return -EBUSY;
+ return 0;
+}
+EXPORT_SYMBOL_GPL(nvme_try_sched_reset);
+
int nvme_reset_ctrl(struct nvme_ctrl *ctrl)
{
if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_RESETTING))
ret = nvme_reset_ctrl(ctrl);
if (!ret) {
flush_work(&ctrl->reset_work);
- if (ctrl->state != NVME_CTRL_LIVE &&
- ctrl->state != NVME_CTRL_ADMIN_ONLY)
+ if (ctrl->state != NVME_CTRL_LIVE)
ret = -ENETRESET;
}
old_state = ctrl->state;
switch (new_state) {
- case NVME_CTRL_ADMIN_ONLY:
- switch (old_state) {
- case NVME_CTRL_CONNECTING:
- changed = true;
- /* FALLTHRU */
- default:
- break;
- }
- break;
case NVME_CTRL_LIVE:
switch (old_state) {
case NVME_CTRL_NEW:
switch (old_state) {
case NVME_CTRL_NEW:
case NVME_CTRL_LIVE:
- case NVME_CTRL_ADMIN_ONLY:
changed = true;
/* FALLTHRU */
default:
case NVME_CTRL_DELETING:
switch (old_state) {
case NVME_CTRL_LIVE:
- case NVME_CTRL_ADMIN_ONLY:
case NVME_CTRL_RESETTING:
case NVME_CTRL_CONNECTING:
changed = true;
break;
}
- if (changed)
+ if (changed) {
ctrl->state = new_state;
+ wake_up_all(&ctrl->state_wq);
+ }
spin_unlock_irqrestore(&ctrl->lock, flags);
if (changed && ctrl->state == NVME_CTRL_LIVE)
}
EXPORT_SYMBOL_GPL(nvme_change_ctrl_state);
+/*
+ * Returns true for sink states that can't ever transition back to live.
+ */
+static bool nvme_state_terminal(struct nvme_ctrl *ctrl)
+{
+ switch (ctrl->state) {
+ case NVME_CTRL_NEW:
+ case NVME_CTRL_LIVE:
+ case NVME_CTRL_RESETTING:
+ case NVME_CTRL_CONNECTING:
+ return false;
+ case NVME_CTRL_DELETING:
+ case NVME_CTRL_DEAD:
+ return true;
+ default:
+ WARN_ONCE(1, "Unhandled ctrl state:%d", ctrl->state);
+ return true;
+ }
+}
+
+/*
+ * Waits for the controller state to be resetting, or returns false if it is
+ * not possible to ever transition to that state.
+ */
+bool nvme_wait_reset(struct nvme_ctrl *ctrl)
+{
+ wait_event(ctrl->state_wq,
+ nvme_change_ctrl_state(ctrl, NVME_CTRL_RESETTING) ||
+ nvme_state_terminal(ctrl));
+ return ctrl->state == NVME_CTRL_RESETTING;
+}
+EXPORT_SYMBOL_GPL(nvme_wait_reset);
+
static void nvme_free_ns_head(struct kref *ref)
{
struct nvme_ns_head *head =
if (ns->disk && nvme_revalidate_disk(ns->disk))
nvme_set_queue_dying(ns);
up_read(&ctrl->namespaces_rwsem);
-
- nvme_remove_invalid_namespaces(ctrl, NVME_NSID_ALL);
}
static void nvme_passthru_end(struct nvme_ctrl *ctrl, u32 effects)
nvme_unfreeze(ctrl);
nvme_mpath_unfreeze(ctrl->subsys);
mutex_unlock(&ctrl->subsys->lock);
+ nvme_remove_invalid_namespaces(ctrl, NVME_NSID_ALL);
mutex_unlock(&ctrl->scan_lock);
}
if (effects & NVME_CMD_EFFECTS_CCC)
switch (ctrl->state) {
case NVME_CTRL_LIVE:
- case NVME_CTRL_ADMIN_ONLY:
break;
default:
return -EWOULDBLOCK;
static const char *const state_name[] = {
[NVME_CTRL_NEW] = "new",
[NVME_CTRL_LIVE] = "live",
- [NVME_CTRL_ADMIN_ONLY] = "only-admin",
[NVME_CTRL_RESETTING] = "resetting",
[NVME_CTRL_CONNECTING] = "connecting",
[NVME_CTRL_DELETING] = "deleting",
struct nvme_id_ctrl *id;
unsigned nn;
- if (ctrl->state != NVME_CTRL_LIVE)
+ /* No tagset on a live ctrl means IO queues could not be created */
+ if (ctrl->state != NVME_CTRL_LIVE || !ctrl->tagset)
return;
- WARN_ON_ONCE(!ctrl->tagset);
-
if (test_and_clear_bit(NVME_AER_NOTICE_NS_CHANGED, &ctrl->events)) {
dev_info(ctrl->device, "rescanning namespaces.\n");
nvme_clear_changed_ns_log(ctrl);
if (time_after(jiffies, fw_act_timeout)) {
dev_warn(ctrl->device,
"Fw activation timeout, reset controller\n");
- nvme_reset_ctrl(ctrl);
- break;
+ nvme_try_sched_reset(ctrl);
+ return;
}
msleep(100);
}
- if (ctrl->state != NVME_CTRL_LIVE)
+ if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_LIVE))
return;
nvme_start_queues(ctrl);
nvme_queue_scan(ctrl);
break;
case NVME_AER_NOTICE_FW_ACT_STARTING:
- queue_work(nvme_wq, &ctrl->fw_act_work);
+ /*
+ * We are (ab)using the RESETTING state to prevent subsequent
+ * recovery actions from interfering with the controller's
+ * firmware activation.
+ */
+ if (nvme_change_ctrl_state(ctrl, NVME_CTRL_RESETTING))
+ queue_work(nvme_wq, &ctrl->fw_act_work);
break;
#ifdef CONFIG_NVME_MULTIPATH
case NVME_AER_NOTICE_ANA:
INIT_WORK(&ctrl->async_event_work, nvme_async_event_work);
INIT_WORK(&ctrl->fw_act_work, nvme_fw_act_work);
INIT_WORK(&ctrl->delete_work, nvme_delete_ctrl_work);
+ init_waitqueue_head(&ctrl->state_wq);
INIT_DELAYED_WORK(&ctrl->ka_work, nvme_keep_alive_work);
memset(&ctrl->ka_cmd, 0, sizeof(ctrl->ka_cmd));
static inline bool nvmf_check_ready(struct nvme_ctrl *ctrl, struct request *rq,
bool queue_live)
{
- if (likely(ctrl->state == NVME_CTRL_LIVE ||
- ctrl->state == NVME_CTRL_ADMIN_ONLY))
+ if (likely(ctrl->state == NVME_CTRL_LIVE))
return true;
return __nvmf_check_ready(ctrl, rq, queue_live);
}
#include <linux/sed-opal.h>
#include <linux/fault-inject.h>
#include <linux/rcupdate.h>
+#include <linux/wait.h>
#include <trace/events/block.h>
enum nvme_ctrl_state {
NVME_CTRL_NEW,
NVME_CTRL_LIVE,
- NVME_CTRL_ADMIN_ONLY, /* Only admin queue live */
NVME_CTRL_RESETTING,
NVME_CTRL_CONNECTING,
NVME_CTRL_DELETING,
struct cdev cdev;
struct work_struct reset_work;
struct work_struct delete_work;
+ wait_queue_head_t state_wq;
struct nvme_subsystem *subsys;
struct list_head subsys_entry;
bool nvme_cancel_request(struct request *req, void *data, bool reserved);
bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl,
enum nvme_ctrl_state new_state);
+bool nvme_wait_reset(struct nvme_ctrl *ctrl);
int nvme_disable_ctrl(struct nvme_ctrl *ctrl);
int nvme_enable_ctrl(struct nvme_ctrl *ctrl);
int nvme_shutdown_ctrl(struct nvme_ctrl *ctrl);
void nvme_stop_keep_alive(struct nvme_ctrl *ctrl);
int nvme_reset_ctrl(struct nvme_ctrl *ctrl);
int nvme_reset_ctrl_sync(struct nvme_ctrl *ctrl);
+int nvme_try_sched_reset(struct nvme_ctrl *ctrl);
int nvme_delete_ctrl(struct nvme_ctrl *ctrl);
int nvme_get_log(struct nvme_ctrl *ctrl, u32 nsid, u8 log_page, u8 lsp,
struct bio_vec *bv)
{
struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
- unsigned int first_prp_len = dev->ctrl.page_size - bv->bv_offset;
+ unsigned int offset = bv->bv_offset & (dev->ctrl.page_size - 1);
+ unsigned int first_prp_len = dev->ctrl.page_size - offset;
iod->first_dma = dma_map_bvec(dev->dev, bv, rq_dma_dir(req), 0);
if (dma_mapping_error(dev->dev, iod->first_dma))
return true;
}
-/*
- * return error value only when tagset allocation failed
- */
-static int nvme_dev_add(struct nvme_dev *dev)
+static void nvme_dev_add(struct nvme_dev *dev)
{
int ret;
if (ret) {
dev_warn(dev->ctrl.device,
"IO queues tagset allocation failed %d\n", ret);
- return ret;
+ return;
}
dev->ctrl.tagset = &dev->tagset;
} else {
}
nvme_dbbuf_set(dev);
- return 0;
}
static int nvme_pci_enable(struct nvme_dev *dev)
mutex_unlock(&dev->shutdown_lock);
}
+static int nvme_disable_prepare_reset(struct nvme_dev *dev, bool shutdown)
+{
+ if (!nvme_wait_reset(&dev->ctrl))
+ return -EBUSY;
+ nvme_dev_disable(dev, shutdown);
+ return 0;
+}
+
static int nvme_setup_prp_pools(struct nvme_dev *dev)
{
dev->prp_page_pool = dma_pool_create("prp list page", dev->dev,
dma_pool_destroy(dev->prp_small_pool);
}
+static void nvme_free_tagset(struct nvme_dev *dev)
+{
+ if (dev->tagset.tags)
+ blk_mq_free_tag_set(&dev->tagset);
+ dev->ctrl.tagset = NULL;
+}
+
static void nvme_pci_free_ctrl(struct nvme_ctrl *ctrl)
{
struct nvme_dev *dev = to_nvme_dev(ctrl);
nvme_dbbuf_dma_free(dev);
put_device(dev->dev);
- if (dev->tagset.tags)
- blk_mq_free_tag_set(&dev->tagset);
+ nvme_free_tagset(dev);
if (dev->ctrl.admin_q)
blk_put_queue(dev->ctrl.admin_q);
kfree(dev->queues);
static void nvme_remove_dead_ctrl(struct nvme_dev *dev)
{
+ /*
+ * Set state to deleting now to avoid blocking nvme_wait_reset(), which
+ * may be holding this pci_dev's device lock.
+ */
+ nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_DELETING);
nvme_get_ctrl(&dev->ctrl);
nvme_dev_disable(dev, false);
nvme_kill_queues(&dev->ctrl);
container_of(work, struct nvme_dev, ctrl.reset_work);
bool was_suspend = !!(dev->ctrl.ctrl_config & NVME_CC_SHN_NORMAL);
int result;
- enum nvme_ctrl_state new_state = NVME_CTRL_LIVE;
if (WARN_ON(dev->ctrl.state != NVME_CTRL_RESETTING)) {
result = -ENODEV;
dev_warn(dev->ctrl.device, "IO queues not created\n");
nvme_kill_queues(&dev->ctrl);
nvme_remove_namespaces(&dev->ctrl);
- new_state = NVME_CTRL_ADMIN_ONLY;
+ nvme_free_tagset(dev);
} else {
nvme_start_queues(&dev->ctrl);
nvme_wait_freeze(&dev->ctrl);
- /* hit this only when allocate tagset fails */
- if (nvme_dev_add(dev))
- new_state = NVME_CTRL_ADMIN_ONLY;
+ nvme_dev_add(dev);
nvme_unfreeze(&dev->ctrl);
}
* If only admin queue live, keep it to do further investigation or
* recovery.
*/
- if (!nvme_change_ctrl_state(&dev->ctrl, new_state)) {
+ if (!nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_LIVE)) {
dev_warn(dev->ctrl.device,
- "failed to mark controller state %d\n", new_state);
+ "failed to mark controller live state\n");
result = -ENODEV;
goto out;
}
static int nvme_pci_reg_read64(struct nvme_ctrl *ctrl, u32 off, u64 *val)
{
- *val = readq(to_nvme_dev(ctrl)->bar + off);
+ *val = lo_hi_readq(to_nvme_dev(ctrl)->bar + off);
return 0;
}
static void nvme_reset_prepare(struct pci_dev *pdev)
{
struct nvme_dev *dev = pci_get_drvdata(pdev);
- nvme_dev_disable(dev, false);
+
+ /*
+ * We don't need to check the return value from waiting for the reset
+ * state as pci_dev device lock is held, making it impossible to race
+ * with ->remove().
+ */
+ nvme_disable_prepare_reset(dev, false);
+ nvme_sync_queues(&dev->ctrl);
}
static void nvme_reset_done(struct pci_dev *pdev)
{
struct nvme_dev *dev = pci_get_drvdata(pdev);
- nvme_reset_ctrl_sync(&dev->ctrl);
+
+ if (!nvme_try_sched_reset(&dev->ctrl))
+ flush_work(&dev->ctrl.reset_work);
}
static void nvme_shutdown(struct pci_dev *pdev)
{
struct nvme_dev *dev = pci_get_drvdata(pdev);
- nvme_dev_disable(dev, true);
+ nvme_disable_prepare_reset(dev, true);
}
/*
if (ndev->last_ps == U32_MAX ||
nvme_set_power_state(ctrl, ndev->last_ps) != 0)
- nvme_reset_ctrl(ctrl);
+ return nvme_try_sched_reset(&ndev->ctrl);
return 0;
}
*/
if (pm_suspend_via_firmware() || !ctrl->npss ||
!pcie_aspm_enabled(pdev) ||
- (ndev->ctrl.quirks & NVME_QUIRK_SIMPLE_SUSPEND)) {
- nvme_dev_disable(ndev, true);
- return 0;
- }
+ (ndev->ctrl.quirks & NVME_QUIRK_SIMPLE_SUSPEND))
+ return nvme_disable_prepare_reset(ndev, true);
nvme_start_freeze(ctrl);
nvme_wait_freeze(ctrl);
nvme_sync_queues(ctrl);
- if (ctrl->state != NVME_CTRL_LIVE &&
- ctrl->state != NVME_CTRL_ADMIN_ONLY)
+ if (ctrl->state != NVME_CTRL_LIVE)
goto unfreeze;
ret = nvme_get_power_state(ctrl, &ndev->last_ps);
* Clearing npss forces a controller reset on resume. The
* correct value will be rediscovered then.
*/
- nvme_dev_disable(ndev, true);
+ ret = nvme_disable_prepare_reset(ndev, true);
ctrl->npss = 0;
- ret = 0;
}
unfreeze:
nvme_unfreeze(ctrl);
static int nvme_simple_suspend(struct device *dev)
{
struct nvme_dev *ndev = pci_get_drvdata(to_pci_dev(dev));
-
- nvme_dev_disable(ndev, true);
- return 0;
+ return nvme_disable_prepare_reset(ndev, true);
}
static int nvme_simple_resume(struct device *dev)
struct pci_dev *pdev = to_pci_dev(dev);
struct nvme_dev *ndev = pci_get_drvdata(pdev);
- nvme_reset_ctrl(&ndev->ctrl);
- return 0;
+ return nvme_try_sched_reset(&ndev->ctrl);
}
static const struct dev_pm_ops nvme_dev_pm_ops = {
dev_warn(ctrl->ctrl.device, "I/O %d QID %d timeout\n",
rq->tag, nvme_rdma_queue_idx(queue));
+ /*
+ * Restart the timer if a controller reset is already scheduled. Any
+ * timed out commands would be handled before entering the connecting
+ * state.
+ */
+ if (ctrl->ctrl.state == NVME_CTRL_RESETTING)
+ return BLK_EH_RESET_TIMER;
+
if (ctrl->ctrl.state != NVME_CTRL_LIVE) {
/*
* Teardown immediately if controller times out while starting
queue->sock->sk->sk_data_ready = nvme_tcp_data_ready;
queue->sock->sk->sk_state_change = nvme_tcp_state_change;
queue->sock->sk->sk_write_space = nvme_tcp_write_space;
+#ifdef CONFIG_NET_RX_BUSY_POLL
queue->sock->sk->sk_ll_usec = 1;
+#endif
write_unlock_bh(&queue->sock->sk->sk_callback_lock);
return 0;
struct nvme_tcp_ctrl *ctrl = req->queue->ctrl;
struct nvme_tcp_cmd_pdu *pdu = req->pdu;
+ /*
+ * Restart the timer if a controller reset is already scheduled. Any
+ * timed out commands would be handled before entering the connecting
+ * state.
+ */
+ if (ctrl->ctrl.state == NVME_CTRL_RESETTING)
+ return BLK_EH_RESET_TIMER;
+
dev_warn(ctrl->ctrl.device,
"queue %d: timeout request %#x type %d\n",
nvme_tcp_queue_id(req->queue), rq->tag, pdu->hdr.type);
ret = nvme_tcp_map_data(queue, rq);
if (unlikely(ret)) {
+ nvme_cleanup_cmd(rq);
dev_err(queue->ctrl->ctrl.device,
"Failed to map data (%d)\n", ret);
return ret;
iod->sg_table.sgl = iod->first_sgl;
if (sg_alloc_table_chained(&iod->sg_table,
blk_rq_nr_phys_segments(req),
- iod->sg_table.sgl, SG_CHUNK_SIZE))
+ iod->sg_table.sgl, SG_CHUNK_SIZE)) {
+ nvme_cleanup_cmd(req);
return BLK_STS_RESOURCE;
+ }
iod->req.sg = iod->sg_table.sgl;
iod->req.sg_cnt = blk_rq_map_sg(req->q, req, iod->sg_table.sgl);
return(0);
}
- /* Documentation/DMA-API-HOWTO.txt tells drivers to try 64-bit
- * first, then fall back to 32-bit if that fails.
- * We are just "encouraging" 32-bit DMA masks here since we can
- * never allow IOMMU bypass unless we add special support for ZX1.
- */
- if (mask > ~0U)
- return 0;
-
ioc = GET_IOC(dev);
if (!ioc)
return 0;
pci_update_current_state(dev, dev->current_state);
}
-/**
- * pci_power_up - Put the given device into D0 forcibly
- * @dev: PCI device to power up
- */
-void pci_power_up(struct pci_dev *dev)
-{
- if (platform_pci_power_manageable(dev))
- platform_pci_set_power_state(dev, PCI_D0);
-
- pci_raw_set_power_state(dev, PCI_D0);
- pci_update_current_state(dev, PCI_D0);
-}
-
/**
* pci_platform_power_transition - Use platform to change device power state
* @dev: PCI device to handle.
}
EXPORT_SYMBOL(pci_set_power_state);
+/**
+ * pci_power_up - Put the given device into D0 forcibly
+ * @dev: PCI device to power up
+ */
+void pci_power_up(struct pci_dev *dev)
+{
+ __pci_start_power_transition(dev, PCI_D0);
+ pci_raw_set_power_state(dev, PCI_D0);
+ pci_update_current_state(dev, PCI_D0);
+}
+
/**
* pci_choose_state - Choose the power state of a PCI device
* @dev: PCI device to be suspended
#define K25 7
SIG_EXPR_LIST_DECL_SESG(K25, MACLINK4, MACLINK4, SIG_DESC_SET(SCU410, 7));
-SIG_EXPR_LIST_DECL_SESG(K25, SDA14, SDA14, SIG_DESC_SET(SCU4B0, 7));
+SIG_EXPR_LIST_DECL_SESG(K25, SDA14, I2C14, SIG_DESC_SET(SCU4B0, 7));
PIN_DECL_2(K25, GPIOA7, MACLINK4, SDA14);
FUNC_GROUP_DECL(MACLINK4, K25);
#define AD11 206
SIG_EXPR_LIST_DECL_SEMG(AD11, SPI1DQ2, QSPI1, SPI1, SIG_DESC_SET(SCU438, 14));
SIG_EXPR_LIST_DECL_SEMG(AD11, TXD13, UART13G1, UART13,
- SIG_DESC_SET(SCU438, 14));
+ SIG_DESC_CLEAR(SCU4B8, 2), SIG_DESC_SET(SCU4D8, 14));
PIN_DECL_2(AD11, GPIOZ6, SPI1DQ2, TXD13);
#define AF10 207
SIG_EXPR_LIST_DECL_SEMG(AF10, SPI1DQ3, QSPI1, SPI1, SIG_DESC_SET(SCU438, 15));
SIG_EXPR_LIST_DECL_SEMG(AF10, RXD13, UART13G1, UART13,
- SIG_DESC_SET(SCU438, 15));
+ SIG_DESC_CLEAR(SCU4B8, 3), SIG_DESC_SET(SCU4D8, 15));
PIN_DECL_2(AF10, GPIOZ7, SPI1DQ3, RXD13);
GROUP_DECL(QSPI1, AB11, AC11, AA11, AD11, AF10);
FUNC_GROUP_DECL(RMII2, D4, C2, C1, D3, D2, D1, F4, E2, E1);
#define AB4 232
-SIG_EXPR_LIST_DECL_SESG(AB4, SD3CLK, SD3, SIG_DESC_SET(SCU400, 24));
-PIN_DECL_1(AB4, GPIO18D0, SD3CLK);
+SIG_EXPR_LIST_DECL_SEMG(AB4, EMMCCLK, EMMCG1, EMMC, SIG_DESC_SET(SCU400, 24));
+PIN_DECL_1(AB4, GPIO18D0, EMMCCLK);
#define AA4 233
-SIG_EXPR_LIST_DECL_SESG(AA4, SD3CMD, SD3, SIG_DESC_SET(SCU400, 25));
-PIN_DECL_1(AA4, GPIO18D1, SD3CMD);
+SIG_EXPR_LIST_DECL_SEMG(AA4, EMMCCMD, EMMCG1, EMMC, SIG_DESC_SET(SCU400, 25));
+PIN_DECL_1(AA4, GPIO18D1, EMMCCMD);
#define AC4 234
-SIG_EXPR_LIST_DECL_SESG(AC4, SD3DAT0, SD3, SIG_DESC_SET(SCU400, 26));
-PIN_DECL_1(AC4, GPIO18D2, SD3DAT0);
+SIG_EXPR_LIST_DECL_SEMG(AC4, EMMCDAT0, EMMCG1, EMMC, SIG_DESC_SET(SCU400, 26));
+PIN_DECL_1(AC4, GPIO18D2, EMMCDAT0);
#define AA5 235
-SIG_EXPR_LIST_DECL_SESG(AA5, SD3DAT1, SD3, SIG_DESC_SET(SCU400, 27));
-PIN_DECL_1(AA5, GPIO18D3, SD3DAT1);
+SIG_EXPR_LIST_DECL_SEMG(AA5, EMMCDAT1, EMMCG4, EMMC, SIG_DESC_SET(SCU400, 27));
+PIN_DECL_1(AA5, GPIO18D3, EMMCDAT1);
#define Y5 236
-SIG_EXPR_LIST_DECL_SESG(Y5, SD3DAT2, SD3, SIG_DESC_SET(SCU400, 28));
-PIN_DECL_1(Y5, GPIO18D4, SD3DAT2);
+SIG_EXPR_LIST_DECL_SEMG(Y5, EMMCDAT2, EMMCG4, EMMC, SIG_DESC_SET(SCU400, 28));
+PIN_DECL_1(Y5, GPIO18D4, EMMCDAT2);
#define AB5 237
-SIG_EXPR_LIST_DECL_SESG(AB5, SD3DAT3, SD3, SIG_DESC_SET(SCU400, 29));
-PIN_DECL_1(AB5, GPIO18D5, SD3DAT3);
+SIG_EXPR_LIST_DECL_SEMG(AB5, EMMCDAT3, EMMCG4, EMMC, SIG_DESC_SET(SCU400, 29));
+PIN_DECL_1(AB5, GPIO18D5, EMMCDAT3);
#define AB6 238
-SIG_EXPR_LIST_DECL_SESG(AB6, SD3CD, SD3, SIG_DESC_SET(SCU400, 30));
-PIN_DECL_1(AB6, GPIO18D6, SD3CD);
+SIG_EXPR_LIST_DECL_SEMG(AB6, EMMCCD, EMMCG1, EMMC, SIG_DESC_SET(SCU400, 30));
+PIN_DECL_1(AB6, GPIO18D6, EMMCCD);
#define AC5 239
-SIG_EXPR_LIST_DECL_SESG(AC5, SD3WP, SD3, SIG_DESC_SET(SCU400, 31));
-PIN_DECL_1(AC5, GPIO18D7, SD3WP);
+SIG_EXPR_LIST_DECL_SEMG(AC5, EMMCWP, EMMCG1, EMMC, SIG_DESC_SET(SCU400, 31));
+PIN_DECL_1(AC5, GPIO18D7, EMMCWP);
-FUNC_GROUP_DECL(SD3, AB4, AA4, AC4, AA5, Y5, AB5, AB6, AC5);
+GROUP_DECL(EMMCG1, AB4, AA4, AC4, AB6, AC5);
+GROUP_DECL(EMMCG4, AB4, AA4, AC4, AA5, Y5, AB5, AB6, AC5);
#define Y1 240
SIG_EXPR_LIST_DECL_SEMG(Y1, FWSPIDCS, FWSPID, FWSPID, SIG_DESC_SET(SCU500, 3));
SIG_EXPR_LIST_DECL_SESG(Y1, VBCS, VB, SIG_DESC_SET(SCU500, 5));
-SIG_EXPR_LIST_DECL_SESG(Y1, SD3DAT4, SD3DAT4, SIG_DESC_SET(SCU404, 0));
-PIN_DECL_3(Y1, GPIO18E0, FWSPIDCS, VBCS, SD3DAT4);
-FUNC_GROUP_DECL(SD3DAT4, Y1);
+SIG_EXPR_LIST_DECL_SEMG(Y1, EMMCDAT4, EMMCG8, EMMC, SIG_DESC_SET(SCU404, 0));
+PIN_DECL_3(Y1, GPIO18E0, FWSPIDCS, VBCS, EMMCDAT4);
#define Y2 241
SIG_EXPR_LIST_DECL_SEMG(Y2, FWSPIDCK, FWSPID, FWSPID, SIG_DESC_SET(SCU500, 3));
SIG_EXPR_LIST_DECL_SESG(Y2, VBCK, VB, SIG_DESC_SET(SCU500, 5));
-SIG_EXPR_LIST_DECL_SESG(Y2, SD3DAT5, SD3DAT5, SIG_DESC_SET(SCU404, 1));
-PIN_DECL_3(Y2, GPIO18E1, FWSPIDCK, VBCK, SD3DAT5);
-FUNC_GROUP_DECL(SD3DAT5, Y2);
+SIG_EXPR_LIST_DECL_SEMG(Y2, EMMCDAT5, EMMCG8, EMMC, SIG_DESC_SET(SCU404, 1));
+PIN_DECL_3(Y2, GPIO18E1, FWSPIDCK, VBCK, EMMCDAT5);
#define Y3 242
SIG_EXPR_LIST_DECL_SEMG(Y3, FWSPIDMOSI, FWSPID, FWSPID,
SIG_DESC_SET(SCU500, 3));
SIG_EXPR_LIST_DECL_SESG(Y3, VBMOSI, VB, SIG_DESC_SET(SCU500, 5));
-SIG_EXPR_LIST_DECL_SESG(Y3, SD3DAT6, SD3DAT6, SIG_DESC_SET(SCU404, 2));
-PIN_DECL_3(Y3, GPIO18E2, FWSPIDMOSI, VBMOSI, SD3DAT6);
-FUNC_GROUP_DECL(SD3DAT6, Y3);
+SIG_EXPR_LIST_DECL_SEMG(Y3, EMMCDAT6, EMMCG8, EMMC, SIG_DESC_SET(SCU404, 2));
+PIN_DECL_3(Y3, GPIO18E2, FWSPIDMOSI, VBMOSI, EMMCDAT6);
#define Y4 243
SIG_EXPR_LIST_DECL_SEMG(Y4, FWSPIDMISO, FWSPID, FWSPID,
SIG_DESC_SET(SCU500, 3));
SIG_EXPR_LIST_DECL_SESG(Y4, VBMISO, VB, SIG_DESC_SET(SCU500, 5));
-SIG_EXPR_LIST_DECL_SESG(Y4, SD3DAT7, SD3DAT7, SIG_DESC_SET(SCU404, 3));
-PIN_DECL_3(Y4, GPIO18E3, FWSPIDMISO, VBMISO, SD3DAT7);
-FUNC_GROUP_DECL(SD3DAT7, Y4);
+SIG_EXPR_LIST_DECL_SEMG(Y4, EMMCDAT7, EMMCG8, EMMC, SIG_DESC_SET(SCU404, 3));
+PIN_DECL_3(Y4, GPIO18E3, FWSPIDMISO, VBMISO, EMMCDAT7);
GROUP_DECL(FWSPID, Y1, Y2, Y3, Y4);
GROUP_DECL(FWQSPID, Y1, Y2, Y3, Y4, AE12, AF12);
+GROUP_DECL(EMMCG8, AB4, AA4, AC4, AA5, Y5, AB5, AB6, AC5, Y1, Y2, Y3, Y4);
FUNC_DECL_2(FWSPID, FWSPID, FWQSPID);
FUNC_GROUP_DECL(VB, Y1, Y2, Y3, Y4);
-
+FUNC_DECL_3(EMMC, EMMCG1, EMMCG4, EMMCG8);
/*
* FIXME: Confirm bits and priorities are the right way around for the
* following 4 pins
*/
#define AF25 244
-SIG_EXPR_LIST_DECL_SEMG(AF25, I3C3SCL, I3C3, I3C3, SIG_DESC_SET(SCU438, 20),
- SIG_DESC_SET(SCU4D8, 20));
-SIG_EXPR_LIST_DECL_SESG(AF25, FSI1CLK, FSI1, SIG_DESC_CLEAR(SCU438, 20),
- SIG_DESC_SET(SCU4D8, 20));
+SIG_EXPR_LIST_DECL_SEMG(AF25, I3C3SCL, I3C3, I3C3, SIG_DESC_SET(SCU438, 20));
+SIG_EXPR_LIST_DECL_SESG(AF25, FSI1CLK, FSI1, SIG_DESC_SET(SCU4D8, 20));
PIN_DECL_(AF25, SIG_EXPR_LIST_PTR(AF25, I3C3SCL),
SIG_EXPR_LIST_PTR(AF25, FSI1CLK));
#define AE26 245
-SIG_EXPR_LIST_DECL_SEMG(AE26, I3C3SDA, I3C3, I3C3, SIG_DESC_SET(SCU438, 21),
- SIG_DESC_SET(SCU4D8, 21));
-SIG_EXPR_LIST_DECL_SESG(AE26, FSI1DATA, FSI1, SIG_DESC_CLEAR(SCU438, 21),
- SIG_DESC_SET(SCU4D8, 21));
+SIG_EXPR_LIST_DECL_SEMG(AE26, I3C3SDA, I3C3, I3C3, SIG_DESC_SET(SCU438, 21));
+SIG_EXPR_LIST_DECL_SESG(AE26, FSI1DATA, FSI1, SIG_DESC_SET(SCU4D8, 21));
PIN_DECL_(AE26, SIG_EXPR_LIST_PTR(AE26, I3C3SDA),
SIG_EXPR_LIST_PTR(AE26, FSI1DATA));
FUNC_GROUP_DECL(FSI1, AF25, AE26);
#define AE25 246
-SIG_EXPR_LIST_DECL_SEMG(AE25, I3C4SCL, I3C4, I3C4, SIG_DESC_SET(SCU438, 22),
- SIG_DESC_SET(SCU4D8, 22));
-SIG_EXPR_LIST_DECL_SESG(AE25, FSI2CLK, FSI2, SIG_DESC_CLEAR(SCU438, 22),
- SIG_DESC_SET(SCU4D8, 22));
+SIG_EXPR_LIST_DECL_SEMG(AE25, I3C4SCL, I3C4, I3C4, SIG_DESC_SET(SCU438, 22));
+SIG_EXPR_LIST_DECL_SESG(AE25, FSI2CLK, FSI2, SIG_DESC_SET(SCU4D8, 22));
PIN_DECL_(AE25, SIG_EXPR_LIST_PTR(AE25, I3C4SCL),
SIG_EXPR_LIST_PTR(AE25, FSI2CLK));
#define AF24 247
-SIG_EXPR_LIST_DECL_SEMG(AF24, I3C4SDA, I3C4, I3C4, SIG_DESC_SET(SCU438, 23),
- SIG_DESC_SET(SCU4D8, 23));
-SIG_EXPR_LIST_DECL_SESG(AF24, FSI2DATA, FSI2, SIG_DESC_CLEAR(SCU438, 23),
- SIG_DESC_SET(SCU4D8, 23));
+SIG_EXPR_LIST_DECL_SEMG(AF24, I3C4SDA, I3C4, I3C4, SIG_DESC_SET(SCU438, 23));
+SIG_EXPR_LIST_DECL_SESG(AF24, FSI2DATA, FSI2, SIG_DESC_SET(SCU4D8, 23));
PIN_DECL_(AF24, SIG_EXPR_LIST_PTR(AF24, I3C4SDA),
SIG_EXPR_LIST_PTR(AF24, FSI2DATA));
ASPEED_PINCTRL_PIN(A3),
ASPEED_PINCTRL_PIN(AA11),
ASPEED_PINCTRL_PIN(AA12),
+ ASPEED_PINCTRL_PIN(AA16),
+ ASPEED_PINCTRL_PIN(AA17),
ASPEED_PINCTRL_PIN(AA23),
ASPEED_PINCTRL_PIN(AA24),
ASPEED_PINCTRL_PIN(AA25),
ASPEED_PINCTRL_PIN(AB11),
ASPEED_PINCTRL_PIN(AB12),
ASPEED_PINCTRL_PIN(AB15),
+ ASPEED_PINCTRL_PIN(AB16),
+ ASPEED_PINCTRL_PIN(AB17),
ASPEED_PINCTRL_PIN(AB18),
ASPEED_PINCTRL_PIN(AB19),
ASPEED_PINCTRL_PIN(AB22),
ASPEED_PINCTRL_PIN(AC11),
ASPEED_PINCTRL_PIN(AC12),
ASPEED_PINCTRL_PIN(AC15),
+ ASPEED_PINCTRL_PIN(AC16),
ASPEED_PINCTRL_PIN(AC17),
ASPEED_PINCTRL_PIN(AC18),
ASPEED_PINCTRL_PIN(AC19),
ASPEED_PINCTRL_PIN(AD12),
ASPEED_PINCTRL_PIN(AD14),
ASPEED_PINCTRL_PIN(AD15),
+ ASPEED_PINCTRL_PIN(AD16),
ASPEED_PINCTRL_PIN(AD19),
ASPEED_PINCTRL_PIN(AD20),
ASPEED_PINCTRL_PIN(AD22),
ASPEED_PINCTRL_PIN(AE12),
ASPEED_PINCTRL_PIN(AE14),
ASPEED_PINCTRL_PIN(AE15),
+ ASPEED_PINCTRL_PIN(AE16),
ASPEED_PINCTRL_PIN(AE18),
ASPEED_PINCTRL_PIN(AE19),
+ ASPEED_PINCTRL_PIN(AE25),
+ ASPEED_PINCTRL_PIN(AE26),
ASPEED_PINCTRL_PIN(AE7),
ASPEED_PINCTRL_PIN(AE8),
ASPEED_PINCTRL_PIN(AF10),
ASPEED_PINCTRL_PIN(AF12),
ASPEED_PINCTRL_PIN(AF14),
ASPEED_PINCTRL_PIN(AF15),
+ ASPEED_PINCTRL_PIN(AF24),
+ ASPEED_PINCTRL_PIN(AF25),
ASPEED_PINCTRL_PIN(AF7),
ASPEED_PINCTRL_PIN(AF8),
ASPEED_PINCTRL_PIN(AF9),
ASPEED_PINCTRL_PIN(Y3),
ASPEED_PINCTRL_PIN(Y4),
ASPEED_PINCTRL_PIN(Y5),
- ASPEED_PINCTRL_PIN(AB16),
- ASPEED_PINCTRL_PIN(AA17),
- ASPEED_PINCTRL_PIN(AB17),
- ASPEED_PINCTRL_PIN(AE16),
- ASPEED_PINCTRL_PIN(AC16),
- ASPEED_PINCTRL_PIN(AA16),
- ASPEED_PINCTRL_PIN(AD16),
- ASPEED_PINCTRL_PIN(AF25),
- ASPEED_PINCTRL_PIN(AE26),
- ASPEED_PINCTRL_PIN(AE25),
- ASPEED_PINCTRL_PIN(AF24),
};
static const struct aspeed_pin_group aspeed_g6_groups[] = {
ASPEED_PINCTRL_GROUP(SALT9G1),
ASPEED_PINCTRL_GROUP(SD1),
ASPEED_PINCTRL_GROUP(SD2),
- ASPEED_PINCTRL_GROUP(SD3),
- ASPEED_PINCTRL_GROUP(SD3DAT4),
- ASPEED_PINCTRL_GROUP(SD3DAT5),
- ASPEED_PINCTRL_GROUP(SD3DAT6),
- ASPEED_PINCTRL_GROUP(SD3DAT7),
+ ASPEED_PINCTRL_GROUP(EMMCG1),
+ ASPEED_PINCTRL_GROUP(EMMCG4),
+ ASPEED_PINCTRL_GROUP(EMMCG8),
ASPEED_PINCTRL_GROUP(SGPM1),
ASPEED_PINCTRL_GROUP(SGPS1),
ASPEED_PINCTRL_GROUP(SIOONCTRL),
ASPEED_PINCTRL_FUNC(ADC8),
ASPEED_PINCTRL_FUNC(ADC9),
ASPEED_PINCTRL_FUNC(BMCINT),
+ ASPEED_PINCTRL_FUNC(EMMC),
ASPEED_PINCTRL_FUNC(ESPI),
ASPEED_PINCTRL_FUNC(ESPIALT),
ASPEED_PINCTRL_FUNC(FSI1),
ASPEED_PINCTRL_FUNC(SALT9),
ASPEED_PINCTRL_FUNC(SD1),
ASPEED_PINCTRL_FUNC(SD2),
- ASPEED_PINCTRL_FUNC(SD3),
- ASPEED_PINCTRL_FUNC(SD3DAT4),
- ASPEED_PINCTRL_FUNC(SD3DAT5),
- ASPEED_PINCTRL_FUNC(SD3DAT6),
- ASPEED_PINCTRL_FUNC(SD3DAT7),
ASPEED_PINCTRL_FUNC(SGPM1),
ASPEED_PINCTRL_FUNC(SGPS1),
ASPEED_PINCTRL_FUNC(SIOONCTRL),
* @idx: The bit index in the register
*/
#define SIG_DESC_SET(reg, idx) SIG_DESC_IP_BIT(ASPEED_IP_SCU, reg, idx, 1)
-#define SIG_DESC_CLEAR(reg, idx) SIG_DESC_IP_BIT(ASPEED_IP_SCU, reg, idx, 0)
+#define SIG_DESC_CLEAR(reg, idx) { ASPEED_IP_SCU, reg, BIT_MASK(idx), 0, 0 }
#define SIG_DESC_LIST_SYM(sig, group) sig_descs_ ## sig ## _ ## group
#define SIG_DESC_LIST_DECL(sig, group, ...) \
static const char *FUNC_SYM(func)[] = { __VA_ARGS__ }
#define FUNC_DECL_2(func, one, two) FUNC_DECL_(func, #one, #two)
+#define FUNC_DECL_3(func, one, two, three) FUNC_DECL_(func, #one, #two, #three)
#define FUNC_GROUP_DECL(func, ...) \
GROUP_DECL(func, __VA_ARGS__); \
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (C) 2014-2017 Broadcom
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation version 2.
- *
- * This program is distributed "as is" WITHOUT ANY WARRANTY of any
- * kind, whether express or implied; without even the implied warranty
- * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
*/
/*
/* optional GPIO interrupt support */
irq = platform_get_irq(pdev, 0);
- if (irq) {
+ if (irq > 0) {
struct irq_chip *irqc;
struct gpio_irq_chip *girq;
const struct ns2_pin_function *func;
const struct ns2_pin_group *grp;
- if (grp_select > pinctrl->num_groups ||
- func_select > pinctrl->num_functions)
+ if (grp_select >= pinctrl->num_groups ||
+ func_select >= pinctrl->num_functions)
return -EINVAL;
func = &pinctrl->functions[func_select];
BERLIN_PINCTRL_FUNCTION(0x0, "gpio"), /* GPIO5 */
BERLIN_PINCTRL_FUNCTION(0x1, "i2s1"), /* DO3 */
BERLIN_PINCTRL_FUNCTION(0x2, "pwm"), /* PWM5 */
- BERLIN_PINCTRL_FUNCTION(0x3, "spififib"), /* SPDIFIB */
+ BERLIN_PINCTRL_FUNCTION(0x3, "spdifib"), /* SPDIFIB */
BERLIN_PINCTRL_FUNCTION(0x4, "spdifo"), /* SPDIFO */
BERLIN_PINCTRL_FUNCTION(0x5, "phy")), /* DBG5 */
BERLIN_PINCTRL_GROUP("I2S1_MCLK", 0x0, 0x3, 0x12,
.matches = {
DMI_MATCH(DMI_SYS_VENDOR, "GOOGLE"),
DMI_MATCH(DMI_PRODUCT_FAMILY, "Intel_Strago"),
- DMI_MATCH(DMI_PRODUCT_VERSION, "1.0"),
},
},
{
.matches = {
DMI_MATCH(DMI_SYS_VENDOR, "HP"),
DMI_MATCH(DMI_PRODUCT_NAME, "Setzer"),
- DMI_MATCH(DMI_PRODUCT_VERSION, "1.0"),
},
},
{
.matches = {
DMI_MATCH(DMI_SYS_VENDOR, "GOOGLE"),
DMI_MATCH(DMI_PRODUCT_NAME, "Cyan"),
- DMI_MATCH(DMI_PRODUCT_VERSION, "1.0"),
},
},
{
.matches = {
DMI_MATCH(DMI_SYS_VENDOR, "GOOGLE"),
DMI_MATCH(DMI_PRODUCT_NAME, "Celes"),
- DMI_MATCH(DMI_PRODUCT_VERSION, "1.0"),
},
},
{}
* @pctldesc: Pin controller description
* @pctldev: Pointer to the pin controller device
* @chip: GPIO chip in this pin controller
+ * @irqchip: IRQ chip in this pin controller
* @soc: SoC/PCH specific pin configuration data
* @communities: All communities in this pin controller
* @ncommunities: Number of communities in this pin controller
struct pinctrl_desc pctldesc;
struct pinctrl_dev *pctldev;
struct gpio_chip chip;
+ struct irq_chip irqchip;
const struct intel_pinctrl_soc_data *soc;
struct intel_community *communities;
size_t ncommunities;
return ret;
}
-static struct irq_chip intel_gpio_irqchip = {
- .name = "intel-gpio",
- .irq_ack = intel_gpio_irq_ack,
- .irq_mask = intel_gpio_irq_mask,
- .irq_unmask = intel_gpio_irq_unmask,
- .irq_set_type = intel_gpio_irq_type,
- .irq_set_wake = intel_gpio_irq_wake,
- .flags = IRQCHIP_MASK_ON_SUSPEND,
-};
-
static int intel_gpio_add_pin_ranges(struct intel_pinctrl *pctrl,
const struct intel_community *community)
{
pctrl->chip = intel_gpio_chip;
+ /* Setup GPIO chip */
pctrl->chip.ngpio = intel_gpio_ngpio(pctrl);
pctrl->chip.label = dev_name(pctrl->dev);
pctrl->chip.parent = pctrl->dev;
pctrl->chip.base = -1;
pctrl->irq = irq;
+ /* Setup IRQ chip */
+ pctrl->irqchip.name = dev_name(pctrl->dev);
+ pctrl->irqchip.irq_ack = intel_gpio_irq_ack;
+ pctrl->irqchip.irq_mask = intel_gpio_irq_mask;
+ pctrl->irqchip.irq_unmask = intel_gpio_irq_unmask;
+ pctrl->irqchip.irq_set_type = intel_gpio_irq_type;
+ pctrl->irqchip.irq_set_wake = intel_gpio_irq_wake;
+ pctrl->irqchip.flags = IRQCHIP_MASK_ON_SUSPEND;
+
ret = devm_gpiochip_add_data(pctrl->dev, &pctrl->chip, pctrl);
if (ret) {
dev_err(pctrl->dev, "failed to register gpiochip\n");
return ret;
}
- ret = gpiochip_irqchip_add(&pctrl->chip, &intel_gpio_irqchip, 0,
+ ret = gpiochip_irqchip_add(&pctrl->chip, &pctrl->irqchip, 0,
handle_bad_irq, IRQ_TYPE_NONE);
if (ret) {
dev_err(pctrl->dev, "failed to add irqchip\n");
return ret;
}
- gpiochip_set_chained_irqchip(&pctrl->chip, &intel_gpio_irqchip, irq,
- NULL);
+ gpiochip_set_chained_irqchip(&pctrl->chip, &pctrl->irqchip, irq, NULL);
return 0;
}
PIN_GRP_EXTRA("uart2", 9, 2, BIT(1) | BIT(13) | BIT(14) | BIT(19),
BIT(1) | BIT(13) | BIT(14), BIT(1) | BIT(19),
18, 2, "gpio", "uart"),
- PIN_GRP_GPIO("led0_od", 11, 1, BIT(20), "led"),
- PIN_GRP_GPIO("led1_od", 12, 1, BIT(21), "led"),
- PIN_GRP_GPIO("led2_od", 13, 1, BIT(22), "led"),
- PIN_GRP_GPIO("led3_od", 14, 1, BIT(23), "led"),
+ PIN_GRP_GPIO_2("led0_od", 11, 1, BIT(20), BIT(20), 0, "led"),
+ PIN_GRP_GPIO_2("led1_od", 12, 1, BIT(21), BIT(21), 0, "led"),
+ PIN_GRP_GPIO_2("led2_od", 13, 1, BIT(22), BIT(22), 0, "led"),
+ PIN_GRP_GPIO_2("led3_od", 14, 1, BIT(23), BIT(23), 0, "led"),
};
};
static inline void armada_37xx_update_reg(unsigned int *reg,
- unsigned int offset)
+ unsigned int *offset)
{
/* We never have more than 2 registers */
- if (offset >= GPIO_PER_REG) {
- offset -= GPIO_PER_REG;
+ if (*offset >= GPIO_PER_REG) {
+ *offset -= GPIO_PER_REG;
*reg += sizeof(u32);
}
}
{
int offset = irqd_to_hwirq(d);
- armada_37xx_update_reg(reg, offset);
+ armada_37xx_update_reg(reg, &offset);
}
static int armada_37xx_gpio_direction_input(struct gpio_chip *chip,
unsigned int reg = OUTPUT_EN;
unsigned int mask;
- armada_37xx_update_reg(&reg, offset);
+ armada_37xx_update_reg(&reg, &offset);
mask = BIT(offset);
return regmap_update_bits(info->regmap, reg, mask, 0);
unsigned int reg = OUTPUT_EN;
unsigned int val, mask;
- armada_37xx_update_reg(&reg, offset);
+ armada_37xx_update_reg(&reg, &offset);
mask = BIT(offset);
regmap_read(info->regmap, reg, &val);
unsigned int reg = OUTPUT_EN;
unsigned int mask, val, ret;
- armada_37xx_update_reg(&reg, offset);
+ armada_37xx_update_reg(&reg, &offset);
mask = BIT(offset);
ret = regmap_update_bits(info->regmap, reg, mask, mask);
unsigned int reg = INPUT_VAL;
unsigned int val, mask;
- armada_37xx_update_reg(&reg, offset);
+ armada_37xx_update_reg(&reg, &offset);
mask = BIT(offset);
regmap_read(info->regmap, reg, &val);
unsigned int reg = OUTPUT_VAL;
unsigned int mask, val;
- armada_37xx_update_reg(&reg, offset);
+ armada_37xx_update_reg(&reg, &offset);
mask = BIT(offset);
val = value ? mask : 0;
static int stmfx_pinctrl_remove(struct platform_device *pdev)
{
- struct stmfx *stmfx = dev_get_platdata(&pdev->dev);
+ struct stmfx *stmfx = dev_get_drvdata(pdev->dev.parent);
return stmfx_function_disable(stmfx,
STMFX_FUNC_GPIO |
static int cmpc_accel_remove_v4(struct acpi_device *acpi)
{
- struct input_dev *inputdev;
- struct cmpc_accel *accel;
-
- inputdev = dev_get_drvdata(&acpi->dev);
- accel = dev_get_drvdata(&inputdev->dev);
-
device_remove_file(&acpi->dev, &cmpc_accel_sensitivity_attr_v4);
device_remove_file(&acpi->dev, &cmpc_accel_g_select_attr_v4);
return cmpc_remove_acpi_notify_device(acpi);
static int cmpc_accel_remove(struct acpi_device *acpi)
{
- struct input_dev *inputdev;
- struct cmpc_accel *accel;
-
- inputdev = dev_get_drvdata(&acpi->dev);
- accel = dev_get_drvdata(&inputdev->dev);
-
device_remove_file(&acpi->dev, &cmpc_accel_sensitivity_attr);
return cmpc_remove_acpi_notify_device(acpi);
}
if (ret < 0) {
dev_dbg(dev, "Error requesting irq at index %d: %d\n",
inst_data[i].irq_idx, ret);
+ goto error;
}
board_info.irq = ret;
break;
platform_set_drvdata(pdev, punit_ipcdev);
- irq = platform_get_irq(pdev, 0);
+ irq = platform_get_irq_optional(pdev, 0);
if (irq < 0) {
- punit_ipcdev->irq = 0;
dev_warn(&pdev->dev, "Invalid IRQ, using polling mode\n");
} else {
ret = devm_request_irq(&pdev->dev, irq, intel_punit_ioc,
help
This driver adds support for using the PCH EG20T as a PTP
clock. The hardware supports time stamping of PTP packets
- when using the end-to-end delay (E2E) mechansim. The peer
- delay mechansim (P2P) is not supported.
+ when using the end-to-end delay (E2E) mechanism. The peer
+ delay mechanism (P2P) is not supported.
This clock is only useful if your PTP programs are getting
hardware time stamps on the PTP Ethernet packets using the
static void qeth_l2_vnicc_init(struct qeth_card *card)
{
u32 *timeout = &card->options.vnicc.learning_timeout;
+ bool enable, error = false;
unsigned int chars_len, i;
unsigned long chars_tmp;
u32 sup_cmds, vnicc;
- bool enable, error;
QETH_CARD_TEXT(card, 2, "vniccini");
/* reset rx_bcast */
chars_len = sizeof(card->options.vnicc.sup_chars) * BITS_PER_BYTE;
for_each_set_bit(i, &chars_tmp, chars_len) {
vnicc = BIT(i);
- qeth_l2_vnicc_query_cmds(card, vnicc, &sup_cmds);
- if (!(sup_cmds & IPA_VNICC_SET_TIMEOUT) ||
- !(sup_cmds & IPA_VNICC_GET_TIMEOUT))
+ if (qeth_l2_vnicc_query_cmds(card, vnicc, &sup_cmds)) {
+ sup_cmds = 0;
+ error = true;
+ }
+ if ((sup_cmds & IPA_VNICC_SET_TIMEOUT) &&
+ (sup_cmds & IPA_VNICC_GET_TIMEOUT))
+ card->options.vnicc.getset_timeout_sup |= vnicc;
+ else
card->options.vnicc.getset_timeout_sup &= ~vnicc;
- if (!(sup_cmds & IPA_VNICC_ENABLE) ||
- !(sup_cmds & IPA_VNICC_DISABLE))
+ if ((sup_cmds & IPA_VNICC_ENABLE) &&
+ (sup_cmds & IPA_VNICC_DISABLE))
+ card->options.vnicc.set_char_sup |= vnicc;
+ else
card->options.vnicc.set_char_sup &= ~vnicc;
}
/* enforce assumed default values and recover settings, if changed */
- error = qeth_l2_vnicc_recover_timeout(card, QETH_VNICC_LEARNING,
- timeout);
+ error |= qeth_l2_vnicc_recover_timeout(card, QETH_VNICC_LEARNING,
+ timeout);
chars_tmp = card->options.vnicc.wanted_chars ^ QETH_VNICC_DEFAULT;
chars_tmp |= QETH_VNICC_BRIDGE_INVISIBLE;
chars_len = sizeof(card->options.vnicc.wanted_chars) * BITS_PER_BYTE;
struct kmem_cache *zfcp_fsf_qtcb_cache;
+static bool ber_stop = true;
+module_param(ber_stop, bool, 0600);
+MODULE_PARM_DESC(ber_stop,
+ "Shuts down FCP devices for FCP channels that report a bit-error count in excess of its threshold (default on)");
+
static void zfcp_fsf_request_timeout_handler(struct timer_list *t)
{
struct zfcp_fsf_req *fsf_req = from_timer(fsf_req, t, timer);
case FSF_STATUS_READ_SENSE_DATA_AVAIL:
break;
case FSF_STATUS_READ_BIT_ERROR_THRESHOLD:
- dev_warn(&adapter->ccw_device->dev,
- "The error threshold for checksum statistics "
- "has been exceeded\n");
zfcp_dbf_hba_bit_err("fssrh_3", req);
+ if (ber_stop) {
+ dev_warn(&adapter->ccw_device->dev,
+ "All paths over this FCP device are disused because of excessive bit errors\n");
+ zfcp_erp_adapter_shutdown(adapter, 0, "fssrh_b");
+ } else {
+ dev_warn(&adapter->ccw_device->dev,
+ "The error threshold for checksum statistics has been exceeded\n");
+ }
break;
case FSF_STATUS_READ_LINK_DOWN:
zfcp_fsf_status_read_link_down(req);
if (sense_len == 0) {
rsp->status_srb = NULL;
sp->done(sp, cp->result);
- } else {
- WARN_ON_ONCE(true);
}
}
ses->data_direction = scmd->sc_data_direction;
ses->sdb = scmd->sdb;
ses->result = scmd->result;
+ ses->resid_len = scmd->req.resid_len;
ses->underflow = scmd->underflow;
ses->prot_op = scmd->prot_op;
ses->eh_eflags = scmd->eh_eflags;
memset(scmd->cmnd, 0, BLK_MAX_CDB);
memset(&scmd->sdb, 0, sizeof(scmd->sdb));
scmd->result = 0;
+ scmd->req.resid_len = 0;
if (sense_bytes) {
scmd->sdb.length = min_t(unsigned, SCSI_SENSE_BUFFERSIZE,
scmd->sc_data_direction = ses->data_direction;
scmd->sdb = ses->sdb;
scmd->result = ses->result;
+ scmd->req.resid_len = ses->resid_len;
scmd->underflow = ses->underflow;
scmd->prot_op = ses->prot_op;
scmd->eh_eflags = ses->eh_eflags;
.init_request = scsi_mq_init_request,
.exit_request = scsi_mq_exit_request,
.initialize_rq_fn = scsi_initialize_rq,
+ .cleanup_rq = scsi_cleanup_rq,
.busy = scsi_mq_lld_busy,
.map_queues = scsi_map_queues,
};
{
struct scsi_device *sdev = NULL;
- if (q->mq_ops == &scsi_mq_ops)
+ if (q->mq_ops == &scsi_mq_ops_no_commit ||
+ q->mq_ops == &scsi_mq_ops)
sdev = q->queuedata;
if (!sdev || !get_device(&sdev->sdev_gendev))
sdev = NULL;
/* we need to evaluate the error return */
if (scsi_sense_valid(sshdr) &&
(sshdr->asc == 0x3a || /* medium not present */
- sshdr->asc == 0x20)) /* invalid command */
+ sshdr->asc == 0x20 || /* invalid command */
+ (sshdr->asc == 0x74 && sshdr->ascq == 0x71))) /* drive is password locked */
/* this is no error here */
return 0;
vhost_test_stop(n, &private);
vhost_test_flush(n);
+ vhost_dev_stop(&n->dev);
vhost_dev_cleanup(&n->dev);
/* We do an extra flush before freeing memory,
* since jobs can re-queue themselves. */
}
vhost_test_stop(n, &priv);
vhost_test_flush(n);
+ vhost_dev_stop(&n->dev);
vhost_dev_reset_owner(&n->dev, umem);
done:
mutex_unlock(&n->dev.mutex);
mappass->reqcopy = *req;
icsk = inet_csk(mappass->sock->sk);
queue = &icsk->icsk_accept_queue;
- data = queue->rskq_accept_head != NULL;
+ data = READ_ONCE(queue->rskq_accept_head) != NULL;
if (data) {
mappass->reqcopy.cmd = 0;
ret = 0;
}
done:
- if (*p != end)
- goto bad;
+ /* Skip over any unrecognized fields */
+ *p = end;
return 0;
bad:
goto bad;
info->filelock_reply = *p;
- *p += sizeof(*info->filelock_reply);
- if (unlikely(*p != end))
- goto bad;
+ /* Skip over any unrecognized fields */
+ *p = end;
return 0;
-
bad:
return -EIO;
}
{
if (features == (u64)-1 ||
(features & CEPH_FEATURE_REPLY_CREATE_INODE)) {
+ /* Malformed reply? */
if (*p == end) {
info->has_create_ino = false;
} else {
info->has_create_ino = true;
- info->ino = ceph_decode_64(p);
+ ceph_decode_64_safe(p, end, info->ino, bad);
}
+ } else {
+ if (*p != end)
+ goto bad;
}
- if (unlikely(*p != end))
- goto bad;
+ /* Skip over any unrecognized fields */
+ *p = end;
return 0;
-
bad:
return -EIO;
}
}
}
-/**
+/*
* dio_complete() - called when all DIO BIO I/O has been completed
- * @offset: the byte offset in the file of the completed operation
*
* This drops i_dio_count, lets interested parties know that a DIO operation
* has completed, and calculates the resulting return code for the operation.
* cgroup_writeback_by_id - initiate cgroup writeback from bdi and memcg IDs
* @bdi_id: target bdi id
* @memcg_id: target memcg css id
- * @nr_pages: number of pages to write, 0 for best-effort dirty flushing
+ * @nr: number of pages to write, 0 for best-effort dirty flushing
* @reason: reason why some writeback work initiated
* @done: target wb_completion
*
#define REQ_F_FAIL_LINK 256 /* fail rest of links */
#define REQ_F_SHADOW_DRAIN 512 /* link-drain shadow req */
#define REQ_F_TIMEOUT 1024 /* timeout request */
+#define REQ_F_ISREG 2048 /* regular file */
+#define REQ_F_MUST_PUNT 4096 /* must be punted even for NONBLOCK */
u64 user_data;
u32 result;
u32 sequence;
return ret;
}
-static void kiocb_end_write(struct kiocb *kiocb)
+static void kiocb_end_write(struct io_kiocb *req)
{
- if (kiocb->ki_flags & IOCB_WRITE) {
- struct inode *inode = file_inode(kiocb->ki_filp);
+ /*
+ * Tell lockdep we inherited freeze protection from submission
+ * thread.
+ */
+ if (req->flags & REQ_F_ISREG) {
+ struct inode *inode = file_inode(req->file);
- /*
- * Tell lockdep we inherited freeze protection from submission
- * thread.
- */
- if (S_ISREG(inode->i_mode))
- __sb_writers_acquired(inode->i_sb, SB_FREEZE_WRITE);
- file_end_write(kiocb->ki_filp);
+ __sb_writers_acquired(inode->i_sb, SB_FREEZE_WRITE);
}
+ file_end_write(req->file);
}
static void io_complete_rw(struct kiocb *kiocb, long res, long res2)
{
struct io_kiocb *req = container_of(kiocb, struct io_kiocb, rw);
- kiocb_end_write(kiocb);
+ if (kiocb->ki_flags & IOCB_WRITE)
+ kiocb_end_write(req);
if ((req->flags & REQ_F_LINK) && res != req->result)
req->flags |= REQ_F_FAIL_LINK;
{
struct io_kiocb *req = container_of(kiocb, struct io_kiocb, rw);
- kiocb_end_write(kiocb);
+ if (kiocb->ki_flags & IOCB_WRITE)
+ kiocb_end_write(req);
if ((req->flags & REQ_F_LINK) && res != req->result)
req->flags |= REQ_F_FAIL_LINK;
if (!req->file)
return -EBADF;
- if (force_nonblock && !io_file_supports_async(req->file))
- force_nonblock = false;
+ if (S_ISREG(file_inode(req->file)->i_mode))
+ req->flags |= REQ_F_ISREG;
+
+ /*
+ * If the file doesn't support async, mark it as REQ_F_MUST_PUNT so
+ * we know to async punt it even if it was opened O_NONBLOCK
+ */
+ if (force_nonblock && !io_file_supports_async(req->file)) {
+ req->flags |= REQ_F_MUST_PUNT;
+ return -EAGAIN;
+ }
kiocb->ki_pos = READ_ONCE(sqe->off);
kiocb->ki_flags = iocb_flags(kiocb->ki_filp);
return ret;
/* don't allow async punt if RWF_NOWAIT was requested */
- if (kiocb->ki_flags & IOCB_NOWAIT)
+ if ((kiocb->ki_flags & IOCB_NOWAIT) ||
+ (req->file->f_flags & O_NONBLOCK))
req->flags |= REQ_F_NOWAIT;
if (force_nonblock)
* need async punt anyway, so it's more efficient to do it
* here.
*/
- if (force_nonblock && ret2 > 0 && ret2 < read_size)
+ if (force_nonblock && !(req->flags & REQ_F_NOWAIT) &&
+ (req->flags & REQ_F_ISREG) &&
+ ret2 > 0 && ret2 < read_size)
ret2 = -EAGAIN;
/* Catch -EAGAIN return for forced non-blocking submission */
if (!force_nonblock || ret2 != -EAGAIN) {
* released so that it doesn't complain about the held lock when
* we return to userspace.
*/
- if (S_ISREG(file_inode(file)->i_mode)) {
+ if (req->flags & REQ_F_ISREG) {
__sb_start_write(file_inode(file)->i_sb,
SB_FREEZE_WRITE, true);
__sb_writers_release(file_inode(file)->i_sb,
static int io_timeout(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
- unsigned count, req_dist, tail_index;
+ unsigned count;
struct io_ring_ctx *ctx = req->ctx;
struct list_head *entry;
struct timespec64 ts;
count = 1;
req->sequence = ctx->cached_sq_head + count - 1;
+ /* reuse it to store the count */
+ req->submit.sequence = count;
req->flags |= REQ_F_TIMEOUT;
/*
* Insertion sort, ensuring the first entry in the list is always
* the one we need first.
*/
- tail_index = ctx->cached_cq_tail - ctx->rings->sq_dropped;
- req_dist = req->sequence - tail_index;
spin_lock_irq(&ctx->completion_lock);
list_for_each_prev(entry, &ctx->timeout_list) {
struct io_kiocb *nxt = list_entry(entry, struct io_kiocb, list);
- unsigned dist;
+ unsigned nxt_sq_head;
+ long long tmp, tmp_nxt;
- dist = nxt->sequence - tail_index;
- if (req_dist >= dist)
+ /*
+ * Since cached_sq_head + count - 1 can overflow, use type long
+ * long to store it.
+ */
+ tmp = (long long)ctx->cached_sq_head + count - 1;
+ nxt_sq_head = nxt->sequence - nxt->submit.sequence + 1;
+ tmp_nxt = (long long)nxt_sq_head + nxt->submit.sequence - 1;
+
+ /*
+ * cached_sq_head may overflow, but it will never overflow twice
+ * as long as some timeout req is still valid.
+ */
+ if (ctx->cached_sq_head < nxt_sq_head)
+ tmp += UINT_MAX;
+
+ if (tmp >= tmp_nxt)
break;
}
list_add(&req->list, entry);
int ret;
ret = __io_submit_sqe(ctx, req, s, force_nonblock);
- if (ret == -EAGAIN && !(req->flags & REQ_F_NOWAIT)) {
+
+ /*
+ * We async punt it if the file wasn't marked NOWAIT, or if the file
+ * doesn't support non-blocking read/write attempts
+ */
+ if (ret == -EAGAIN && (!(req->flags & REQ_F_NOWAIT) ||
+ (req->flags & REQ_F_MUST_PUNT))) {
struct io_uring_sqe *sqe_copy;
sqe_copy = kmemdup(s->sqe, sizeof(*sqe_copy), GFP_KERNEL);
/**
* simple_write_end - .write_end helper for non-block-device FSes
- * @available: See .write_end of address_space_operations
- * @file: "
+ * @file: See .write_end of address_space_operations
* @mapping: "
* @pos: "
* @len: "
transfer_to[USRQUOTA] = dqget(sb, make_kqid_uid(attr->ia_uid));
if (IS_ERR(transfer_to[USRQUOTA])) {
status = PTR_ERR(transfer_to[USRQUOTA]);
+ transfer_to[USRQUOTA] = NULL;
goto bail_unlock;
}
}
transfer_to[GRPQUOTA] = dqget(sb, make_kqid_gid(attr->ia_gid));
if (IS_ERR(transfer_to[GRPQUOTA])) {
status = PTR_ERR(transfer_to[GRPQUOTA]);
+ transfer_to[GRPQUOTA] = NULL;
goto bail_unlock;
}
}
/* At this point, we know that no more recovery threads can be
* launched, so wait for any recovery completion work to
* complete. */
- flush_workqueue(osb->ocfs2_wq);
+ if (osb->ocfs2_wq)
+ flush_workqueue(osb->ocfs2_wq);
/*
* Now that recovery is shut down, and the osb is about to be
struct ocfs2_dinode *alloc = NULL;
cancel_delayed_work(&osb->la_enable_wq);
- flush_workqueue(osb->ocfs2_wq);
+ if (osb->ocfs2_wq)
+ flush_workqueue(osb->ocfs2_wq);
if (osb->local_alloc_state == OCFS2_LA_UNUSED)
goto out;
global_node_page_state(NR_SHMEM_THPS) * HPAGE_PMD_NR);
show_val_kb(m, "ShmemPmdMapped: ",
global_node_page_state(NR_SHMEM_PMDMAPPED) * HPAGE_PMD_NR);
- show_val_kb(m, "FileHugePages: ",
+ show_val_kb(m, "FileHugePages: ",
global_node_page_state(NR_FILE_THPS) * HPAGE_PMD_NR);
- show_val_kb(m, "FilePmdMapped: ",
+ show_val_kb(m, "FilePmdMapped: ",
global_node_page_state(NR_FILE_PMDMAPPED) * HPAGE_PMD_NR);
#endif
return -EINVAL;
while (count > 0) {
- if (pfn_valid(pfn))
- ppage = pfn_to_page(pfn);
- else
- ppage = NULL;
+ /*
+ * TODO: ZONE_DEVICE support requires to identify
+ * memmaps that were actually initialized.
+ */
+ ppage = pfn_to_online_page(pfn);
+
if (!ppage || PageSlab(ppage) || page_has_type(ppage))
pcount = 0;
else
return -EINVAL;
while (count > 0) {
- if (pfn_valid(pfn))
- ppage = pfn_to_page(pfn);
- else
- ppage = NULL;
+ /*
+ * TODO: ZONE_DEVICE support requires to identify
+ * memmaps that were actually initialized.
+ */
+ ppage = pfn_to_online_page(pfn);
if (put_user(stable_page_flags(ppage), out)) {
ret = -EFAULT;
return -EINVAL;
while (count > 0) {
- if (pfn_valid(pfn))
- ppage = pfn_to_page(pfn);
- else
- ppage = NULL;
+ /*
+ * TODO: ZONE_DEVICE support requires to identify
+ * memmaps that were actually initialized.
+ */
+ ppage = pfn_to_online_page(pfn);
if (ppage)
ino = page_cgroup_ino(ppage);
*/
static int verify_dirent_name(const char *name, int len)
{
- if (WARN_ON_ONCE(!len))
+ if (!len)
return -EIO;
- if (WARN_ON_ONCE(memchr(name, '/', len)))
+ if (memchr(name, '/', len))
return -EIO;
return 0;
}
uint64_t bs_blocks; /* number of blocks */
uint64_t bs_xflags; /* extended flags */
- uint64_t bs_atime; /* access time, seconds */
- uint64_t bs_mtime; /* modify time, seconds */
+ int64_t bs_atime; /* access time, seconds */
+ int64_t bs_mtime; /* modify time, seconds */
- uint64_t bs_ctime; /* inode change time, seconds */
- uint64_t bs_btime; /* creation time, seconds */
+ int64_t bs_ctime; /* inode change time, seconds */
+ int64_t bs_btime; /* creation time, seconds */
uint32_t bs_gen; /* generation count */
uint32_t bs_uid; /* user id */
}
/**
- * bitmap_or_equal - Check whether the or of two bitnaps is equal to a third
+ * bitmap_or_equal - Check whether the or of two bitmaps is equal to a third
* @src1: Pointer to bitmap 1
* @src2: Pointer to bitmap 2 will be or'ed with bitmap 1
* @src3: Pointer to bitmap 3. Compare to the result of *@src1 | *@src2
+ * @nbits: number of bits in each of these bitmaps
*
* Returns: True if (*@src1 | *@src2) == *@src3, false otherwise
*/
*/
bool threaded;
+ /**
+ * @init_hw: optional routine to initialize hardware before
+ * an IRQ chip will be added. This is quite useful when
+ * a particular driver wants to clear IRQ related registers
+ * in order to avoid undesired events.
+ */
+ int (*init_hw)(struct gpio_chip *chip);
+
/**
* @init_valid_mask: optional routine to initialize @valid_mask, to be
* used if not all GPIO lines are valid interrupts. Sometimes some
struct preempt_notifier preempt_notifier;
#endif
int cpu;
- int vcpu_id;
+ int vcpu_id; /* id given by userspace at creation */
+ int vcpu_idx; /* index in kvm->vcpus array */
int srcu_idx;
int mode;
u64 requests;
struct mutex mutex;
struct kvm_run *run;
- int guest_xcr0_loaded;
struct swait_queue_head wq;
struct pid __rcu *pid;
int sigset_active;
static inline int kvm_vcpu_get_idx(struct kvm_vcpu *vcpu)
{
- struct kvm_vcpu *tmp;
- int idx;
-
- kvm_for_each_vcpu(idx, tmp, vcpu->kvm)
- if (tmp == vcpu)
- return idx;
- BUG();
+ return vcpu->vcpu_idx;
}
#define kvm_for_each_memslot(memslot, slots) \
void kvm_get_kvm(struct kvm *kvm);
void kvm_put_kvm(struct kvm *kvm);
+void kvm_put_kvm_no_destroy(struct kvm *kvm);
static inline struct kvm_memslots *__kvm_memslots(struct kvm *kvm, int as_id)
{
bool kvm_make_vcpus_request_mask(struct kvm *kvm, unsigned int req,
unsigned long *vcpu_bitmap, cpumask_var_t tmp);
bool kvm_make_all_cpus_request(struct kvm *kvm, unsigned int req);
+bool kvm_make_cpus_request_mask(struct kvm *kvm, unsigned int req,
+ unsigned long *vcpu_bitmap);
long kvm_arch_dev_ioctl(struct file *filp,
unsigned int ioctl, unsigned long arg);
#define PHY_ID_KSZ886X 0x00221430
#define PHY_ID_KSZ8863 0x00221435
-#define PHY_ID_KSZ8795 0x00221550
+#define PHY_ID_KSZ87XX 0x00221550
#define PHY_ID_KSZ9477 0x00221631
enum page_ext_flags {
PAGE_EXT_OWNER,
- PAGE_EXT_OWNER_ACTIVE,
+ PAGE_EXT_OWNER_ALLOCATED,
#if defined(CONFIG_IDLE_PAGE_TRACKING) && !defined(CONFIG_64BIT)
PAGE_EXT_YOUNG,
PAGE_EXT_IDLE,
unsigned long flags;
};
+extern unsigned long page_ext_size;
extern void pgdat_page_ext_init(struct pglist_data *pgdat);
#ifdef CONFIG_SPARSEMEM
struct page_ext *lookup_page_ext(const struct page *page);
+static inline struct page_ext *page_ext_next(struct page_ext *curr)
+{
+ void *next = curr;
+ next += page_ext_size;
+ return next;
+}
+
#else /* !CONFIG_PAGE_EXTENSION */
struct page_ext;
extern void perf_event_disable_inatomic(struct perf_event *event);
extern void perf_event_task_tick(void);
extern int perf_event_account_interrupt(struct perf_event *event);
+extern int perf_event_period(struct perf_event *event, u64 value);
+extern u64 perf_event_pause(struct perf_event *event, bool reset);
#else /* !CONFIG_PERF_EVENTS: */
static inline void *
perf_aux_output_begin(struct perf_output_handle *handle,
static inline int __perf_event_disable(void *info) { return -1; }
static inline void perf_event_task_tick(void) { }
static inline int perf_event_release_kernel(struct perf_event *event) { return 0; }
+static inline int perf_event_period(struct perf_event *event, u64 value)
+{
+ return -EINVAL;
+}
+static inline u64 perf_event_pause(struct perf_event *event, bool reset)
+{
+ return 0;
+}
#endif
#if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_INTEL)
extern long schedule_timeout_idle(long timeout);
asmlinkage void schedule(void);
extern void schedule_preempt_disabled(void);
+asmlinkage void preempt_schedule_irq(void);
extern int __must_check io_schedule_prepare(void);
extern void io_schedule_finish(int token);
int __skb_vlan_pop(struct sk_buff *skb, u16 *vlan_tci);
int skb_vlan_pop(struct sk_buff *skb);
int skb_vlan_push(struct sk_buff *skb, __be16 vlan_proto, u16 vlan_tci);
-int skb_mpls_push(struct sk_buff *skb, __be32 mpls_lse, __be16 mpls_proto);
-int skb_mpls_pop(struct sk_buff *skb, __be16 next_proto);
+int skb_mpls_push(struct sk_buff *skb, __be32 mpls_lse, __be16 mpls_proto,
+ int mac_len);
+int skb_mpls_pop(struct sk_buff *skb, __be16 next_proto, int mac_len);
int skb_mpls_update_lse(struct sk_buff *skb, __be32 mpls_lse);
int skb_mpls_dec_ttl(struct sk_buff *skb);
struct sk_buff *pskb_extract(struct sk_buff *skb, int off, int to_copy,
/* fastopen_rsk points to request_sock that resulted in this big
* socket. Used to retransmit SYNACKs etc.
*/
- struct request_sock *fastopen_rsk;
+ struct request_sock __rcu *fastopen_rsk;
u32 *saved_syn;
};
static inline bool tcp_passive_fastopen(const struct sock *sk)
{
- return (sk->sk_state == TCP_SYN_RECV &&
- tcp_sk(sk)->fastopen_rsk != NULL);
+ return sk->sk_state == TCP_SYN_RECV &&
+ rcu_access_pointer(tcp_sk(sk)->fastopen_rsk) != NULL;
}
static inline void fastopen_queue_tune(struct sock *sk, int backlog)
* This structure is used either directly or via the XA_LIMIT() macro
* to communicate the range of IDs that are valid for allocation.
* Two common ranges are predefined for you:
- * * xa_limit_32b - [0 - UINT_MAX]
- * * xa_limit_31b - [0 - INT_MAX]
+ * * xa_limit_32b - [0 - UINT_MAX]
+ * * xa_limit_31b - [0 - INT_MAX]
*/
struct xa_limit {
u32 max;
*/
const char *reg_initiator_name(enum nl80211_reg_initiator initiator);
+/**
+ * regulatory_pre_cac_allowed - check if pre-CAC allowed in the current regdom
+ * @wiphy: wiphy for which pre-CAC capability is checked.
+ *
+ * Pre-CAC is allowed only in some regdomains (notably ETSI).
+ */
+bool regulatory_pre_cac_allowed(struct wiphy *wiphy);
+
/**
* DOC: Internal regulatory db functions
*
/* Access to a connection */
int llc_conn_state_process(struct sock *sk, struct sk_buff *skb);
-int llc_conn_send_pdu(struct sock *sk, struct sk_buff *skb);
+void llc_conn_send_pdu(struct sock *sk, struct sk_buff *skb);
void llc_conn_rtn_pdu(struct sock *sk, struct sk_buff *skb);
void llc_conn_resend_i_pdu_as_cmd(struct sock *sk, u8 nr, u8 first_p_bit);
void llc_conn_resend_i_pdu_as_rsp(struct sock *sk, u8 nr, u8 first_f_bit);
#define NETDEV_HASHENTRIES (1 << NETDEV_HASHBITS)
struct net {
+ /* First cache line can be often dirtied.
+ * Do not place here read-mostly fields.
+ */
refcount_t passive; /* To decide when the network
* namespace should be freed.
*/
*/
spinlock_t rules_mod_lock;
- u32 hash_mix;
+ unsigned int dev_unreg_count;
+
+ unsigned int dev_base_seq; /* protected by rtnl_mutex */
+ int ifindex;
+
+ spinlock_t nsid_lock;
+ atomic_t fnhe_genid;
struct list_head list; /* list of network namespaces */
struct list_head exit_list; /* To linked to call pernet exit
#endif
struct user_namespace *user_ns; /* Owning user namespace */
struct ucounts *ucounts;
- spinlock_t nsid_lock;
struct idr netns_ids;
struct ns_common ns;
+ struct list_head dev_base_head;
struct proc_dir_entry *proc_net;
struct proc_dir_entry *proc_net_stat;
struct uevent_sock *uevent_sock; /* uevent socket */
- struct list_head dev_base_head;
struct hlist_head *dev_name_head;
struct hlist_head *dev_index_head;
- unsigned int dev_base_seq; /* protected by rtnl_mutex */
- int ifindex;
- unsigned int dev_unreg_count;
+ /* Note that @hash_mix can be read millions times per second,
+ * it is critical that it is on a read_mostly cache line.
+ */
+ u32 hash_mix;
+
+ struct net_device *loopback_dev; /* The loopback */
/* core fib_rules */
struct list_head rules_ops;
- struct net_device *loopback_dev; /* The loopback */
struct netns_core core;
struct netns_mib mib;
struct netns_packet packet;
struct sock *crypto_nlsk;
#endif
struct sock *diag_nlsk;
- atomic_t fnhe_genid;
} __randomize_layout;
#include <linux/seq_file_net.h>
static inline bool reqsk_queue_empty(const struct request_sock_queue *queue)
{
- return queue->rskq_accept_head == NULL;
+ return READ_ONCE(queue->rskq_accept_head) == NULL;
}
static inline struct request_sock *reqsk_queue_remove(struct request_sock_queue *queue,
req = queue->rskq_accept_head;
if (req) {
sk_acceptq_removed(parent);
- queue->rskq_accept_head = req->dl_next;
+ WRITE_ONCE(queue->rskq_accept_head, req->dl_next);
if (queue->rskq_accept_head == NULL)
queue->rskq_accept_tail = NULL;
}
return sctp_mtu_payload(sp, SCTP_DEFAULT_MINSEGMENT, datasize);
}
+static inline bool sctp_newsk_ready(const struct sock *sk)
+{
+ return sock_flag(sk, SOCK_DEAD) || sk->sk_socket;
+}
+
#endif /* __net_sctp_h__ */
*/
static inline int sk_stream_min_wspace(const struct sock *sk)
{
- return sk->sk_wmem_queued >> 1;
+ return READ_ONCE(sk->sk_wmem_queued) >> 1;
}
static inline int sk_stream_wspace(const struct sock *sk)
{
- return sk->sk_sndbuf - sk->sk_wmem_queued;
+ return READ_ONCE(sk->sk_sndbuf) - READ_ONCE(sk->sk_wmem_queued);
+}
+
+static inline void sk_wmem_queued_add(struct sock *sk, int val)
+{
+ WRITE_ONCE(sk->sk_wmem_queued, sk->sk_wmem_queued + val);
}
void sk_stream_write_space(struct sock *sk);
static inline bool __sk_stream_memory_free(const struct sock *sk, int wake)
{
- if (sk->sk_wmem_queued >= sk->sk_sndbuf)
+ if (READ_ONCE(sk->sk_wmem_queued) >= READ_ONCE(sk->sk_sndbuf))
return false;
return sk->sk_prot->stream_memory_free ?
static inline void sk_wmem_free_skb(struct sock *sk, struct sk_buff *skb)
{
sock_set_flag(sk, SOCK_QUEUE_SHRUNK);
- sk->sk_wmem_queued -= skb->truesize;
+ sk_wmem_queued_add(sk, -skb->truesize);
sk_mem_uncharge(sk, skb->truesize);
if (static_branch_unlikely(&tcp_tx_skb_cache_key) &&
!sk->sk_tx_skb_cache && !skb_cloned(skb)) {
skb->len += copy;
skb->data_len += copy;
skb->truesize += copy;
- sk->sk_wmem_queued += copy;
+ sk_wmem_queued_add(sk, copy);
sk_mem_charge(sk, copy);
return 0;
}
static inline void sk_stream_moderate_sndbuf(struct sock *sk)
{
- if (!(sk->sk_userlocks & SOCK_SNDBUF_LOCK)) {
- sk->sk_sndbuf = min(sk->sk_sndbuf, sk->sk_wmem_queued >> 1);
- sk->sk_sndbuf = max_t(u32, sk->sk_sndbuf, SOCK_MIN_SNDBUF);
- }
+ u32 val;
+
+ if (sk->sk_userlocks & SOCK_SNDBUF_LOCK)
+ return;
+
+ val = min(sk->sk_sndbuf, sk->sk_wmem_queued >> 1);
+
+ WRITE_ONCE(sk->sk_sndbuf, max_t(u32, val, SOCK_MIN_SNDBUF));
}
struct sk_buff *sk_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp,
*/
static inline bool sock_writeable(const struct sock *sk)
{
- return refcount_read(&sk->sk_wmem_alloc) < (sk->sk_sndbuf >> 1);
+ return refcount_read(&sk->sk_wmem_alloc) < (READ_ONCE(sk->sk_sndbuf) >> 1);
}
static inline gfp_t gfp_any(void)
static inline int sock_rcvlowat(const struct sock *sk, int waitall, int len)
{
- return (waitall ? len : min_t(int, sk->sk_rcvlowat, len)) ? : 1;
+ int v = waitall ? len : min_t(int, READ_ONCE(sk->sk_rcvlowat), len);
+
+ return v ?: 1;
}
/* Alas, with timeout socket operations are not restartable.
mem_cgroup_under_socket_pressure(sk->sk_memcg))
return true;
- return tcp_memory_pressure;
+ return READ_ONCE(tcp_memory_pressure);
}
/*
* The next routines deal with comparing 32 bit unsigned ints
/* Note: caller must be prepared to deal with negative returns */
static inline int tcp_space(const struct sock *sk)
{
- return tcp_win_from_space(sk, sk->sk_rcvbuf - sk->sk_backlog.len -
+ return tcp_win_from_space(sk, READ_ONCE(sk->sk_rcvbuf) -
+ READ_ONCE(sk->sk_backlog.len) -
atomic_read(&sk->sk_rmem_alloc));
}
static inline int tcp_full_space(const struct sock *sk)
{
- return tcp_win_from_space(sk, sk->sk_rcvbuf);
+ return tcp_win_from_space(sk, READ_ONCE(sk->sk_rcvbuf));
}
extern void tcp_openreq_init_rwin(struct request_sock *req,
static inline bool tcp_stream_memory_free(const struct sock *sk, int wake)
{
const struct tcp_sock *tp = tcp_sk(sk);
- u32 notsent_bytes = tp->write_seq - tp->snd_nxt;
+ u32 notsent_bytes = READ_ONCE(tp->write_seq) -
+ READ_ONCE(tp->snd_nxt);
return (notsent_bytes << wake) < tcp_notsent_lowat(tp);
}
struct scsi_eh_save {
/* saved state */
int result;
+ unsigned int resid_len;
int eh_eflags;
enum dma_data_direction data_direction;
unsigned underflow;
#define AZX_REG_ML_LOUTPAY 0x20
#define AZX_REG_ML_LINPAY 0x30
+/* bit0 is reserved, with BIT(1) mapping to stream1 */
+#define ML_LOSIDV_STREAM_MASK 0xFFFE
+
#define ML_LCTL_SCF_MASK 0xF
#define AZX_MLCTL_SPA (0x1 << 16)
#define AZX_MLCTL_CPA (0x1 << 23)
);
TRACE_EVENT(rxrpc_peer,
- TP_PROTO(struct rxrpc_peer *peer, enum rxrpc_peer_trace op,
+ TP_PROTO(unsigned int peer_debug_id, enum rxrpc_peer_trace op,
int usage, const void *where),
- TP_ARGS(peer, op, usage, where),
+ TP_ARGS(peer_debug_id, op, usage, where),
TP_STRUCT__entry(
__field(unsigned int, peer )
),
TP_fast_assign(
- __entry->peer = peer->debug_id;
+ __entry->peer = peer_debug_id;
__entry->op = op;
__entry->usage = usage;
__entry->where = where;
);
TRACE_EVENT(rxrpc_conn,
- TP_PROTO(struct rxrpc_connection *conn, enum rxrpc_conn_trace op,
+ TP_PROTO(unsigned int conn_debug_id, enum rxrpc_conn_trace op,
int usage, const void *where),
- TP_ARGS(conn, op, usage, where),
+ TP_ARGS(conn_debug_id, op, usage, where),
TP_STRUCT__entry(
__field(unsigned int, conn )
),
TP_fast_assign(
- __entry->conn = conn->debug_id;
+ __entry->conn = conn_debug_id;
__entry->op = op;
__entry->usage = usage;
__entry->where = where;
);
TRACE_EVENT(rxrpc_call,
- TP_PROTO(struct rxrpc_call *call, enum rxrpc_call_trace op,
+ TP_PROTO(unsigned int call_debug_id, enum rxrpc_call_trace op,
int usage, const void *where, const void *aux),
- TP_ARGS(call, op, usage, where, aux),
+ TP_ARGS(call_debug_id, op, usage, where, aux),
TP_STRUCT__entry(
__field(unsigned int, call )
),
TP_fast_assign(
- __entry->call = call->debug_id;
+ __entry->call = call_debug_id;
__entry->op = op;
__entry->usage = usage;
__entry->where = where;
TP_fast_assign(
__entry->rmem_alloc = atomic_read(&sk->sk_rmem_alloc);
__entry->truesize = skb->truesize;
- __entry->sk_rcvbuf = sk->sk_rcvbuf;
+ __entry->sk_rcvbuf = READ_ONCE(sk->sk_rcvbuf);
),
TP_printk("rmem_alloc=%d truesize=%u sk_rcvbuf=%d",
__entry->rmem_alloc = atomic_read(&sk->sk_rmem_alloc);
__entry->sysctl_wmem = sk_get_wmem0(sk, prot);
__entry->wmem_alloc = refcount_read(&sk->sk_wmem_alloc);
- __entry->wmem_queued = sk->sk_wmem_queued;
+ __entry->wmem_queued = READ_ONCE(sk->sk_wmem_queued);
__entry->kind = kind;
),
#define KVM_CAP_PMU_EVENT_FILTER 173
#define KVM_CAP_ARM_IRQ_LINE_LAYOUT_2 174
#define KVM_CAP_HYPERV_DIRECT_TLBFLUSH 175
-#define KVM_CAP_ARM_NISV_TO_USER 176
-#define KVM_CAP_ARM_INJECT_EXT_DABT 177
+#define KVM_CAP_PPC_GUEST_DEBUG_SSTEP 176
+#define KVM_CAP_ARM_NISV_TO_USER 177
+#define KVM_CAP_ARM_INJECT_EXT_DABT 178
#ifdef KVM_CAP_IRQ_ROUTING
perf_event_update_userpage(event);
}
+/* Assume it's not an event with inherit set. */
+u64 perf_event_pause(struct perf_event *event, bool reset)
+{
+ struct perf_event_context *ctx;
+ u64 count;
+
+ ctx = perf_event_ctx_lock(event);
+ WARN_ON_ONCE(event->attr.inherit);
+ _perf_event_disable(event);
+ count = local64_read(&event->count);
+ if (reset)
+ local64_set(&event->count, 0);
+ perf_event_ctx_unlock(event, ctx);
+
+ return count;
+}
+EXPORT_SYMBOL_GPL(perf_event_pause);
+
/*
* Holding the top-level event's child_mutex means that any
* descendant process that has inherited this event will block
return event->pmu->check_period(event, value);
}
-static int perf_event_period(struct perf_event *event, u64 __user *arg)
+static int _perf_event_period(struct perf_event *event, u64 value)
{
- u64 value;
-
if (!is_sampling_event(event))
return -EINVAL;
- if (copy_from_user(&value, arg, sizeof(value)))
- return -EFAULT;
-
if (!value)
return -EINVAL;
return 0;
}
+int perf_event_period(struct perf_event *event, u64 value)
+{
+ struct perf_event_context *ctx;
+ int ret;
+
+ ctx = perf_event_ctx_lock(event);
+ ret = _perf_event_period(event, value);
+ perf_event_ctx_unlock(event, ctx);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(perf_event_period);
+
static const struct file_operations perf_fops;
static inline int perf_fget_light(int fd, struct fd *p)
return _perf_event_refresh(event, arg);
case PERF_EVENT_IOC_PERIOD:
- return perf_event_period(event, (u64 __user *)arg);
+ {
+ u64 value;
+ if (copy_from_user(&value, (u64 __user *)arg, sizeof(value)))
+ return -EFAULT;
+
+ return _perf_event_period(event, value);
+ }
case PERF_EVENT_IOC_ID:
{
u64 id = primary_event_id(event);
struct vm_area_struct *vma;
int ret, is_register, ref_ctr_updated = 0;
bool orig_page_huge = false;
+ unsigned int gup_flags = FOLL_FORCE;
is_register = is_swbp_insn(&opcode);
uprobe = container_of(auprobe, struct uprobe, arch);
retry:
+ if (is_register)
+ gup_flags |= FOLL_SPLIT_PMD;
/* Read the page with vaddr into memory */
- ret = get_user_pages_remote(NULL, mm, vaddr, 1,
- FOLL_FORCE | FOLL_SPLIT_PMD, &old_page, &vma, NULL);
+ ret = get_user_pages_remote(NULL, mm, vaddr, 1, gup_flags,
+ &old_page, &vma, NULL);
if (ret <= 0)
return ret;
if (ret <= 0)
goto put_old;
+ if (WARN(!is_register && PageCompound(old_page),
+ "uprobe unregister should never work on compound page\n")) {
+ ret = -EINVAL;
+ goto put_old;
+ }
+
/* We are going to replace instruction, update ref_ctr. */
if (!ref_ctr_updated && uprobe->ref_ctr_offset) {
ret = update_ref_ctr(uprobe, mm, is_register ? 1 : -1);
find $cpio_dir -type f -print0 |
xargs -0 -P8 -n1 perl -pi -e 'BEGIN {undef $/;}; s/\/\*((?!SPDX).)*?\*\///smg;'
-# Create archive and try to normalize metadata for reproducibility
-tar "${KBUILD_BUILD_TIMESTAMP:+--mtime=$KBUILD_BUILD_TIMESTAMP}" \
- --owner=0 --group=0 --sort=name --numeric-owner \
- -Jcf $tarfile -C $cpio_dir/ . > /dev/null
+# Create archive and try to normalize metadata for reproducibility.
+# For compatibility with older versions of tar, files are fed to tar
+# pre-sorted, as --sort=name might not be available.
+find $cpio_dir -printf "./%P\n" | LC_ALL=C sort | \
+ tar "${KBUILD_BUILD_TIMESTAMP:+--mtime=$KBUILD_BUILD_TIMESTAMP}" \
+ --owner=0 --group=0 --numeric-owner --no-recursion \
+ -Jcf $tarfile -C $cpio_dir/ -T - > /dev/null
echo "$src_files_md5" > kernel/kheaders.md5
echo "$obj_files_md5" >> kernel/kheaders.md5
}
EXPORT_SYMBOL(kthread_delayed_work_timer_fn);
-void __kthread_queue_delayed_work(struct kthread_worker *worker,
- struct kthread_delayed_work *dwork,
- unsigned long delay)
+static void __kthread_queue_delayed_work(struct kthread_worker *worker,
+ struct kthread_delayed_work *dwork,
+ unsigned long delay)
{
struct timer_list *timer = &dwork->timer;
struct kthread_work *work = &dwork->work;
#include <linux/seq_file.h>
#include <linux/suspend.h>
#include <linux/syscalls.h>
+#include <linux/pm_runtime.h>
#include "power.h"
* Copyright (C) 2010 SUSE Linux Products GmbH
* Copyright (C) 2010 Tejun Heo <tj@kernel.org>
*/
+#include <linux/compiler.h>
#include <linux/completion.h>
#include <linux/cpu.h>
#include <linux/init.h>
/* Reset ack counter. */
atomic_set(&msdata->thread_ack, msdata->num_threads);
smp_wmb();
- msdata->state = newstate;
+ WRITE_ONCE(msdata->state, newstate);
}
/* Last one to ack a state moves to the next state. */
static int multi_cpu_stop(void *data)
{
struct multi_stop_data *msdata = data;
- enum multi_stop_state curstate = MULTI_STOP_NONE;
+ enum multi_stop_state newstate, curstate = MULTI_STOP_NONE;
int cpu = smp_processor_id(), err = 0;
const struct cpumask *cpumask;
unsigned long flags;
do {
/* Chill out and ensure we re-read multi_stop_state. */
stop_machine_yield(cpumask);
- if (msdata->state != curstate) {
- curstate = msdata->state;
+ newstate = READ_ONCE(msdata->state);
+ if (newstate != curstate) {
+ curstate = newstate;
switch (curstate) {
case MULTI_STOP_DISABLE_IRQ:
local_irq_disable();
#ifdef CONFIG_SPARC
#endif
-#ifdef __hppa__
+#ifdef CONFIG_PARISC
extern int pwrsw_enabled;
#endif
.proc_handler = proc_dointvec,
},
#endif
-#ifdef __hppa__
+#ifdef CONFIG_PARISC
{
.procname = "soft-power",
.data = &pwrsw_enabled,
struct hrtimer_clock_base *base;
for (;;) {
- base = timer->base;
+ base = READ_ONCE(timer->base);
if (likely(base != &migration_base)) {
raw_spin_lock_irqsave(&base->cpu_base->lock, *flags);
if (likely(base == timer->base))
return base;
/* See the comment in lock_hrtimer_base() */
- timer->base = &migration_base;
+ WRITE_ONCE(timer->base, &migration_base);
raw_spin_unlock(&base->cpu_base->lock);
raw_spin_lock(&new_base->cpu_base->lock);
raw_spin_unlock(&new_base->cpu_base->lock);
raw_spin_lock(&base->cpu_base->lock);
new_cpu_base = this_cpu_base;
- timer->base = base;
+ WRITE_ONCE(timer->base, base);
goto again;
}
- timer->base = new_base;
+ WRITE_ONCE(timer->base, new_base);
} else {
if (new_cpu_base != this_cpu_base &&
hrtimer_check_target(timer, new_base)) {
#include <linux/export.h>
#include <linux/generic-radix-tree.h>
#include <linux/gfp.h>
+#include <linux/kmemleak.h>
#define GENRADIX_ARY (PAGE_SIZE / sizeof(struct genradix_node *))
#define GENRADIX_ARY_SHIFT ilog2(GENRADIX_ARY)
}
EXPORT_SYMBOL(__genradix_ptr);
+/*
+ * Allocate one zeroed page to serve as a radix tree node; returns NULL when
+ * the page allocation fails.
+ *
+ * Nodes are taken straight from the page allocator instead of slab, so
+ * kmemleak must be told about them explicitly, otherwise it would produce
+ * false positive leak reports.
+ */
+static inline struct genradix_node *genradix_alloc_node(gfp_t gfp_mask)
+{
+ unsigned long addr = __get_free_page(gfp_mask | __GFP_ZERO);
+ struct genradix_node *node = (struct genradix_node *)addr;
+
+ kmemleak_alloc(node, PAGE_SIZE, 1, gfp_mask);
+ return node;
+}
+
+/* Unregister @node from kmemleak, then give its page back to the allocator. */
+static inline void genradix_free_node(struct genradix_node *node)
+{
+ unsigned long addr = (unsigned long)node;
+
+ kmemleak_free(node);
+ free_page(addr);
+}
+
/*
* Returns pointer to the specified byte @offset within @radix, allocating it if
* necessary - newly allocated slots are always zeroed out:
break;
if (!new_node) {
- new_node = (void *)
- __get_free_page(gfp_mask|__GFP_ZERO);
+ new_node = genradix_alloc_node(gfp_mask);
if (!new_node)
return NULL;
}
n = READ_ONCE(*p);
if (!n) {
if (!new_node) {
- new_node = (void *)
- __get_free_page(gfp_mask|__GFP_ZERO);
+ new_node = genradix_alloc_node(gfp_mask);
if (!new_node)
return NULL;
}
}
if (new_node)
- free_page((unsigned long) new_node);
+ genradix_free_node(new_node);
return &n->data[offset];
}
genradix_free_recurse(n->children[i], level - 1);
}
- free_page((unsigned long) n);
+ genradix_free_node(n);
}
int __genradix_prealloc(struct __genradix *radix, size_t size,
return 1;
}
+/*
+ * Check that objects obtained through kmem_cache_alloc_bulk() are zeroed.
+ *
+ * A temporary cache with object size and alignment @size is created, then up
+ * to 1024 rounds of bulk allocation (at most 10 objects per round) are run.
+ * Every returned object is checked with count_nonzero_bytes(); a non-zero
+ * result marks the test as failed and stops further rounds. Each object is
+ * then filled with garbage before being freed, so that a later allocation
+ * handing it back uninitialized would be noticed.
+ *
+ * The cache is destroyed before returning so that repeated calls (one per
+ * tested size) do not leak a cache each.
+ *
+ * Adds 0 or 1 to *@total_failures and always returns 1, the number of tests
+ * performed.
+ */
+static int __init do_kmem_cache_size_bulk(int size, int *total_failures)
+{
+ struct kmem_cache *c;
+ int i, iter, maxiter = 1024;
+ int num, bytes;
+ bool fail = false;
+ void *objects[10];
+
+ c = kmem_cache_create("test_cache", size, size, 0, NULL);
+ for (iter = 0; (iter < maxiter) && !fail; iter++) {
+ num = kmem_cache_alloc_bulk(c, GFP_KERNEL, ARRAY_SIZE(objects),
+ objects);
+ for (i = 0; i < num; i++) {
+ bytes = count_nonzero_bytes(objects[i], size);
+ if (bytes)
+ fail = true;
+ fill_with_garbage(objects[i], size);
+ }
+
+ if (num)
+ kmem_cache_free_bulk(c, num, objects);
+ }
+ /* All objects were freed above; release the temporary cache itself. */
+ kmem_cache_destroy(c);
+ *total_failures += fail;
+ return 1;
+}
+
/*
* Test kmem_cache allocation by creating caches of different sizes, with and
* without constructors, with and without SLAB_TYPESAFE_BY_RCU.
num_tests += do_kmem_cache_size(size, ctor, rcu, zero,
&failures);
}
+ num_tests += do_kmem_cache_size_bulk(size, &failures);
}
REPORT_FAILURES_IN_FN();
*total_failures += failures;
static int test_check_nonzero_user(char *kmem, char __user *umem, size_t size)
{
int ret = 0;
- size_t start, end, i;
- size_t zero_start = size / 4;
- size_t zero_end = size - zero_start;
+ size_t start, end, i, zero_start, zero_end;
+
+ if (test(size < 2 * PAGE_SIZE, "buffer too small"))
+ return -EINVAL;
+
+ /*
+ * We want to cross a page boundary to exercise the code more
+ * effectively. We also don't want to make the size we scan too large,
+ * otherwise the test can take a long time and cause soft lockups. So
+ * scan a 1024 byte region across the page boundary.
+ */
+ size = 1024;
+ start = PAGE_SIZE - (size / 2);
+
+ kmem += start;
+ umem += start;
+
+ zero_start = size / 4;
+ zero_end = size - zero_start;
/*
- * We conduct a series of check_nonzero_user() tests on a block of memory
- * with the following byte-pattern (trying every possible [start,end]
- * pair):
+ * We conduct a series of check_nonzero_user() tests on a block of
+ * memory with the following byte-pattern (trying every possible
+ * [start,end] pair):
*
* [ 00 ff 00 ff ... 00 00 00 00 ... ff 00 ff 00 ]
*
- * And we verify that check_nonzero_user() acts identically to memchr_inv().
+ * And we verify that check_nonzero_user() acts identically to
+ * memchr_inv().
*/
memset(kmem, 0x0, size);
size_t ksize, usize;
umem_src = kmalloc(size, GFP_KERNEL);
- if ((ret |= test(umem_src == NULL, "kmalloc failed")))
+ ret = test(umem_src == NULL, "kmalloc failed");
+ if (ret)
goto out_free;
expected = kmalloc(size, GFP_KERNEL);
- if ((ret |= test(expected == NULL, "kmalloc failed")))
+ ret = test(expected == NULL, "kmalloc failed");
+ if (ret)
goto out_free;
/* Fill umem with a fixed byte pattern. */
/* Ensure the start of the pageblock or zone is online and valid */
block_pfn = pageblock_start_pfn(pfn);
- block_page = pfn_to_online_page(max(block_pfn, zone->zone_start_pfn));
+ block_pfn = max(block_pfn, zone->zone_start_pfn);
+ block_page = pfn_to_online_page(block_pfn);
if (block_page) {
page = block_page;
pfn = block_pfn;
}
/* Ensure the end of the pageblock or zone is online and valid */
- block_pfn += pageblock_nr_pages;
+ block_pfn = pageblock_end_pfn(pfn) - 1;
block_pfn = min(block_pfn, zone_end_pfn(zone) - 1);
end_page = pfn_to_online_page(block_pfn);
if (!end_page)
page += (1 << PAGE_ALLOC_COSTLY_ORDER);
pfn += (1 << PAGE_ALLOC_COSTLY_ORDER);
- } while (page < end_page);
+ } while (page <= end_page);
return false;
}
#include <linux/rmap.h>
#include <linux/delayacct.h>
#include <linux/psi.h>
+#include <linux/ramfs.h>
#include "internal.h"
#define CREATE_TRACE_POINTS
}
static int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr,
- unsigned long end, int write, struct page **pages, int *nr)
+ unsigned long end, unsigned int flags,
+ struct page **pages, int *nr)
{
unsigned long pte_end;
struct page *head, *page;
pte = READ_ONCE(*ptep);
- if (!pte_access_permitted(pte, write))
+ if (!pte_access_permitted(pte, flags & FOLL_WRITE))
return 0;
/* hugepages are never "special" */
}
static int gup_huge_pd(hugepd_t hugepd, unsigned long addr,
- unsigned int pdshift, unsigned long end, int write,
+ unsigned int pdshift, unsigned long end, unsigned int flags,
struct page **pages, int *nr)
{
pte_t *ptep;
ptep = hugepte_offset(hugepd, addr, pdshift);
do {
next = hugepte_addr_end(addr, end, sz);
- if (!gup_hugepte(ptep, sz, addr, end, write, pages, nr))
+ if (!gup_hugepte(ptep, sz, addr, end, flags, pages, nr))
return 0;
} while (ptep++, addr = next, addr != end);
}
#else
static inline int gup_huge_pd(hugepd_t hugepd, unsigned long addr,
- unsigned pdshift, unsigned long end, int write,
+ unsigned int pdshift, unsigned long end, unsigned int flags,
struct page **pages, int *nr)
{
return 0;
#endif /* CONFIG_ARCH_HAS_HUGEPD */
static int gup_huge_pmd(pmd_t orig, pmd_t *pmdp, unsigned long addr,
- unsigned long end, unsigned int flags, struct page **pages, int *nr)
+ unsigned long end, unsigned int flags,
+ struct page **pages, int *nr)
{
struct page *head, *page;
int refs;
ds_queue->split_queue_len--;
list_del(page_deferred_list(head));
}
- if (mapping)
- __dec_node_page_state(page, NR_SHMEM_THPS);
+ if (mapping) {
+ if (PageSwapBacked(page))
+ __dec_node_page_state(page, NR_SHMEM_THPS);
+ else
+ __dec_node_page_state(page, NR_FILE_THPS);
+ }
+
spin_unlock(&ds_queue->split_queue_lock);
__split_huge_page(page, list, end, flags);
if (PageSwapCache(head)) {
struct page *page;
for (i = start_pfn; i < end_pfn; i++) {
- if (!pfn_valid(i))
+ page = pfn_to_online_page(i);
+ if (!page)
return false;
- page = pfn_to_page(i);
-
if (page_zone(page) != z)
return false;
#include <linux/spinlock.h>
#include <linux/list.h>
#include <linux/cpumask.h>
+#include <linux/mman.h>
#include <linux/atomic.h>
#include <linux/user_namespace.h>
return object;
}
+/*
+ * Remove an object from the object_tree_root and object_list. Must be called
+ * with the kmemleak_lock held _if_ kmemleak is still enabled.
+ *
+ * This only unlinks @object (rb_erase() from the search tree, list_del_rcu()
+ * from the list); it does not free it.
+ */
+static void __remove_object(struct kmemleak_object *object)
+{
+ rb_erase(&object->rb_node, &object_tree_root);
+ list_del_rcu(&object->object_list);
+}
+
/*
* Look up an object in the object search tree and remove it from both
* object_tree_root and object_list. The returned object's use_count should be
write_lock_irqsave(&kmemleak_lock, flags);
object = lookup_object(ptr, alias);
- if (object) {
- rb_erase(&object->rb_node, &object_tree_root);
- list_del_rcu(&object->object_list);
- }
+ if (object)
+ __remove_object(object);
write_unlock_irqrestore(&kmemleak_lock, flags);
return object;
static void __kmemleak_do_cleanup(void)
{
- struct kmemleak_object *object;
+ struct kmemleak_object *object, *tmp;
- rcu_read_lock();
- list_for_each_entry_rcu(object, &object_list, object_list)
- delete_object_full(object->pointer);
- rcu_read_unlock();
+ /*
+ * Kmemleak has already been disabled, no need for RCU list traversal
+ * or kmemleak_lock held.
+ *
+ * The _safe iterator is required because __remove_object() unlinks
+ * the current entry from object_list while the list is being walked.
+ */
+ list_for_each_entry_safe(object, tmp, &object_list, object_list) {
+ __remove_object(object);
+ __delete_object(object);
+ }
}
/*
align = SMP_CACHE_BYTES;
}
- if (end > memblock.current_limit)
- end = memblock.current_limit;
-
again:
found = memblock_find_in_range_node(size, align, start, end, nid,
flags);
if (WARN_ON_ONCE(slab_is_available()))
return kzalloc_node(size, GFP_NOWAIT, nid);
+ if (max_addr > memblock.current_limit)
+ max_addr = memblock.current_limit;
+
alloc = memblock_alloc_range_nid(size, align, min_addr, max_addr, nid);
/* retry allocation without lower limit */
struct mem_cgroup *from,
struct mem_cgroup *to)
{
+ struct lruvec *from_vec, *to_vec;
+ struct pglist_data *pgdat;
unsigned long flags;
unsigned int nr_pages = compound ? hpage_nr_pages(page) : 1;
int ret;
anon = PageAnon(page);
+ pgdat = page_pgdat(page);
+ from_vec = mem_cgroup_lruvec(pgdat, from);
+ to_vec = mem_cgroup_lruvec(pgdat, to);
+
spin_lock_irqsave(&from->move_lock, flags);
if (!anon && page_mapped(page)) {
- __mod_memcg_state(from, NR_FILE_MAPPED, -nr_pages);
- __mod_memcg_state(to, NR_FILE_MAPPED, nr_pages);
+ __mod_lruvec_state(from_vec, NR_FILE_MAPPED, -nr_pages);
+ __mod_lruvec_state(to_vec, NR_FILE_MAPPED, nr_pages);
}
/*
struct address_space *mapping = page_mapping(page);
if (mapping_cap_account_dirty(mapping)) {
- __mod_memcg_state(from, NR_FILE_DIRTY, -nr_pages);
- __mod_memcg_state(to, NR_FILE_DIRTY, nr_pages);
+ __mod_lruvec_state(from_vec, NR_FILE_DIRTY, -nr_pages);
+ __mod_lruvec_state(to_vec, NR_FILE_DIRTY, nr_pages);
}
}
if (PageWriteback(page)) {
- __mod_memcg_state(from, NR_WRITEBACK, -nr_pages);
- __mod_memcg_state(to, NR_WRITEBACK, nr_pages);
+ __mod_lruvec_state(from_vec, NR_WRITEBACK, -nr_pages);
+ __mod_lruvec_state(to_vec, NR_WRITEBACK, nr_pages);
}
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
struct task_struct *tsk;
unsigned long addr;
short size_shift;
- char addr_valid;
};
/*
}
}
tk->addr = page_address_in_vma(p, vma);
- tk->addr_valid = 1;
if (is_zone_device_page(p))
tk->size_shift = dev_pagemap_mapping_shift(p, vma);
else
tk->size_shift = compound_order(compound_head(p)) + PAGE_SHIFT;
/*
- * In theory we don't have to kill when the page was
- * munmaped. But it could be also a mremap. Since that's
- * likely very rare kill anyways just out of paranoia, but use
- * a SIGKILL because the error is not contained anymore.
+ * Send SIGKILL if "tk->addr == -EFAULT". Also, as
+ * "tk->size_shift" is always non-zero for !is_zone_device_page(),
+ * so "tk->size_shift == 0" effectively checks no mapping on
+ * ZONE_DEVICE. Indeed, when a devdax page is mmapped N times
+ * to a process' address space, it's possible not all N VMAs
+ * contain mappings for the page, but at least one VMA does.
+ * Only deliver SIGBUS with payload derived from the VMA that
+ * has a mapping for the page.
*/
- if (tk->addr == -EFAULT || tk->size_shift == 0) {
+ if (tk->addr == -EFAULT) {
pr_info("Memory failure: Unable to find user space address %lx in %s\n",
page_to_pfn(p), tsk->comm);
- tk->addr_valid = 0;
+ } else if (tk->size_shift == 0) {
+ kfree(tk);
+ return;
}
get_task_struct(tsk);
tk->tsk = tsk;
* make sure the process doesn't catch the
* signal and then access the memory. Just kill it.
*/
- if (fail || tk->addr_valid == 0) {
+ if (fail || tk->addr == -EFAULT) {
pr_err("Memory failure: %#lx: forcibly killing %s:%d because of failure to unmap corrupted page\n",
pfn, tk->tsk->comm, tk->tsk->pid);
do_send_sig_info(SIGKILL, SEND_SIG_PRIV,
if (!sysctl_memory_failure_recovery)
panic("Memory failure on page %lx", pfn);
- if (!pfn_valid(pfn)) {
+ p = pfn_to_online_page(pfn);
+ if (!p) {
+ if (pfn_valid(pfn)) {
+ pgmap = get_dev_pagemap(pfn, NULL);
+ if (pgmap)
+ return memory_failure_dev_pagemap(pfn, flags,
+ pgmap);
+ }
pr_err("Memory failure: %#lx: memory outside kernel control\n",
pfn);
return -ENXIO;
}
- pgmap = get_dev_pagemap(pfn, NULL);
- if (pgmap)
- return memory_failure_dev_pagemap(pfn, flags, pgmap);
-
- p = pfn_to_page(pfn);
if (PageHuge(p))
return memory_failure_hugetlb(pfn, flags);
if (TestSetPageHWPoison(p)) {
zone_span_writeunlock(zone);
}
-static void shrink_pgdat_span(struct pglist_data *pgdat,
- unsigned long start_pfn, unsigned long end_pfn)
+/*
+ * Recompute pgdat->node_start_pfn and pgdat->node_spanned_pages from the
+ * spans of the node's zones: the node starts at the lowest zone_start_pfn
+ * and ends at the highest zone end among zones that span any pages.
+ *
+ * Zones with spanned_pages == 0 are skipped, and the span is seeded from the
+ * first populated zone. Starting the minimum search at 0 instead would never
+ * raise node_start_pfn above 0 and would inflate the node span.
+ *
+ * If no zone spans any pages, both fields are set to 0.
+ */
+static void update_pgdat_span(struct pglist_data *pgdat)
{
- unsigned long pgdat_start_pfn = pgdat->node_start_pfn;
- unsigned long p = pgdat_end_pfn(pgdat); /* pgdat_end_pfn namespace clash */
- unsigned long pgdat_end_pfn = p;
- unsigned long pfn;
- int nid = pgdat->node_id;
-
- if (pgdat_start_pfn == start_pfn) {
- /*
- * If the section is smallest section in the pgdat, it need
- * shrink pgdat->node_start_pfn and pgdat->node_spanned_pages.
- * In this case, we find second smallest valid mem_section
- * for shrinking zone.
- */
- pfn = find_smallest_section_pfn(nid, NULL, end_pfn,
- pgdat_end_pfn);
- if (pfn) {
- pgdat->node_start_pfn = pfn;
- pgdat->node_spanned_pages = pgdat_end_pfn - pfn;
- }
- } else if (pgdat_end_pfn == end_pfn) {
- /*
- * If the section is biggest section in the pgdat, it need
- * shrink pgdat->node_spanned_pages.
- * In this case, we find second biggest valid mem_section for
- * shrinking zone.
- */
- pfn = find_biggest_section_pfn(nid, NULL, pgdat_start_pfn,
- start_pfn);
- if (pfn)
- pgdat->node_spanned_pages = pfn - pgdat_start_pfn + 1;
- }
-
- /*
- * If the section is not biggest or smallest mem_section in the pgdat,
- * it only creates a hole in the pgdat. So in this case, we need not
- * change the pgdat.
- * But perhaps, the pgdat has only hole data. Thus it check the pgdat
- * has only hole or not.
- */
- pfn = pgdat_start_pfn;
- for (; pfn < pgdat_end_pfn; pfn += PAGES_PER_SUBSECTION) {
- if (unlikely(!pfn_valid(pfn)))
- continue;
-
- if (pfn_to_nid(pfn) != nid)
- continue;
+ unsigned long node_start_pfn = 0, node_end_pfn = 0;
+ struct zone *zone;
- /* Skip range to be removed */
- if (pfn >= start_pfn && pfn < end_pfn)
- continue;
+ for (zone = pgdat->node_zones;
+ zone < pgdat->node_zones + MAX_NR_ZONES; zone++) {
+ unsigned long zone_end_pfn = zone->zone_start_pfn +
+ zone->spanned_pages;
- /* If we find valid section, we have nothing to do */
- return;
+ /* No need to lock the zones, they can't change. */
+ if (!zone->spanned_pages)
+ continue;
+ /* First populated zone: it defines the initial node span. */
+ if (!node_end_pfn) {
+ node_start_pfn = zone->zone_start_pfn;
+ node_end_pfn = zone_end_pfn;
+ continue;
+ }
+
+ if (zone_end_pfn > node_end_pfn)
+ node_end_pfn = zone_end_pfn;
+ if (zone->zone_start_pfn < node_start_pfn)
+ node_start_pfn = zone->zone_start_pfn;
}
- /* The pgdat has no valid section */
- pgdat->node_start_pfn = 0;
- pgdat->node_spanned_pages = 0;
+ pgdat->node_start_pfn = node_start_pfn;
+ pgdat->node_spanned_pages = node_end_pfn - node_start_pfn;
}
static void __remove_zone(struct zone *zone, unsigned long start_pfn,
pgdat_resize_lock(zone->zone_pgdat, &flags);
shrink_zone_span(zone, start_pfn, start_pfn + nr_pages);
- shrink_pgdat_span(pgdat, start_pfn, start_pfn + nr_pages);
+ update_pgdat_span(pgdat);
pgdat_resize_unlock(zone->zone_pgdat, &flags);
}
void memunmap_pages(struct dev_pagemap *pgmap)
{
struct resource *res = &pgmap->res;
+ struct page *first_page;
unsigned long pfn;
int nid;
put_page(pfn_to_page(pfn));
dev_pagemap_cleanup(pgmap);
+ /* make sure to access a memmap that was actually initialized */
+ first_page = pfn_to_page(pfn_first(pgmap));
+
/* pages are dead and unused, undo the arch mapping */
- nid = page_to_nid(pfn_to_page(PHYS_PFN(res->start)));
+ nid = page_to_nid(first_page);
mem_hotplug_begin();
if (pgmap->type == MEMORY_DEVICE_PRIVATE) {
- pfn = PHYS_PFN(res->start);
- __remove_pages(page_zone(pfn_to_page(pfn)), pfn,
- PHYS_PFN(resource_size(res)), NULL);
+ __remove_pages(page_zone(first_page), PHYS_PFN(res->start),
+ PHYS_PFN(resource_size(res)), NULL);
} else {
arch_remove_memory(nid, res->start, resource_size(res),
pgmap_altmap(pgmap));
if (page)
goto got_pg;
- if (order >= pageblock_order && (gfp_mask & __GFP_IO)) {
+ if (order >= pageblock_order && (gfp_mask & __GFP_IO) &&
+ !(gfp_mask & __GFP_RETRY_MAYFAIL)) {
/*
* If allocating entire pageblock(s) and compaction
* failed because all zones are below low watermarks
* or is prohibited because it recently failed at this
- * order, fail immediately.
+ * order, fail immediately unless the allocator has
+ * requested compaction and reclaim retry.
*
* Reclaim is
* - potentially very expensive because zones are far
#endif
};
+unsigned long page_ext_size = sizeof(struct page_ext);
+
static unsigned long total_usage;
-static unsigned long extra_mem;
static bool __init invoke_need_callbacks(void)
{
for (i = 0; i < entries; i++) {
if (page_ext_ops[i]->need && page_ext_ops[i]->need()) {
- page_ext_ops[i]->offset = sizeof(struct page_ext) +
- extra_mem;
- extra_mem += page_ext_ops[i]->size;
+ page_ext_ops[i]->offset = page_ext_size;
+ page_ext_size += page_ext_ops[i]->size;
need = true;
}
}
}
}
-static unsigned long get_entry_size(void)
-{
- return sizeof(struct page_ext) + extra_mem;
-}
-
static inline struct page_ext *get_entry(void *base, unsigned long index)
{
- return base + get_entry_size() * index;
+ return base + page_ext_size * index; /* entries in the table are page_ext_size bytes apart */
}
#if !defined(CONFIG_SPARSEMEM)
!IS_ALIGNED(node_end_pfn(nid), MAX_ORDER_NR_PAGES))
nr_pages += MAX_ORDER_NR_PAGES;
- table_size = get_entry_size() * nr_pages;
+ table_size = page_ext_size * nr_pages;
base = memblock_alloc_try_nid(
table_size, PAGE_SIZE, __pa(MAX_DMA_ADDRESS),
if (section->page_ext)
return 0;
- table_size = get_entry_size() * PAGES_PER_SECTION;
+ table_size = page_ext_size * PAGES_PER_SECTION;
base = alloc_page_ext(table_size, nid);
/*
* we need to apply a mask.
*/
pfn &= PAGE_SECTION_MASK;
- section->page_ext = (void *)base - get_entry_size() * pfn;
+ section->page_ext = (void *)base - page_ext_size * pfn;
total_usage += table_size;
return 0;
}
struct page *page = virt_to_page(addr);
size_t table_size;
- table_size = get_entry_size() * PAGES_PER_SECTION;
+ table_size = page_ext_size * PAGES_PER_SECTION;
BUG_ON(PageReserved(page));
kmemleak_free(addr);
short last_migrate_reason;
gfp_t gfp_mask;
depot_stack_handle_t handle;
-#ifdef CONFIG_DEBUG_PAGEALLOC
depot_stack_handle_t free_handle;
-#endif
};
-static bool page_owner_disabled = true;
+static bool page_owner_enabled = false;
DEFINE_STATIC_KEY_FALSE(page_owner_inited);
static depot_stack_handle_t dummy_handle;
return -EINVAL;
if (strcmp(buf, "on") == 0)
- page_owner_disabled = false;
+ page_owner_enabled = true;
return 0;
}
static bool need_page_owner(void)
{
- if (page_owner_disabled)
- return false;
-
- return true;
+ return page_owner_enabled; /* false by default; set to true when the parsed option string is "on" */
}
static __always_inline depot_stack_handle_t create_dummy_stack(void)
static void init_page_owner(void)
{
- if (page_owner_disabled)
+ if (!page_owner_enabled)
return;
register_dummy_stack();
{
int i;
struct page_ext *page_ext;
-#ifdef CONFIG_DEBUG_PAGEALLOC
depot_stack_handle_t handle = 0;
struct page_owner *page_owner;
- if (debug_pagealloc_enabled())
- handle = save_stack(GFP_NOWAIT | __GFP_NOWARN);
-#endif
+ handle = save_stack(GFP_NOWAIT | __GFP_NOWARN);
+ page_ext = lookup_page_ext(page);
+ if (unlikely(!page_ext))
+ return;
for (i = 0; i < (1 << order); i++) {
- page_ext = lookup_page_ext(page + i);
- if (unlikely(!page_ext))
- continue;
- __clear_bit(PAGE_EXT_OWNER_ACTIVE, &page_ext->flags);
-#ifdef CONFIG_DEBUG_PAGEALLOC
- if (debug_pagealloc_enabled()) {
- page_owner = get_page_owner(page_ext);
- page_owner->free_handle = handle;
- }
-#endif
+ __clear_bit(PAGE_EXT_OWNER_ALLOCATED, &page_ext->flags);
+ page_owner = get_page_owner(page_ext);
+ page_owner->free_handle = handle;
+ page_ext = page_ext_next(page_ext);
}
}
page_owner->gfp_mask = gfp_mask;
page_owner->last_migrate_reason = -1;
__set_bit(PAGE_EXT_OWNER, &page_ext->flags);
- __set_bit(PAGE_EXT_OWNER_ACTIVE, &page_ext->flags);
+ __set_bit(PAGE_EXT_OWNER_ALLOCATED, &page_ext->flags);
- page_ext = lookup_page_ext(page + i);
+ page_ext = page_ext_next(page_ext);
}
}
if (unlikely(!page_ext))
return;
- page_owner = get_page_owner(page_ext);
- page_owner->order = 0;
- for (i = 1; i < (1 << order); i++) {
- page_ext = lookup_page_ext(page + i);
+ for (i = 0; i < (1 << order); i++) {
page_owner = get_page_owner(page_ext);
page_owner->order = 0;
+ page_ext = page_ext_next(page_ext);
}
}
* the new page, which will be freed.
*/
__set_bit(PAGE_EXT_OWNER, &new_ext->flags);
- __set_bit(PAGE_EXT_OWNER_ACTIVE, &new_ext->flags);
+ __set_bit(PAGE_EXT_OWNER_ALLOCATED, &new_ext->flags);
}
void pagetypeinfo_showmixedcount_print(struct seq_file *m,
* not matter as the mixed block count will still be correct
*/
for (; pfn < end_pfn; ) {
- if (!pfn_valid(pfn)) {
+ page = pfn_to_online_page(pfn);
+ if (!page) {
pfn = ALIGN(pfn + 1, MAX_ORDER_NR_PAGES);
continue;
}
block_end_pfn = ALIGN(pfn + 1, pageblock_nr_pages);
block_end_pfn = min(block_end_pfn, end_pfn);
- page = pfn_to_page(pfn);
pageblock_mt = get_pageblock_migratetype(page);
for (; pfn < block_end_pfn; pfn++) {
if (!pfn_valid_within(pfn))
continue;
+ /* The pageblock is online, no need to recheck. */
page = pfn_to_page(pfn);
if (page_zone(page) != zone)
if (unlikely(!page_ext))
continue;
- if (!test_bit(PAGE_EXT_OWNER_ACTIVE, &page_ext->flags))
+ if (!test_bit(PAGE_EXT_OWNER_ALLOCATED, &page_ext->flags))
continue;
page_owner = get_page_owner(page_ext);
return;
}
- if (test_bit(PAGE_EXT_OWNER_ACTIVE, &page_ext->flags))
+ if (test_bit(PAGE_EXT_OWNER_ALLOCATED, &page_ext->flags))
pr_alert("page_owner tracks the page as allocated\n");
else
pr_alert("page_owner tracks the page as freed\n");
stack_trace_print(entries, nr_entries, 0);
}
-#ifdef CONFIG_DEBUG_PAGEALLOC
handle = READ_ONCE(page_owner->free_handle);
if (!handle) {
pr_alert("page_owner free stack trace missing\n");
pr_alert("page last free stack trace:\n");
stack_trace_print(entries, nr_entries, 0);
}
-#endif
if (page_owner->last_migrate_reason != -1)
pr_alert("page has been migrated, last migrate reason: %s\n",
* Although we do have the info about past allocation of free
* pages, it's not relevant for current memory usage.
*/
- if (!test_bit(PAGE_EXT_OWNER_ACTIVE, &page_ext->flags))
+ if (!test_bit(PAGE_EXT_OWNER_ALLOCATED, &page_ext->flags))
continue;
page_owner = get_page_owner(page_ext);
#include <linux/mmu_notifier.h>
#include <linux/migrate.h>
#include <linux/hugetlb.h>
+#include <linux/huge_mm.h>
#include <linux/backing-dev.h>
#include <linux/page_idle.h>
#include <linux/memremap.h>
/**
* __ksize -- Uninstrumented ksize.
+ * @objp: pointer to the object
*
* Unlike ksize(), __ksize() is uninstrumented, and does not provide the same
* safety checks as ksize() with KASAN instrumentation enabled.
+ *
+ * Return: size of the actual memory used by @objp in bytes
*/
size_t __ksize(const void *objp)
{
static void destroy_memcg_params(struct kmem_cache *s)
{
- if (is_root_cache(s))
+ if (is_root_cache(s)) {
kvfree(rcu_access_pointer(s->memcg_params.memcg_caches));
- else
+ } else {
+ mem_cgroup_put(s->memcg_params.memcg); /* drop the reference on the cache's memcg */
+ WRITE_ONCE(s->memcg_params.memcg, NULL); /* and clear the pointer that held it */
percpu_ref_exit(&s->memcg_params.refcnt);
+ }
}
static void free_memcg_params(struct rcu_head *rcu)
} else {
list_del(&s->memcg_params.children_node);
list_del(&s->memcg_params.kmem_caches_node);
- mem_cgroup_put(s->memcg_params.memcg);
- WRITE_ONCE(s->memcg_params.memcg, NULL);
}
}
#else
return p;
}
+/*
+ * When init-on-free is in effect for @s, clear the pointer-sized freelist
+ * slot located at @obj + s->offset, so that an object which was wiped upon
+ * free is handed out fully initialized. A NULL @obj is ignored.
+ */
+static __always_inline void maybe_wipe_obj_freeptr(struct kmem_cache *s,
+ void *obj)
+{
+ char *freeptr;
+
+ if (likely(!slab_want_init_on_free(s)) || !obj)
+ return;
+
+ freeptr = (char *)obj + s->offset;
+ memset(freeptr, 0, sizeof(void *));
+}
+
/*
* Inlined fastpath so that allocation functions (kmalloc, kmem_cache_alloc)
* have the fastpath folded into their functions. So no function call
prefetch_freepointer(s, next_object);
stat(s, ALLOC_FASTPATH);
}
- /*
- * If the object has been wiped upon free, make sure it's fully
- * initialized by zeroing out freelist pointer.
- */
- if (unlikely(slab_want_init_on_free(s)) && object)
- memset(object + s->offset, 0, sizeof(void *));
+
+ maybe_wipe_obj_freeptr(s, object);
if (unlikely(slab_want_init_on_alloc(gfpflags, s)) && object)
memset(object, 0, s->object_size);
goto error;
c = this_cpu_ptr(s->cpu_slab);
+ maybe_wipe_obj_freeptr(s, p[i]);
+
continue; /* goto for-loop */
}
c->freelist = get_freepointer(s, object);
p[i] = object;
+ maybe_wipe_obj_freeptr(s, p[i]);
}
c->tid = next_tid(c->tid);
local_irq_enable();
}
}
- get_online_mems();
+ /*
+ * It is impossible to take "mem_hotplug_lock" here with "kernfs_mutex"
+ * already held which will conflict with an existing lock order:
+ *
+ * mem_hotplug_lock->slab_mutex->kernfs_mutex
+ *
+ * We don't really need mem_hotplug_lock (to hold off
+ * slab_mem_going_offline_callback) here because slab's memory hot
+ * unplug code doesn't destroy the kmem_cache->node[] data.
+ */
+
#ifdef CONFIG_SLUB_DEBUG
if (flags & SO_ALL) {
struct kmem_cache_node *n;
x += sprintf(buf + x, " N%d=%lu",
node, nodes[node]);
#endif
- put_online_mems();
kfree(nodes);
return x + sprintf(buf + x, "\n");
}
unlock_page(page);
continue;
}
+
+ /* Take a pin outside pagevec */
+ get_page(page);
+
+ /*
+ * Drop extra pins before trying to invalidate
+ * the huge page.
+ */
+ pagevec_remove_exceptionals(&pvec);
+ pagevec_release(&pvec);
}
ret = invalidate_inode_page(page);
*/
if (!ret)
deactivate_file_page(page);
+ if (PageTransHuge(page))
+ put_page(page);
count += ret;
}
pagevec_remove_exceptionals(&pvec);
*/
unsigned long lruvec_lru_size(struct lruvec *lruvec, enum lru_list lru, int zone_idx)
{
- unsigned long lru_size;
+ unsigned long lru_size = 0;
int zid;
- if (!mem_cgroup_disabled())
- lru_size = lruvec_page_state_local(lruvec, NR_LRU_BASE + lru);
- else
+ if (!mem_cgroup_disabled()) {
+ for (zid = 0; zid < MAX_NR_ZONES; zid++)
+ lru_size += mem_cgroup_get_zone_lru_size(lruvec, lru, zid);
+ } else
lru_size = node_page_state(lruvec_pgdat(lruvec), NR_LRU_BASE + lru);
for (zid = zone_idx + 1; zid < MAX_NR_ZONES; zid++) {
* Note that if SetPageDirty is always performed via set_page_dirty,
* and thus under the i_pages lock, then this ordering is not required.
*/
- if (unlikely(PageTransHuge(page)) && PageSwapCache(page))
- refcount = 1 + HPAGE_PMD_NR;
- else
- refcount = 2;
+ refcount = 1 + compound_nr(page);
if (!page_ref_freeze(page, refcount))
goto cannot_free;
/* note: atomic_cmpxchg in page_ref_freeze provides the smp_rmb */
{
int frag_max_size = BR_INPUT_SKB_CB(skb)->frag_max_size;
unsigned int hlen, ll_rs, mtu;
+ ktime_t tstamp = skb->tstamp;
struct ip_frag_state state;
struct iphdr *iph;
int err;
if (iter.frag)
ip_fraglist_prepare(skb, &iter);
+ skb->tstamp = tstamp;
err = output(net, sk, data, skb);
if (err || !iter.frag)
break;
goto blackhole;
}
+ skb2->tstamp = tstamp;
err = output(net, sk, data, skb2);
if (err)
goto blackhole;
skb->len += copied;
skb->truesize += truesize;
if (sk && sk->sk_type == SOCK_STREAM) {
- sk->sk_wmem_queued += truesize;
+ sk_wmem_queued_add(sk, truesize);
sk_mem_charge(sk, truesize);
} else {
refcount_add(truesize, &skb->sk->sk_wmem_alloc);
case SO_RCVBUF:
val = min_t(u32, val, sysctl_rmem_max);
sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
- sk->sk_rcvbuf = max_t(int, val * 2, SOCK_MIN_RCVBUF);
+ WRITE_ONCE(sk->sk_rcvbuf,
+ max_t(int, val * 2, SOCK_MIN_RCVBUF));
break;
case SO_SNDBUF:
val = min_t(u32, val, sysctl_wmem_max);
sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
- sk->sk_sndbuf = max_t(int, val * 2, SOCK_MIN_SNDBUF);
+ WRITE_ONCE(sk->sk_sndbuf,
+ max_t(int, val * 2, SOCK_MIN_SNDBUF));
break;
case SO_MAX_PACING_RATE: /* 32bit version */
if (val != ~0U)
case SO_RCVLOWAT:
if (val < 0)
val = INT_MAX;
- sk->sk_rcvlowat = val ? : 1;
+ WRITE_ONCE(sk->sk_rcvlowat, val ? : 1);
break;
case SO_MARK:
if (sk->sk_mark != val) {
return __peernet2id_alloc(net, peer, &no);
}
-static void rtnl_net_notifyid(struct net *net, int cmd, int id);
+static void rtnl_net_notifyid(struct net *net, int cmd, int id, u32 portid,
+ struct nlmsghdr *nlh);
/* This function returns the id of a peer netns. If no id is assigned, one will
* be allocated and returned.
*/
id = __peernet2id_alloc(net, peer, &alloc);
spin_unlock_bh(&net->nsid_lock);
if (alloc && id >= 0)
- rtnl_net_notifyid(net, RTM_NEWNSID, id);
+ rtnl_net_notifyid(net, RTM_NEWNSID, id, 0, NULL);
if (alive)
put_net(peer);
return id;
idr_remove(&tmp->netns_ids, id);
spin_unlock_bh(&tmp->nsid_lock);
if (id >= 0)
- rtnl_net_notifyid(tmp, RTM_DELNSID, id);
+ rtnl_net_notifyid(tmp, RTM_DELNSID, id, 0, NULL);
if (tmp == last)
break;
}
err = alloc_netid(net, peer, nsid);
spin_unlock_bh(&net->nsid_lock);
if (err >= 0) {
- rtnl_net_notifyid(net, RTM_NEWNSID, err);
+ rtnl_net_notifyid(net, RTM_NEWNSID, err, NETLINK_CB(skb).portid,
+ nlh);
err = 0;
} else if (err == -ENOSPC && nsid >= 0) {
err = -EEXIST;
return err < 0 ? err : skb->len;
}
-static void rtnl_net_notifyid(struct net *net, int cmd, int id)
+static void rtnl_net_notifyid(struct net *net, int cmd, int id, u32 portid,
+ struct nlmsghdr *nlh)
{
struct net_fill_args fillargs = {
+ .portid = portid,
+ .seq = nlh ? nlh->nlmsg_seq : 0,
.cmd = cmd,
.nsid = id,
};
if (err < 0)
goto err_out;
- rtnl_notify(msg, net, 0, RTNLGRP_NSID, NULL, 0);
+ rtnl_notify(msg, net, portid, RTNLGRP_NSID, nlh, 0);
return;
err_out:
fastopenq = &inet_csk(lsk)->icsk_accept_queue.fastopenq;
- tcp_sk(sk)->fastopen_rsk = NULL;
+ RCU_INIT_POINTER(tcp_sk(sk)->fastopen_rsk, NULL);
spin_lock_bh(&fastopenq->lock);
fastopenq->qlen--;
tcp_rsk(req)->tfo_listener = false;
int sock_queue_err_skb(struct sock *sk, struct sk_buff *skb)
{
if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >=
- (unsigned int)sk->sk_rcvbuf)
+ (unsigned int)READ_ONCE(sk->sk_rcvbuf))
return -ENOMEM;
skb_orphan(skb);
* @skb: buffer
* @mpls_lse: MPLS label stack entry to push
* @mpls_proto: ethertype of the new MPLS header (expects 0x8847 or 0x8848)
+ * @mac_len: length of the MAC header
*
* Expects skb->data at mac header.
*
* Returns 0 on success, -errno otherwise.
*/
-int skb_mpls_push(struct sk_buff *skb, __be32 mpls_lse, __be16 mpls_proto)
+int skb_mpls_push(struct sk_buff *skb, __be32 mpls_lse, __be16 mpls_proto,
+ int mac_len)
{
struct mpls_shim_hdr *lse;
int err;
return err;
if (!skb->inner_protocol) {
- skb_set_inner_network_header(skb, skb->mac_len);
+ skb_set_inner_network_header(skb, mac_len);
skb_set_inner_protocol(skb, skb->protocol);
}
skb_push(skb, MPLS_HLEN);
memmove(skb_mac_header(skb) - MPLS_HLEN, skb_mac_header(skb),
- skb->mac_len);
+ mac_len);
skb_reset_mac_header(skb);
- skb_set_network_header(skb, skb->mac_len);
+ skb_set_network_header(skb, mac_len);
lse = mpls_hdr(skb);
lse->label_stack_entry = mpls_lse;
*
* @skb: buffer
* @next_proto: ethertype of header after popped MPLS header
+ * @mac_len: length of the MAC header
*
* Expects skb->data at mac header.
*
* Returns 0 on success, -errno otherwise.
*/
-int skb_mpls_pop(struct sk_buff *skb, __be16 next_proto)
+int skb_mpls_pop(struct sk_buff *skb, __be16 next_proto, int mac_len)
{
int err;
if (unlikely(!eth_p_mpls(skb->protocol)))
- return -EINVAL;
+ return 0;
- err = skb_ensure_writable(skb, skb->mac_len + MPLS_HLEN);
+ err = skb_ensure_writable(skb, mac_len + MPLS_HLEN);
if (unlikely(err))
return err;
skb_postpull_rcsum(skb, mpls_hdr(skb), MPLS_HLEN);
memmove(skb_mac_header(skb) + MPLS_HLEN, skb_mac_header(skb),
- skb->mac_len);
+ mac_len);
__skb_pull(skb, MPLS_HLEN);
skb_reset_mac_header(skb);
- skb_set_network_header(skb, skb->mac_len);
+ skb_set_network_header(skb, mac_len);
if (skb->dev && skb->dev->type == ARPHRD_ETHER) {
struct ethhdr *hdr;
rc = sk_backlog_rcv(sk, skb);
mutex_release(&sk->sk_lock.dep_map, 1, _RET_IP_);
- } else if (sk_add_backlog(sk, skb, sk->sk_rcvbuf)) {
+ } else if (sk_add_backlog(sk, skb, READ_ONCE(sk->sk_rcvbuf))) {
bh_unlock_sock(sk);
atomic_inc(&sk->sk_drops);
goto discard_and_relse;
*/
val = min_t(int, val, INT_MAX / 2);
sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
- sk->sk_sndbuf = max_t(int, val * 2, SOCK_MIN_SNDBUF);
+ WRITE_ONCE(sk->sk_sndbuf,
+ max_t(int, val * 2, SOCK_MIN_SNDBUF));
/* Wake up sending tasks if we upped the value. */
sk->sk_write_space(sk);
break;
* returning the value we actually used in getsockopt
* is the most desirable behavior.
*/
- sk->sk_rcvbuf = max_t(int, val * 2, SOCK_MIN_RCVBUF);
+ WRITE_ONCE(sk->sk_rcvbuf,
+ max_t(int, val * 2, SOCK_MIN_RCVBUF));
break;
case SO_RCVBUFFORCE:
if (sock->ops->set_rcvlowat)
ret = sock->ops->set_rcvlowat(sk, val);
else
- sk->sk_rcvlowat = val ? : 1;
+ WRITE_ONCE(sk->sk_rcvlowat, val ? : 1);
break;
case SO_RCVTIMEO_OLD:
struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force,
gfp_t priority)
{
- if (force || refcount_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) {
+ if (force ||
+ refcount_read(&sk->sk_wmem_alloc) < READ_ONCE(sk->sk_sndbuf)) {
struct sk_buff *skb = alloc_skb(size, priority);
+
if (skb) {
skb_set_owner_w(skb, sk);
return skb;
break;
set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
- if (refcount_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf)
+ if (refcount_read(&sk->sk_wmem_alloc) < READ_ONCE(sk->sk_sndbuf))
break;
if (sk->sk_shutdown & SEND_SHUTDOWN)
break;
if (sk->sk_shutdown & SEND_SHUTDOWN)
goto failure;
- if (sk_wmem_alloc_get(sk) < sk->sk_sndbuf)
+ if (sk_wmem_alloc_get(sk) < READ_ONCE(sk->sk_sndbuf))
break;
sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
} else {
unsigned long *memory_pressure = sk->sk_prot->memory_pressure;
- if (memory_pressure && *memory_pressure)
- *memory_pressure = 0;
+ if (memory_pressure && READ_ONCE(*memory_pressure))
+ WRITE_ONCE(*memory_pressure, 0);
}
}
/* Do not wake up a writer until he can make "significant"
* progress. --DaveM
*/
- if ((refcount_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf) {
+ if ((refcount_read(&sk->sk_wmem_alloc) << 1) <= READ_ONCE(sk->sk_sndbuf)) {
wq = rcu_dereference(sk->sk_wq);
if (skwq_has_sleeper(wq))
wake_up_interruptible_sync_poll(&wq->wait, EPOLLOUT |
memset(mem, 0, sizeof(*mem) * SK_MEMINFO_VARS);
mem[SK_MEMINFO_RMEM_ALLOC] = sk_rmem_alloc_get(sk);
- mem[SK_MEMINFO_RCVBUF] = sk->sk_rcvbuf;
+ mem[SK_MEMINFO_RCVBUF] = READ_ONCE(sk->sk_rcvbuf);
mem[SK_MEMINFO_WMEM_ALLOC] = sk_wmem_alloc_get(sk);
- mem[SK_MEMINFO_SNDBUF] = sk->sk_sndbuf;
+ mem[SK_MEMINFO_SNDBUF] = READ_ONCE(sk->sk_sndbuf);
mem[SK_MEMINFO_FWD_ALLOC] = sk->sk_forward_alloc;
- mem[SK_MEMINFO_WMEM_QUEUED] = sk->sk_wmem_queued;
+ mem[SK_MEMINFO_WMEM_QUEUED] = READ_ONCE(sk->sk_wmem_queued);
mem[SK_MEMINFO_OPTMEM] = atomic_read(&sk->sk_omem_alloc);
- mem[SK_MEMINFO_BACKLOG] = sk->sk_backlog.len;
+ mem[SK_MEMINFO_BACKLOG] = READ_ONCE(sk->sk_backlog.len);
mem[SK_MEMINFO_DROPS] = atomic_read(&sk->sk_drops);
}
dst->index = index;
INIT_LIST_HEAD(&dst->list);
- list_add_tail(&dsa_tree_list, &dst->list);
+ list_add_tail(&dst->list, &dsa_tree_list);
kref_init(&dst->refcount);
percpu_counter_inc(sk->sk_prot->orphan_count);
if (sk->sk_protocol == IPPROTO_TCP && tcp_rsk(req)->tfo_listener) {
- BUG_ON(tcp_sk(child)->fastopen_rsk != req);
+ BUG_ON(rcu_access_pointer(tcp_sk(child)->fastopen_rsk) != req);
BUG_ON(sk != req->rsk_listener);
/* Paranoid, to prevent race condition if
* Also to satisfy an assertion in
* tcp_v4_destroy_sock().
*/
- tcp_sk(child)->fastopen_rsk = NULL;
+ RCU_INIT_POINTER(tcp_sk(child)->fastopen_rsk, NULL);
}
inet_csk_destroy_sock(child);
}
req->sk = child;
req->dl_next = NULL;
if (queue->rskq_accept_head == NULL)
- queue->rskq_accept_head = req;
+ WRITE_ONCE(queue->rskq_accept_head, req);
else
queue->rskq_accept_tail->dl_next = req;
queue->rskq_accept_tail = req;
if (ext & (1 << (INET_DIAG_MEMINFO - 1))) {
struct inet_diag_meminfo minfo = {
.idiag_rmem = sk_rmem_alloc_get(sk),
- .idiag_wmem = sk->sk_wmem_queued,
+ .idiag_wmem = READ_ONCE(sk->sk_wmem_queued),
.idiag_fmem = sk->sk_forward_alloc,
.idiag_tmem = sk_wmem_alloc_get(sk),
};
struct rtable *rt = skb_rtable(skb);
unsigned int mtu, hlen, ll_rs;
struct ip_fraglist_iter iter;
+ ktime_t tstamp = skb->tstamp;
struct ip_frag_state state;
int err = 0;
ip_fraglist_prepare(skb, &iter);
}
+ skb->tstamp = tstamp;
err = output(net, sk, skb);
if (!err)
/*
* Put this fragment into the sending queue.
*/
+ skb2->tstamp = tstamp;
err = output(net, sk, skb2);
if (err)
goto fail;
prev = cmpxchg(p, orig, rt);
if (prev == orig) {
if (orig) {
- dst_dev_put(&orig->dst);
+ rt_add_uncached_list(orig);
dst_release(&orig->dst);
}
} else {
int orig_oif = fl4->flowi4_oif;
unsigned int flags = 0;
struct rtable *rth;
- int err = -ENETUNREACH;
+ int err;
if (fl4->saddr) {
- rth = ERR_PTR(-EINVAL);
if (ipv4_is_multicast(fl4->saddr) ||
ipv4_is_lbcast(fl4->saddr) ||
- ipv4_is_zeronet(fl4->saddr))
+ ipv4_is_zeronet(fl4->saddr)) {
+ rth = ERR_PTR(-EINVAL);
goto out;
+ }
+
+ rth = ERR_PTR(-ENETUNREACH);
/* I removed check for oif == dev_out->oif here.
It was wrong for two reasons:
{
unsigned long val;
- if (tcp_memory_pressure)
+ if (READ_ONCE(tcp_memory_pressure))
return;
val = jiffies;
{
unsigned long val;
- if (!tcp_memory_pressure)
+ if (!READ_ONCE(tcp_memory_pressure))
return;
val = xchg(&tcp_memory_pressure, 0);
if (val)
icsk->icsk_sync_mss = tcp_sync_mss;
- sk->sk_sndbuf = sock_net(sk)->ipv4.sysctl_tcp_wmem[1];
- sk->sk_rcvbuf = sock_net(sk)->ipv4.sysctl_tcp_rmem[1];
+ WRITE_ONCE(sk->sk_sndbuf, sock_net(sk)->ipv4.sysctl_tcp_wmem[1]);
+ WRITE_ONCE(sk->sk_rcvbuf, sock_net(sk)->ipv4.sysctl_tcp_rmem[1]);
sk_sockets_allocated_inc(sk);
sk->sk_route_forced_caps = NETIF_F_GSO;
static inline bool tcp_stream_is_readable(const struct tcp_sock *tp,
int target, struct sock *sk)
{
- return (tp->rcv_nxt - tp->copied_seq >= target) ||
+ return (READ_ONCE(tp->rcv_nxt) - READ_ONCE(tp->copied_seq) >= target) ||
(sk->sk_prot->stream_memory_read ?
sk->sk_prot->stream_memory_read(sk) : false);
}
/* Connected or passive Fast Open socket? */
if (state != TCP_SYN_SENT &&
- (state != TCP_SYN_RECV || tp->fastopen_rsk)) {
+ (state != TCP_SYN_RECV || rcu_access_pointer(tp->fastopen_rsk))) {
int target = sock_rcvlowat(sk, 0, INT_MAX);
- if (tp->urg_seq == tp->copied_seq &&
+ if (READ_ONCE(tp->urg_seq) == READ_ONCE(tp->copied_seq) &&
!sock_flag(sk, SOCK_URGINLINE) &&
tp->urg_data)
target++;
unlock_sock_fast(sk, slow);
break;
case SIOCATMARK:
- answ = tp->urg_data && tp->urg_seq == tp->copied_seq;
+ answ = tp->urg_data &&
+ READ_ONCE(tp->urg_seq) == READ_ONCE(tp->copied_seq);
break;
case SIOCOUTQ:
if (sk->sk_state == TCP_LISTEN)
if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV))
answ = 0;
else
- answ = tp->write_seq - tp->snd_una;
+ answ = READ_ONCE(tp->write_seq) - tp->snd_una;
break;
case SIOCOUTQNSD:
if (sk->sk_state == TCP_LISTEN)
if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV))
answ = 0;
else
- answ = tp->write_seq - tp->snd_nxt;
+ answ = READ_ONCE(tp->write_seq) -
+ READ_ONCE(tp->snd_nxt);
break;
default:
return -ENOIOCTLCMD;
tcb->sacked = 0;
__skb_header_release(skb);
tcp_add_write_queue_tail(sk, skb);
- sk->sk_wmem_queued += skb->truesize;
+ sk_wmem_queued_add(sk, skb->truesize);
sk_mem_charge(sk, skb->truesize);
if (tp->nonagle & TCP_NAGLE_PUSH)
tp->nonagle &= ~TCP_NAGLE_PUSH;
skb->len += copy;
skb->data_len += copy;
skb->truesize += copy;
- sk->sk_wmem_queued += copy;
+ sk_wmem_queued_add(sk, copy);
sk_mem_charge(sk, copy);
skb->ip_summed = CHECKSUM_PARTIAL;
- tp->write_seq += copy;
+ WRITE_ONCE(tp->write_seq, tp->write_seq + copy);
TCP_SKB_CB(skb)->end_seq += copy;
tcp_skb_pcount_set(skb, 0);
if (!copied)
TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_PSH;
- tp->write_seq += copy;
+ WRITE_ONCE(tp->write_seq, tp->write_seq + copy);
TCP_SKB_CB(skb)->end_seq += copy;
tcp_skb_pcount_set(skb, 0);
sk_eat_skb(sk, skb);
if (!desc->count)
break;
- tp->copied_seq = seq;
+ WRITE_ONCE(tp->copied_seq, seq);
}
- tp->copied_seq = seq;
+ WRITE_ONCE(tp->copied_seq, seq);
tcp_rcv_space_adjust(sk);
else
cap = sock_net(sk)->ipv4.sysctl_tcp_rmem[2] >> 1;
val = min(val, cap);
- sk->sk_rcvlowat = val ? : 1;
+ WRITE_ONCE(sk->sk_rcvlowat, val ? : 1);
/* Check if we need to signal EPOLLIN right now */
tcp_data_ready(sk);
val <<= 1;
if (val > sk->sk_rcvbuf) {
- sk->sk_rcvbuf = val;
+ WRITE_ONCE(sk->sk_rcvbuf, val);
tcp_sk(sk)->window_clamp = tcp_win_from_space(sk, val);
}
return 0;
out:
up_read(&current->mm->mmap_sem);
if (length) {
- tp->copied_seq = seq;
+ WRITE_ONCE(tp->copied_seq, seq);
tcp_rcv_space_adjust(sk);
/* Clean up data we have read: This will do ACK frames. */
if (urg_offset < used) {
if (!urg_offset) {
if (!sock_flag(sk, SOCK_URGINLINE)) {
- ++*seq;
+ WRITE_ONCE(*seq, *seq + 1);
urg_hole++;
offset++;
used--;
}
}
- *seq += used;
+ WRITE_ONCE(*seq, *seq + used);
copied += used;
len -= used;
found_fin_ok:
/* Process the FIN. */
- ++*seq;
+ WRITE_ONCE(*seq, *seq + 1);
if (!(flags & MSG_PEEK))
sk_eat_skb(sk, skb);
break;
}
if (sk->sk_state == TCP_CLOSE) {
- struct request_sock *req = tcp_sk(sk)->fastopen_rsk;
+ struct request_sock *req;
+
+ req = rcu_dereference_protected(tcp_sk(sk)->fastopen_rsk,
+ lockdep_sock_is_held(sk));
/* We could get here with a non-NULL req if the socket is
* aborted (e.g., closed with unread data) before 3WHS
* finishes.
struct inet_connection_sock *icsk = inet_csk(sk);
struct tcp_sock *tp = tcp_sk(sk);
int old_state = sk->sk_state;
+ u32 seq;
if (old_state != TCP_CLOSE)
tcp_set_state(sk, TCP_CLOSE);
__kfree_skb(sk->sk_rx_skb_cache);
sk->sk_rx_skb_cache = NULL;
}
- tp->copied_seq = tp->rcv_nxt;
+ WRITE_ONCE(tp->copied_seq, tp->rcv_nxt);
tp->urg_data = 0;
tcp_write_queue_purge(sk);
tcp_fastopen_active_disable_ofo_check(sk);
tp->srtt_us = 0;
tp->mdev_us = jiffies_to_usecs(TCP_TIMEOUT_INIT);
tp->rcv_rtt_last_tsecr = 0;
- tp->write_seq += tp->max_window + 2;
- if (tp->write_seq == 0)
- tp->write_seq = 1;
+
+ seq = tp->write_seq + tp->max_window + 2;
+ if (!seq)
+ seq = 1;
+ WRITE_ONCE(tp->write_seq, seq);
+
icsk->icsk_backoff = 0;
tp->snd_cwnd = 2;
icsk->icsk_probes_out = 0;
if (sk->sk_state != TCP_CLOSE)
err = -EPERM;
else if (tp->repair_queue == TCP_SEND_QUEUE)
- tp->write_seq = val;
+ WRITE_ONCE(tp->write_seq, val);
else if (tp->repair_queue == TCP_RECV_QUEUE)
- tp->rcv_nxt = val;
+ WRITE_ONCE(tp->rcv_nxt, val);
else
err = -EINVAL;
break;
void tcp_done(struct sock *sk)
{
- struct request_sock *req = tcp_sk(sk)->fastopen_rsk;
+ struct request_sock *req;
+
+ /* We might be called with a new socket, after
+ * inet_csk_prepare_forced_close() has been called
+ * so we can not use lockdep_sock_is_held(sk)
+ */
+ req = rcu_dereference_protected(tcp_sk(sk)->fastopen_rsk, 1);
if (sk->sk_state == TCP_SYN_SENT || sk->sk_state == TCP_SYN_RECV)
TCP_INC_STATS(sock_net(sk), TCP_MIB_ATTEMPTFAILS);
} else if (sk->sk_type == SOCK_STREAM) {
const struct tcp_sock *tp = tcp_sk(sk);
- r->idiag_rqueue = max_t(int, tp->rcv_nxt - tp->copied_seq, 0);
- r->idiag_wqueue = tp->write_seq - tp->snd_una;
+ r->idiag_rqueue = max_t(int, READ_ONCE(tp->rcv_nxt) -
+ READ_ONCE(tp->copied_seq), 0);
+ r->idiag_wqueue = READ_ONCE(tp->write_seq) - tp->snd_una;
}
if (info)
tcp_get_info(sk, info);
*/
tp = tcp_sk(child);
- tp->fastopen_rsk = req;
+ rcu_assign_pointer(tp->fastopen_rsk, req);
tcp_rsk(req)->tfo_listener = true;
/* RFC1323: The window in SYN & SYN/ACK segments is never
sndmem *= nr_segs * per_mss;
if (sk->sk_sndbuf < sndmem)
- sk->sk_sndbuf = min(sndmem, sock_net(sk)->ipv4.sysctl_tcp_wmem[2]);
+ WRITE_ONCE(sk->sk_sndbuf,
+ min(sndmem, sock_net(sk)->ipv4.sysctl_tcp_wmem[2]));
}
/* 2. Tuning advertised window (window_clamp, rcv_ssthresh)
!(sk->sk_userlocks & SOCK_RCVBUF_LOCK) &&
!tcp_under_memory_pressure(sk) &&
sk_memory_allocated(sk) < sk_prot_mem_limits(sk, 0)) {
- sk->sk_rcvbuf = min(atomic_read(&sk->sk_rmem_alloc),
- net->ipv4.sysctl_tcp_rmem[2]);
+ WRITE_ONCE(sk->sk_rcvbuf,
+ min(atomic_read(&sk->sk_rmem_alloc),
+ net->ipv4.sysctl_tcp_rmem[2]));
}
if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf)
tp->rcv_ssthresh = min(tp->window_clamp, 2U * tp->advmss);
rcvbuf = min_t(u64, rcvwin * rcvmem,
sock_net(sk)->ipv4.sysctl_tcp_rmem[2]);
if (rcvbuf > sk->sk_rcvbuf) {
- sk->sk_rcvbuf = rcvbuf;
+ WRITE_ONCE(sk->sk_rcvbuf, rcvbuf);
/* Make the window clamp follow along. */
tp->window_clamp = tcp_win_from_space(sk, rcvbuf);
struct tcp_sock *tp = tcp_sk(sk);
bool recovered = !before(tp->snd_una, tp->high_seq);
- if ((flag & FLAG_SND_UNA_ADVANCED || tp->fastopen_rsk) &&
+ if ((flag & FLAG_SND_UNA_ADVANCED || rcu_access_pointer(tp->fastopen_rsk)) &&
tcp_try_undo_loss(sk, false))
return;
/* If the retrans timer is currently being used by Fast Open
* for SYN-ACK retrans purpose, stay put.
*/
- if (tp->fastopen_rsk)
+ if (rcu_access_pointer(tp->fastopen_rsk))
return;
if (!tp->packets_out) {
sock_owned_by_me((struct sock *)tp);
tp->bytes_received += delta;
- tp->rcv_nxt = seq;
+ WRITE_ONCE(tp->rcv_nxt, seq);
}
/* Update our send window.
}
tp->urg_data = TCP_URG_NOTYET;
- tp->urg_seq = ptr;
+ WRITE_ONCE(tp->urg_seq, ptr);
/* Disable header prediction. */
tp->pred_flags = 0;
/* Ok.. it's good. Set up sequence numbers and
* move to established.
*/
- tp->rcv_nxt = TCP_SKB_CB(skb)->seq + 1;
+ WRITE_ONCE(tp->rcv_nxt, TCP_SKB_CB(skb)->seq + 1);
tp->rcv_wup = TCP_SKB_CB(skb)->seq + 1;
/* RFC1323: The window in SYN & SYN/ACK segments is
/* Remember, tcp_poll() does not lock socket!
* Change state from SYN-SENT only after copied_seq
* is initialized. */
- tp->copied_seq = tp->rcv_nxt;
+ WRITE_ONCE(tp->copied_seq, tp->rcv_nxt);
smc_check_reset_syn(tp);
tp->tcp_header_len = sizeof(struct tcphdr);
}
- tp->rcv_nxt = TCP_SKB_CB(skb)->seq + 1;
- tp->copied_seq = tp->rcv_nxt;
+ WRITE_ONCE(tp->rcv_nxt, TCP_SKB_CB(skb)->seq + 1);
+ WRITE_ONCE(tp->copied_seq, tp->rcv_nxt);
tp->rcv_wup = TCP_SKB_CB(skb)->seq + 1;
/* RFC1323: The window in SYN & SYN/ACK segments is
static void tcp_rcv_synrecv_state_fastopen(struct sock *sk)
{
+ struct request_sock *req;
+
tcp_try_undo_loss(sk, false);
/* Reset rtx states to prevent spurious retransmits_timed_out() */
/* Once we leave TCP_SYN_RECV or TCP_FIN_WAIT_1,
* we no longer need req so release it.
*/
- reqsk_fastopen_remove(sk, tcp_sk(sk)->fastopen_rsk, false);
+ req = rcu_dereference_protected(tcp_sk(sk)->fastopen_rsk,
+ lockdep_sock_is_held(sk));
+ reqsk_fastopen_remove(sk, req, false);
/* Re-arm the timer because data may have been sent out.
* This is similar to the regular data transmission case
tcp_mstamp_refresh(tp);
tp->rx_opt.saw_tstamp = 0;
- req = tp->fastopen_rsk;
+ req = rcu_dereference_protected(tp->fastopen_rsk,
+ lockdep_sock_is_held(sk));
if (req) {
bool req_stolen;
tcp_try_undo_spurious_syn(sk);
tp->retrans_stamp = 0;
tcp_init_transfer(sk, BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB);
- tp->copied_seq = tp->rcv_nxt;
+ WRITE_ONCE(tp->copied_seq, tp->rcv_nxt);
}
smp_mb();
tcp_set_state(sk, TCP_ESTABLISHED);
* without appearing to create any others.
*/
if (likely(!tp->repair)) {
- tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2;
- if (tp->write_seq == 0)
- tp->write_seq = 1;
+ u32 seq = tcptw->tw_snd_nxt + 65535 + 2;
+
+ if (!seq)
+ seq = 1;
+ WRITE_ONCE(tp->write_seq, seq);
tp->rx_opt.ts_recent = tcptw->tw_ts_recent;
tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp;
}
tp->rx_opt.ts_recent = 0;
tp->rx_opt.ts_recent_stamp = 0;
if (likely(!tp->repair))
- tp->write_seq = 0;
+ WRITE_ONCE(tp->write_seq, 0);
}
inet->inet_dport = usin->sin_port;
if (likely(!tp->repair)) {
if (!tp->write_seq)
- tp->write_seq = secure_tcp_seq(inet->inet_saddr,
- inet->inet_daddr,
- inet->inet_sport,
- usin->sin_port);
+ WRITE_ONCE(tp->write_seq,
+ secure_tcp_seq(inet->inet_saddr,
+ inet->inet_daddr,
+ inet->inet_sport,
+ usin->sin_port));
tp->tsoffset = secure_tcp_ts_off(sock_net(sk),
inet->inet_saddr,
inet->inet_daddr);
icsk = inet_csk(sk);
tp = tcp_sk(sk);
/* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child() */
- fastopen = tp->fastopen_rsk;
+ fastopen = rcu_dereference(tp->fastopen_rsk);
snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una;
if (sk->sk_state != TCP_LISTEN &&
!between(seq, snd_una, tp->snd_nxt)) {
bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb)
{
- u32 limit = sk->sk_rcvbuf + sk->sk_sndbuf;
+ u32 limit = READ_ONCE(sk->sk_rcvbuf) + READ_ONCE(sk->sk_sndbuf);
struct skb_shared_info *shinfo;
const struct tcphdr *th;
struct tcphdr *thtail;
if (inet_csk(sk)->icsk_bind_hash)
inet_put_port(sk);
- BUG_ON(tp->fastopen_rsk);
+ BUG_ON(rcu_access_pointer(tp->fastopen_rsk));
/* If socket is aborted during connect operation */
tcp_free_fastopen_req(tp);
/* Because we don't lock the socket,
* we might find a transient negative value.
*/
- rx_queue = max_t(int, tp->rcv_nxt - tp->copied_seq, 0);
+ rx_queue = max_t(int, READ_ONCE(tp->rcv_nxt) -
+ READ_ONCE(tp->copied_seq), 0);
seq_printf(f, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX "
"%08X %5u %8d %lu %d %pK %lu %lu %u %u %d",
i, src, srcp, dest, destp, state,
- tp->write_seq - tp->snd_una,
+ READ_ONCE(tp->write_seq) - tp->snd_una,
rx_queue,
timer_active,
jiffies_delta_to_clock_t(timer_expires - jiffies),
struct tcp_request_sock *treq = tcp_rsk(req);
struct inet_connection_sock *newicsk;
struct tcp_sock *oldtp, *newtp;
+ u32 seq;
if (!newsk)
return NULL;
/* Now setup tcp_sock */
newtp->pred_flags = 0;
- newtp->rcv_wup = newtp->copied_seq =
- newtp->rcv_nxt = treq->rcv_isn + 1;
+ seq = treq->rcv_isn + 1;
+ newtp->rcv_wup = seq;
+ WRITE_ONCE(newtp->copied_seq, seq);
+ WRITE_ONCE(newtp->rcv_nxt, seq);
newtp->segs_in = 1;
- newtp->snd_sml = newtp->snd_una =
- newtp->snd_nxt = newtp->snd_up = treq->snt_isn + 1;
+ seq = treq->snt_isn + 1;
+ newtp->snd_sml = newtp->snd_una = seq;
+ WRITE_ONCE(newtp->snd_nxt, seq);
+ newtp->snd_up = seq;
INIT_LIST_HEAD(&newtp->tsq_node);
INIT_LIST_HEAD(&newtp->tsorted_sent_queue);
newtp->total_retrans = req->num_retrans;
tcp_init_xmit_timers(newsk);
- newtp->write_seq = newtp->pushed_seq = treq->snt_isn + 1;
+ WRITE_ONCE(newtp->write_seq, newtp->pushed_seq = treq->snt_isn + 1);
if (sock_flag(newsk, SOCK_KEEPOPEN))
inet_csk_reset_keepalive_timer(newsk,
newtp->rx_opt.mss_clamp = req->mss;
tcp_ecn_openreq_child(newtp, req);
newtp->fastopen_req = NULL;
- newtp->fastopen_rsk = NULL;
+ RCU_INIT_POINTER(newtp->fastopen_rsk, NULL);
__TCP_INC_STATS(sock_net(sk), TCP_MIB_PASSIVEOPENS);
struct tcp_sock *tp = tcp_sk(sk);
unsigned int prior_packets = tp->packets_out;
- tp->snd_nxt = TCP_SKB_CB(skb)->end_seq;
+ WRITE_ONCE(tp->snd_nxt, TCP_SKB_CB(skb)->end_seq);
__skb_unlink(skb, &sk->sk_write_queue);
tcp_rbtree_insert(&sk->tcp_rtx_queue, skb);
struct tcp_sock *tp = tcp_sk(sk);
/* Advance write_seq and place onto the write_queue. */
- tp->write_seq = TCP_SKB_CB(skb)->end_seq;
+ WRITE_ONCE(tp->write_seq, TCP_SKB_CB(skb)->end_seq);
__skb_header_release(skb);
tcp_add_write_queue_tail(sk, skb);
- sk->sk_wmem_queued += skb->truesize;
+ sk_wmem_queued_add(sk, skb->truesize);
sk_mem_charge(sk, skb->truesize);
}
return -ENOMEM; /* We'll just try again later. */
skb_copy_decrypted(buff, skb);
- sk->sk_wmem_queued += buff->truesize;
+ sk_wmem_queued_add(sk, buff->truesize);
sk_mem_charge(sk, buff->truesize);
nlen = skb->len - len - nsize;
buff->truesize += nlen;
if (delta_truesize) {
skb->truesize -= delta_truesize;
- sk->sk_wmem_queued -= delta_truesize;
+ sk_wmem_queued_add(sk, -delta_truesize);
sk_mem_uncharge(sk, delta_truesize);
sock_set_flag(sk, SOCK_QUEUE_SHRUNK);
}
return -ENOMEM;
skb_copy_decrypted(buff, skb);
- sk->sk_wmem_queued += buff->truesize;
+ sk_wmem_queued_add(sk, buff->truesize);
sk_mem_charge(sk, buff->truesize);
buff->truesize += nlen;
skb->truesize -= nlen;
nskb = sk_stream_alloc_skb(sk, probe_size, GFP_ATOMIC, false);
if (!nskb)
return -1;
- sk->sk_wmem_queued += nskb->truesize;
+ sk_wmem_queued_add(sk, nskb->truesize);
sk_mem_charge(sk, nskb->truesize);
skb = tcp_send_head(sk);
/* Don't do any loss probe on a Fast Open connection before 3WHS
* finishes.
*/
- if (tp->fastopen_rsk)
+ if (rcu_access_pointer(tp->fastopen_rsk))
return false;
early_retrans = sock_net(sk)->ipv4.sysctl_tcp_early_retrans;
* if FIN had been sent. This is because retransmit path
* does not change tp->snd_nxt.
*/
- tp->snd_nxt++;
+ WRITE_ONCE(tp->snd_nxt, tp->snd_nxt + 1);
return;
}
} else {
tcp_rtx_queue_unlink_and_free(skb, sk);
__skb_header_release(nskb);
tcp_rbtree_insert(&sk->tcp_rtx_queue, nskb);
- sk->sk_wmem_queued += nskb->truesize;
+ sk_wmem_queued_add(sk, nskb->truesize);
sk_mem_charge(sk, nskb->truesize);
skb = nskb;
}
tp->snd_una = tp->write_seq;
tp->snd_sml = tp->write_seq;
tp->snd_up = tp->write_seq;
- tp->snd_nxt = tp->write_seq;
+ WRITE_ONCE(tp->snd_nxt, tp->write_seq);
if (likely(!tp->repair))
tp->rcv_nxt = 0;
else
tp->rcv_tstamp = tcp_jiffies32;
tp->rcv_wup = tp->rcv_nxt;
- tp->copied_seq = tp->rcv_nxt;
+ WRITE_ONCE(tp->copied_seq, tp->rcv_nxt);
inet_csk(sk)->icsk_rto = tcp_timeout_init(sk);
inet_csk(sk)->icsk_retransmits = 0;
tcb->end_seq += skb->len;
__skb_header_release(skb);
- sk->sk_wmem_queued += skb->truesize;
+ sk_wmem_queued_add(sk, skb->truesize);
sk_mem_charge(sk, skb->truesize);
- tp->write_seq = tcb->end_seq;
+ WRITE_ONCE(tp->write_seq, tcb->end_seq);
tp->packets_out += tcp_skb_pcount(skb);
}
/* We change tp->snd_nxt after the tcp_transmit_skb() call
* in order to make this packet get counted in tcpOutSegs.
*/
- tp->snd_nxt = tp->write_seq;
+ WRITE_ONCE(tp->snd_nxt, tp->write_seq);
tp->pushed_seq = tp->write_seq;
buff = tcp_send_head(sk);
if (unlikely(buff)) {
- tp->snd_nxt = TCP_SKB_CB(buff)->seq;
+ WRITE_ONCE(tp->snd_nxt, TCP_SKB_CB(buff)->seq);
tp->pushed_seq = TCP_SKB_CB(buff)->seq;
}
TCP_INC_STATS(sock_net(sk), TCP_MIB_ACTIVEOPENS);
* Timer for Fast Open socket to retransmit SYNACK. Note that the
* sk here is the child socket, not the parent (listener) socket.
*/
-static void tcp_fastopen_synack_timer(struct sock *sk)
+static void tcp_fastopen_synack_timer(struct sock *sk, struct request_sock *req)
{
struct inet_connection_sock *icsk = inet_csk(sk);
int max_retries = icsk->icsk_syn_retries ? :
sock_net(sk)->ipv4.sysctl_tcp_synack_retries + 1; /* add one more retry for fastopen */
struct tcp_sock *tp = tcp_sk(sk);
- struct request_sock *req;
- req = tcp_sk(sk)->fastopen_rsk;
req->rsk_ops->syn_ack_timeout(req);
if (req->num_timeout >= max_retries) {
struct tcp_sock *tp = tcp_sk(sk);
struct net *net = sock_net(sk);
struct inet_connection_sock *icsk = inet_csk(sk);
+ struct request_sock *req;
- if (tp->fastopen_rsk) {
+ req = rcu_dereference_protected(tp->fastopen_rsk,
+ lockdep_sock_is_held(sk));
+ if (req) {
WARN_ON_ONCE(sk->sk_state != TCP_SYN_RECV &&
sk->sk_state != TCP_FIN_WAIT1);
- tcp_fastopen_synack_timer(sk);
+ tcp_fastopen_synack_timer(sk, req);
/* Before we receive ACK to our SYN-ACK don't retransmit
* anything else (e.g., data or FIN segments).
*/
{
ether_setup(dev);
+ dev->max_mtu = 0;
dev->netdev_ops = &ip6erspan_netdev_ops;
dev->needs_free_netdev = true;
dev->priv_destructor = ip6gre_dev_free;
inet6_sk(skb->sk) : NULL;
struct ip6_frag_state state;
unsigned int mtu, hlen, nexthdr_offset;
+ ktime_t tstamp = skb->tstamp;
int hroom, err = 0;
__be32 frag_id;
u8 *prevhdr, nexthdr = 0;
if (iter.frag)
ip6_fraglist_prepare(skb, &iter);
+ skb->tstamp = tstamp;
err = output(net, sk, skb);
if (!err)
IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
/*
* Put this fragment into the sending queue.
*/
+ frag->tstamp = tstamp;
err = output(net, sk, frag);
if (err)
goto fail;
struct sk_buff *))
{
int frag_max_size = BR_INPUT_SKB_CB(skb)->frag_max_size;
+ ktime_t tstamp = skb->tstamp;
struct ip6_frag_state state;
u8 *prevhdr, nexthdr = 0;
unsigned int mtu, hlen;
if (iter.frag)
ip6_fraglist_prepare(skb, &iter);
+ skb->tstamp = tstamp;
err = output(net, sk, data, skb);
if (err || !iter.frag)
break;
goto blackhole;
}
+ skb2->tstamp = tstamp;
err = output(net, sk, data, skb2);
if (err)
goto blackhole;
!ipv6_addr_equal(&sk->sk_v6_daddr, &usin->sin6_addr)) {
tp->rx_opt.ts_recent = 0;
tp->rx_opt.ts_recent_stamp = 0;
- tp->write_seq = 0;
+ WRITE_ONCE(tp->write_seq, 0);
}
sk->sk_v6_daddr = usin->sin6_addr;
if (likely(!tp->repair)) {
if (!tp->write_seq)
- tp->write_seq = secure_tcpv6_seq(np->saddr.s6_addr32,
- sk->sk_v6_daddr.s6_addr32,
- inet->inet_sport,
- inet->inet_dport);
+ WRITE_ONCE(tp->write_seq,
+ secure_tcpv6_seq(np->saddr.s6_addr32,
+ sk->sk_v6_daddr.s6_addr32,
+ inet->inet_sport,
+ inet->inet_dport));
tp->tsoffset = secure_tcpv6_ts_off(sock_net(sk),
np->saddr.s6_addr32,
sk->sk_v6_daddr.s6_addr32);
tp = tcp_sk(sk);
/* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child() */
- fastopen = tp->fastopen_rsk;
+ fastopen = rcu_dereference(tp->fastopen_rsk);
snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una;
if (sk->sk_state != TCP_LISTEN &&
!between(seq, snd_una, tp->snd_nxt)) {
/* Because we don't lock the socket,
* we might find a transient negative value.
*/
- rx_queue = max_t(int, tp->rcv_nxt - tp->copied_seq, 0);
+ rx_queue = max_t(int, READ_ONCE(tp->rcv_nxt) -
+ READ_ONCE(tp->copied_seq), 0);
seq_printf(seq,
"%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
dest->s6_addr32[0], dest->s6_addr32[1],
dest->s6_addr32[2], dest->s6_addr32[3], destp,
state,
- tp->write_seq - tp->snd_una,
+ READ_ONCE(tp->write_seq) - tp->snd_una,
rx_queue,
timer_active,
jiffies_delta_to_clock_t(timer_expires - jiffies),
*
* Send data via reliable llc2 connection.
* Returns 0 upon success, non-zero if action did not succeed.
+ *
+ * This function always consumes a reference to the skb.
*/
static int llc_ui_send_data(struct sock* sk, struct sk_buff *skb, int noblock)
{
struct llc_sock* llc = llc_sk(sk);
- int rc = 0;
if (unlikely(llc_data_accept_state(llc->state) ||
llc->remote_busy_flag ||
llc->p_flag)) {
long timeout = sock_sndtimeo(sk, noblock);
+ int rc;
rc = llc_ui_wait_for_busy_core(sk, timeout);
+ if (rc) {
+ kfree_skb(skb);
+ return rc;
+ }
}
- if (unlikely(!rc))
- rc = llc_build_and_send_pkt(sk, skb);
- return rc;
+ return llc_build_and_send_pkt(sk, skb);
}
static void llc_ui_sk_init(struct socket *sock, struct sock *sk)
DECLARE_SOCKADDR(struct sockaddr_llc *, addr, msg->msg_name);
int flags = msg->msg_flags;
int noblock = flags & MSG_DONTWAIT;
- struct sk_buff *skb;
+ struct sk_buff *skb = NULL;
size_t size = 0;
int rc = -EINVAL, copied = 0, hdrlen;
lock_sock(sk);
if (addr) {
if (msg->msg_namelen < sizeof(*addr))
- goto release;
+ goto out;
} else {
if (llc_ui_addr_null(&llc->addr))
- goto release;
+ goto out;
addr = &llc->addr;
}
/* must bind connection to sap if user hasn't done it. */
/* bind to sap with null dev, exclusive. */
rc = llc_ui_autobind(sock, addr);
if (rc)
- goto release;
+ goto out;
}
hdrlen = llc->dev->hard_header_len + llc_ui_header_len(sk, addr);
size = hdrlen + len;
copied = size - hdrlen;
rc = -EINVAL;
if (copied < 0)
- goto release;
+ goto out;
release_sock(sk);
skb = sock_alloc_send_skb(sk, size, noblock, &rc);
lock_sock(sk);
if (!skb)
- goto release;
+ goto out;
skb->dev = llc->dev;
skb->protocol = llc_proto_type(addr->sllc_arphrd);
skb_reserve(skb, hdrlen);
if (sk->sk_type == SOCK_DGRAM || addr->sllc_ua) {
llc_build_and_send_ui_pkt(llc->sap, skb, addr->sllc_mac,
addr->sllc_sap);
+ skb = NULL;
goto out;
}
if (addr->sllc_test) {
llc_build_and_send_test_pkt(llc->sap, skb, addr->sllc_mac,
addr->sllc_sap);
+ skb = NULL;
goto out;
}
if (addr->sllc_xid) {
llc_build_and_send_xid_pkt(llc->sap, skb, addr->sllc_mac,
addr->sllc_sap);
+ skb = NULL;
goto out;
}
rc = -ENOPROTOOPT;
if (!(sk->sk_type == SOCK_STREAM && !addr->sllc_ua))
goto out;
rc = llc_ui_send_data(sk, skb, noblock);
+ skb = NULL;
out:
- if (rc) {
- kfree_skb(skb);
-release:
+ kfree_skb(skb);
+ if (rc)
dprintk("%s: failed sending from %02X to %02X: %d\n",
__func__, llc->laddr.lsap, llc->daddr.lsap, rc);
- }
release_sock(sk);
return rc ? : copied;
}
llc_pdu_init_as_i_cmd(skb, 1, llc->vS, llc->vR);
rc = llc_mac_hdr_init(skb, llc->dev->dev_addr, llc->daddr.mac);
if (likely(!rc)) {
+ skb_get(skb);
llc_conn_send_pdu(sk, skb);
llc_conn_ac_inc_vs_by_1(sk, skb);
}
llc_pdu_init_as_i_cmd(skb, 0, llc->vS, llc->vR);
rc = llc_mac_hdr_init(skb, llc->dev->dev_addr, llc->daddr.mac);
if (likely(!rc)) {
- rc = llc_conn_send_pdu(sk, skb);
+ skb_get(skb);
+ llc_conn_send_pdu(sk, skb);
llc_conn_ac_inc_vs_by_1(sk, skb);
}
return rc;
llc_pdu_init_as_i_cmd(skb, 0, llc->vS, llc->vR);
rc = llc_mac_hdr_init(skb, llc->dev->dev_addr, llc->daddr.mac);
if (likely(!rc)) {
+ skb_get(skb);
llc_conn_send_pdu(sk, skb);
llc_conn_ac_inc_vs_by_1(sk, skb);
}
llc_pdu_init_as_i_cmd(skb, llc->ack_pf, llc->vS, llc->vR);
rc = llc_mac_hdr_init(skb, llc->dev->dev_addr, llc->daddr.mac);
if (likely(!rc)) {
- rc = llc_conn_send_pdu(sk, skb);
+ skb_get(skb);
+ llc_conn_send_pdu(sk, skb);
llc_conn_ac_inc_vs_by_1(sk, skb);
}
return rc;
#endif
static int llc_find_offset(int state, int ev_type);
-static int llc_conn_send_pdus(struct sock *sk, struct sk_buff *skb);
+static void llc_conn_send_pdus(struct sock *sk);
static int llc_conn_service(struct sock *sk, struct sk_buff *skb);
static int llc_exec_conn_trans_actions(struct sock *sk,
struct llc_conn_state_trans *trans,
* (executing it's actions and changing state), upper layer will be
* indicated or confirmed, if needed. Returns 0 for success, 1 for
* failure. The socket lock has to be held before calling this function.
+ *
+ * This function always consumes a reference to the skb.
*/
int llc_conn_state_process(struct sock *sk, struct sk_buff *skb)
{
struct llc_sock *llc = llc_sk(skb->sk);
struct llc_conn_state_ev *ev = llc_conn_ev(skb);
- /*
- * We have to hold the skb, because llc_conn_service will kfree it in
- * the sending path and we need to look at the skb->cb, where we encode
- * llc_conn_state_ev.
- */
- skb_get(skb);
ev->ind_prim = ev->cfm_prim = 0;
/*
* Send event to state machine
rc = llc_conn_service(skb->sk, skb);
if (unlikely(rc != 0)) {
printk(KERN_ERR "%s: llc_conn_service failed\n", __func__);
- goto out_kfree_skb;
- }
-
- if (unlikely(!ev->ind_prim && !ev->cfm_prim)) {
- /* indicate or confirm not required */
- if (!skb->next)
- goto out_kfree_skb;
goto out_skb_put;
}
- if (unlikely(ev->ind_prim && ev->cfm_prim)) /* Paranoia */
- skb_get(skb);
-
switch (ev->ind_prim) {
case LLC_DATA_PRIM:
+ skb_get(skb);
llc_save_primitive(sk, skb, LLC_DATA_PRIM);
if (unlikely(sock_queue_rcv_skb(sk, skb))) {
/*
* skb->sk pointing to the newly created struct sock in
* llc_conn_handler. -acme
*/
+ skb_get(skb);
skb_queue_tail(&sk->sk_receive_queue, skb);
sk->sk_state_change(sk);
break;
sk->sk_state_change(sk);
}
}
- kfree_skb(skb);
sock_put(sk);
break;
case LLC_RESET_PRIM:
* RESET is not being notified to upper layers for now
*/
printk(KERN_INFO "%s: received a reset ind!\n", __func__);
- kfree_skb(skb);
break;
default:
- if (ev->ind_prim) {
+ if (ev->ind_prim)
printk(KERN_INFO "%s: received unknown %d prim!\n",
__func__, ev->ind_prim);
- kfree_skb(skb);
- }
/* No indication */
break;
}
printk(KERN_INFO "%s: received a reset conf!\n", __func__);
break;
default:
- if (ev->cfm_prim) {
+ if (ev->cfm_prim)
printk(KERN_INFO "%s: received unknown %d prim!\n",
__func__, ev->cfm_prim);
- break;
- }
- goto out_skb_put; /* No confirmation */
+ /* No confirmation */
+ break;
}
-out_kfree_skb:
- kfree_skb(skb);
out_skb_put:
kfree_skb(skb);
return rc;
}
-int llc_conn_send_pdu(struct sock *sk, struct sk_buff *skb)
+void llc_conn_send_pdu(struct sock *sk, struct sk_buff *skb)
{
/* queue PDU to send to MAC layer */
skb_queue_tail(&sk->sk_write_queue, skb);
- return llc_conn_send_pdus(sk, skb);
+ llc_conn_send_pdus(sk); /* drain sk_write_queue; the helper returns void, so no transmit status is reported to the caller */
}
/**
if (howmany_resend > 0)
llc->vS = (llc->vS + 1) % LLC_2_SEQ_NBR_MODULO;
/* any PDUs to re-send are queued up; start sending to MAC */
- llc_conn_send_pdus(sk, NULL);
+ llc_conn_send_pdus(sk);
out:;
}
if (howmany_resend > 0)
llc->vS = (llc->vS + 1) % LLC_2_SEQ_NBR_MODULO;
/* any PDUs to re-send are queued up; start sending to MAC */
- llc_conn_send_pdus(sk, NULL);
+ llc_conn_send_pdus(sk);
out:;
}
/**
* llc_conn_send_pdus - Sends queued PDUs
* @sk: active connection
- * @hold_skb: the skb held by caller, or NULL if does not care
*
- * Sends queued pdus to MAC layer for transmission. When @hold_skb is
- * NULL, always return 0. Otherwise, return 0 if @hold_skb is sent
- * successfully, or 1 for failure.
+ * Sends queued pdus to MAC layer for transmission.
*/
-static int llc_conn_send_pdus(struct sock *sk, struct sk_buff *hold_skb)
+static void llc_conn_send_pdus(struct sock *sk)
{
struct sk_buff *skb;
- int ret = 0;
while ((skb = skb_dequeue(&sk->sk_write_queue)) != NULL) {
struct llc_pdu_sn *pdu = llc_pdu_sn_hdr(skb);
skb_queue_tail(&llc_sk(sk)->pdu_unack_q, skb);
if (!skb2)
break;
- dev_queue_xmit(skb2);
- } else {
- bool is_target = skb == hold_skb;
- int rc;
-
- if (is_target)
- skb_get(skb);
- rc = dev_queue_xmit(skb);
- if (is_target)
- ret = rc;
+ skb = skb2;
}
+ dev_queue_xmit(skb);
}
-
- return ret;
}
/**
else {
dprintk("%s: adding to backlog...\n", __func__);
llc_set_backlog_type(skb, LLC_PACKET);
- if (sk_add_backlog(sk, skb, sk->sk_rcvbuf))
+ if (sk_add_backlog(sk, skb, READ_ONCE(sk->sk_rcvbuf)))
goto drop_unlock;
}
out:
* closed and -EBUSY when sending data is not permitted in this state or
* LLC has send an I pdu with p bit set to 1 and is waiting for it's
* response.
+ *
+ * This function always consumes a reference to the skb.
*/
int llc_build_and_send_pkt(struct sock *sk, struct sk_buff *skb)
{
struct llc_sock *llc = llc_sk(sk);
if (unlikely(llc->state == LLC_CONN_STATE_ADM))
- goto out;
+ goto out_free;
rc = -EBUSY;
if (unlikely(llc_data_accept_state(llc->state) || /* data_conn_refuse */
llc->p_flag)) {
llc->failed_data_req = 1;
- goto out;
+ goto out_free;
}
ev = llc_conn_ev(skb);
ev->type = LLC_CONN_EV_TYPE_PRIM;
ev->prim = LLC_DATA_PRIM;
ev->prim_type = LLC_PRIM_TYPE_REQ;
skb->dev = llc->dev;
- rc = llc_conn_state_process(sk, skb);
-out:
+ return llc_conn_state_process(sk, skb);
+
+out_free:
+ kfree_skb(skb);
return rc;
}
ev->daddr.lsap, LLC_PDU_CMD);
llc_pdu_init_as_ui_cmd(skb);
rc = llc_mac_hdr_init(skb, ev->saddr.mac, ev->daddr.mac);
- if (likely(!rc))
+ if (likely(!rc)) {
+ skb_get(skb);
rc = dev_queue_xmit(skb);
+ }
return rc;
}
ev->daddr.lsap, LLC_PDU_CMD);
llc_pdu_init_as_xid_cmd(skb, LLC_XID_NULL_CLASS_2, 0);
rc = llc_mac_hdr_init(skb, ev->saddr.mac, ev->daddr.mac);
- if (likely(!rc))
+ if (likely(!rc)) {
+ skb_get(skb);
rc = dev_queue_xmit(skb);
+ }
return rc;
}
ev->daddr.lsap, LLC_PDU_CMD);
llc_pdu_init_as_test_cmd(skb);
rc = llc_mac_hdr_init(skb, ev->saddr.mac, ev->daddr.mac);
- if (likely(!rc))
+ if (likely(!rc)) {
+ skb_get(skb);
rc = dev_queue_xmit(skb);
+ }
return rc;
}
* After executing actions of the event, upper layer will be indicated
* if needed(on receiving an UI frame). sk can be null for the
* datalink_proto case.
+ *
+ * This function always consumes a reference to the skb.
*/
static void llc_sap_state_process(struct llc_sap *sap, struct sk_buff *skb)
{
struct llc_sap_state_ev *ev = llc_sap_ev(skb);
- /*
- * We have to hold the skb, because llc_sap_next_state
- * will kfree it in the sending path and we need to
- * look at the skb->cb, where we encode llc_sap_state_ev.
- */
- skb_get(skb);
ev->ind_cfm_flag = 0;
llc_sap_next_state(sap, skb);
- if (ev->ind_cfm_flag == LLC_IND) {
- if (skb->sk->sk_state == TCP_LISTEN)
- kfree_skb(skb);
- else {
- llc_save_primitive(skb->sk, skb, ev->prim);
- /* queue skb to the user. */
- if (sock_queue_rcv_skb(skb->sk, skb))
- kfree_skb(skb);
- }
+ if (ev->ind_cfm_flag == LLC_IND && skb->sk->sk_state != TCP_LISTEN) { /* indication for a non-listening socket: try to deliver the skb */
+ llc_save_primitive(skb->sk, skb, ev->prim);
+
+ /* queue skb to the user. */
+ if (sock_queue_rcv_skb(skb->sk, skb) == 0)
+ return; /* queued (0 return): skip the kfree_skb() below; presumably the receive queue now holds the skb */
}
kfree_skb(skb);
}
rcu_read_lock();
ssid = ieee80211_bss_get_ie(cbss, WLAN_EID_SSID);
- if (WARN_ON_ONCE(ssid == NULL))
+ if (WARN_ONCE(!ssid || ssid[1] > IEEE80211_MAX_SSID_LEN,
+ "invalid SSID element (len=%d)", ssid ? ssid[1] : -1))
ssid_len = 0;
else
ssid_len = ssid[1];
rcu_read_lock();
ssidie = ieee80211_bss_get_ie(req->bss, WLAN_EID_SSID);
- if (!ssidie) {
+ if (!ssidie || ssidie[1] > sizeof(assoc_data->ssid)) {
rcu_read_unlock();
kfree(assoc_data);
return -EINVAL;
case cpu_to_le16(IEEE80211_STYPE_PROBE_RESP):
/* process for all: mesh, mlme, ibss */
break;
+ case cpu_to_le16(IEEE80211_STYPE_DEAUTH):
+ if (is_multicast_ether_addr(mgmt->da) &&
+ !is_broadcast_ether_addr(mgmt->da))
+ return RX_DROP_MONITOR;
+
+ /* process only for station/IBSS */
+ if (sdata->vif.type != NL80211_IFTYPE_STATION &&
+ sdata->vif.type != NL80211_IFTYPE_ADHOC)
+ return RX_DROP_MONITOR;
+ break;
case cpu_to_le16(IEEE80211_STYPE_ASSOC_RESP):
case cpu_to_le16(IEEE80211_STYPE_REASSOC_RESP):
- case cpu_to_le16(IEEE80211_STYPE_DEAUTH):
case cpu_to_le16(IEEE80211_STYPE_DISASSOC):
if (is_multicast_ether_addr(mgmt->da) &&
!is_broadcast_ether_addr(mgmt->da))
return 0;
}
+static bool __ieee80211_can_leave_ch(struct ieee80211_sub_if_data *sdata)
+{ /* Gate used by the scan paths: returns true when radar detection is not
+ * required, or when it is required but pre-CAC is allowed for this wiphy
+ * and no interface on this hardware has cac_started set. Returns false
+ * otherwise.
+ */
+ struct ieee80211_local *local = sdata->local;
+ struct ieee80211_sub_if_data *sdata_iter;
+
+ if (!ieee80211_is_radar_required(local))
+ return true;
+
+ if (!regulatory_pre_cac_allowed(local->hw.wiphy))
+ return false;
+
+ mutex_lock(&local->iflist_mtx); /* held while walking local->interfaces */
+ list_for_each_entry(sdata_iter, &local->interfaces, list) {
+ if (sdata_iter->wdev.cac_started) { /* one interface with CAC started is enough to refuse */
+ mutex_unlock(&local->iflist_mtx); /* the early exit must drop the lock as well */
+ return false;
+ }
+ }
+ mutex_unlock(&local->iflist_mtx);
+
+ return true;
+}
+
static bool ieee80211_can_scan(struct ieee80211_local *local,
struct ieee80211_sub_if_data *sdata)
{
- if (ieee80211_is_radar_required(local))
+ if (!__ieee80211_can_leave_ch(sdata))
return false;
if (!list_empty(&local->roc_list))
lockdep_assert_held(&local->mtx);
- if (local->scan_req || ieee80211_is_radar_required(local))
+ if (local->scan_req)
+ return -EBUSY;
+
+ if (!__ieee80211_can_leave_ch(sdata))
return -EBUSY;
if (!ieee80211_can_scan(local, sdata)) {
if (nf_ct_is_confirmed(ct))
extra_jiffies += nfct_time_stamp;
- if (ct->timeout != extra_jiffies)
- ct->timeout = extra_jiffies;
+ if (READ_ONCE(ct->timeout) != extra_jiffies)
+ WRITE_ONCE(ct->timeout, extra_jiffies);
acct:
if (do_acct)
nf_ct_acct_update(ct, ctinfo, skb->len);
{
int err;
- err = skb_mpls_push(skb, mpls->mpls_lse, mpls->mpls_ethertype);
+ err = skb_mpls_push(skb, mpls->mpls_lse, mpls->mpls_ethertype,
+ skb->mac_len);
if (err)
return err;
{
int err;
- err = skb_mpls_pop(skb, ethertype);
+ err = skb_mpls_pop(skb, ethertype, skb->mac_len);
if (err)
return err;
struct rxrpc_peer *peer; /* Peer record for remote address */
struct rxrpc_sock __rcu *socket; /* socket responsible */
struct rxrpc_net *rxnet; /* Network namespace to which call belongs */
+ const struct rxrpc_security *security; /* applied security module */
struct mutex user_mutex; /* User access mutex */
unsigned long ack_at; /* When deferred ACK needs to happen */
unsigned long ack_lost_at; /* When ACK is figured as lost */
smp_store_release(&b->conn_backlog_head,
(head + 1) & (size - 1));
- trace_rxrpc_conn(conn, rxrpc_conn_new_service,
+ trace_rxrpc_conn(conn->debug_id, rxrpc_conn_new_service,
atomic_read(&conn->usage), here);
}
call->flags |= (1 << RXRPC_CALL_IS_SERVICE);
call->state = RXRPC_CALL_SERVER_PREALLOC;
- trace_rxrpc_call(call, rxrpc_call_new_service,
+ trace_rxrpc_call(call->debug_id, rxrpc_call_new_service,
atomic_read(&call->usage),
here, (const void *)user_call_ID);
rxrpc_see_call(call);
call->conn = conn;
+ call->security = conn->security;
call->peer = rxrpc_get_peer(conn->params.peer);
call->cong_cwnd = call->peer->cong_cwnd;
return call;
if (p->intr)
__set_bit(RXRPC_CALL_IS_INTR, &call->flags);
call->tx_total_len = p->tx_total_len;
- trace_rxrpc_call(call, rxrpc_call_new_client, atomic_read(&call->usage),
+ trace_rxrpc_call(call->debug_id, rxrpc_call_new_client,
+ atomic_read(&call->usage),
here, (const void *)p->user_call_ID);
/* We need to protect a partially set up call against the user as we
if (ret < 0)
goto error;
- trace_rxrpc_call(call, rxrpc_call_connected, atomic_read(&call->usage),
- here, NULL);
+ trace_rxrpc_call(call->debug_id, rxrpc_call_connected,
+ atomic_read(&call->usage), here, NULL);
rxrpc_start_call_timer(call);
error:
__rxrpc_set_call_completion(call, RXRPC_CALL_LOCAL_ERROR,
RX_CALL_DEAD, ret);
- trace_rxrpc_call(call, rxrpc_call_error, atomic_read(&call->usage),
- here, ERR_PTR(ret));
+ trace_rxrpc_call(call->debug_id, rxrpc_call_error,
+ atomic_read(&call->usage), here, ERR_PTR(ret));
rxrpc_release_call(rx, call);
mutex_unlock(&call->user_mutex);
rxrpc_put_call(call, rxrpc_call_put);
if (n == 0)
return false;
if (rxrpc_queue_work(&call->processor))
- trace_rxrpc_call(call, rxrpc_call_queued, n + 1, here, NULL);
+ trace_rxrpc_call(call->debug_id, rxrpc_call_queued, n + 1,
+ here, NULL);
else
rxrpc_put_call(call, rxrpc_call_put_noqueue);
return true;
int n = atomic_read(&call->usage);
ASSERTCMP(n, >=, 1);
if (rxrpc_queue_work(&call->processor))
- trace_rxrpc_call(call, rxrpc_call_queued_ref, n, here, NULL);
+ trace_rxrpc_call(call->debug_id, rxrpc_call_queued_ref, n,
+ here, NULL);
else
rxrpc_put_call(call, rxrpc_call_put_noqueue);
return true;
if (call) {
int n = atomic_read(&call->usage);
- trace_rxrpc_call(call, rxrpc_call_seen, n, here, NULL);
+ trace_rxrpc_call(call->debug_id, rxrpc_call_seen, n,
+ here, NULL);
}
}
const void *here = __builtin_return_address(0);
int n = atomic_inc_return(&call->usage);
- trace_rxrpc_call(call, op, n, here, NULL);
+ trace_rxrpc_call(call->debug_id, op, n, here, NULL);
}
/*
_enter("{%d,%d}", call->debug_id, atomic_read(&call->usage));
- trace_rxrpc_call(call, rxrpc_call_release, atomic_read(&call->usage),
+ trace_rxrpc_call(call->debug_id, rxrpc_call_release,
+ atomic_read(&call->usage),
here, (const void *)call->flags);
ASSERTCMP(call->state, ==, RXRPC_CALL_COMPLETE);
_debug("RELEASE CALL %p (%d CONN %p)", call, call->debug_id, conn);
- if (conn) {
+ if (conn)
rxrpc_disconnect_call(call);
- conn->security->free_call_crypto(call);
- }
+ if (call->security)
+ call->security->free_call_crypto(call);
rxrpc_cleanup_ring(call);
_leave("");
{
struct rxrpc_net *rxnet = call->rxnet;
const void *here = __builtin_return_address(0);
+ unsigned int debug_id = call->debug_id;
int n;
ASSERT(call != NULL);
n = atomic_dec_return(&call->usage);
- trace_rxrpc_call(call, op, n, here, NULL);
+ trace_rxrpc_call(debug_id, op, n, here, NULL);
ASSERTCMP(n, >=, 0);
if (n == 0) {
_debug("call %d dead", call->debug_id);
rxrpc_get_local(conn->params.local);
key_get(conn->params.key);
- trace_rxrpc_conn(conn, rxrpc_conn_new_client, atomic_read(&conn->usage),
+ trace_rxrpc_conn(conn->debug_id, rxrpc_conn_new_client,
+ atomic_read(&conn->usage),
__builtin_return_address(0));
trace_rxrpc_client(conn, -1, rxrpc_client_alloc);
_leave(" = %p", conn);
if (cp->exclusive) {
call->conn = candidate;
+ call->security = candidate->security;
call->security_ix = candidate->security_ix;
call->service_id = candidate->service_id;
_leave(" = 0 [exclusive %d]", candidate->debug_id);
candidate_published:
set_bit(RXRPC_CONN_IN_CLIENT_CONNS, &candidate->flags);
call->conn = candidate;
+ call->security = candidate->security;
call->security_ix = candidate->security_ix;
call->service_id = candidate->service_id;
spin_unlock(&local->client_conns_lock);
spin_lock(&conn->channel_lock);
call->conn = conn;
+ call->security = conn->security;
call->security_ix = conn->security_ix;
call->service_id = conn->service_id;
list_add_tail(&call->chan_wait_link, &conn->waiting_calls);
void rxrpc_put_client_conn(struct rxrpc_connection *conn)
{
const void *here = __builtin_return_address(0);
+ unsigned int debug_id = conn->debug_id;
int n;
do {
n = atomic_dec_return(&conn->usage);
- trace_rxrpc_conn(conn, rxrpc_conn_put_client, n, here);
+ trace_rxrpc_conn(debug_id, rxrpc_conn_put_client, n, here);
if (n > 0)
return;
ASSERTCMP(n, >=, 0);
if (n == 0)
return false;
if (rxrpc_queue_work(&conn->processor))
- trace_rxrpc_conn(conn, rxrpc_conn_queued, n + 1, here);
+ trace_rxrpc_conn(conn->debug_id, rxrpc_conn_queued, n + 1, here);
else
rxrpc_put_connection(conn);
return true;
if (conn) {
int n = atomic_read(&conn->usage);
- trace_rxrpc_conn(conn, rxrpc_conn_seen, n, here);
+ trace_rxrpc_conn(conn->debug_id, rxrpc_conn_seen, n, here);
}
}
const void *here = __builtin_return_address(0);
int n = atomic_inc_return(&conn->usage);
- trace_rxrpc_conn(conn, rxrpc_conn_got, n, here);
+ trace_rxrpc_conn(conn->debug_id, rxrpc_conn_got, n, here);
}
/*
if (conn) {
int n = atomic_fetch_add_unless(&conn->usage, 1, 0);
if (n > 0)
- trace_rxrpc_conn(conn, rxrpc_conn_got, n + 1, here);
+ trace_rxrpc_conn(conn->debug_id, rxrpc_conn_got, n + 1, here);
else
conn = NULL;
}
void rxrpc_put_service_conn(struct rxrpc_connection *conn)
{
const void *here = __builtin_return_address(0);
+ unsigned int debug_id = conn->debug_id;
int n;
n = atomic_dec_return(&conn->usage);
- trace_rxrpc_conn(conn, rxrpc_conn_put_service, n, here);
+ trace_rxrpc_conn(debug_id, rxrpc_conn_put_service, n, here);
ASSERTCMP(n, >=, 0);
if (n == 1)
rxrpc_set_service_reap_timer(conn->params.local->rxnet,
*/
if (atomic_cmpxchg(&conn->usage, 1, 0) != 1)
continue;
- trace_rxrpc_conn(conn, rxrpc_conn_reap_service, 0, NULL);
+ trace_rxrpc_conn(conn->debug_id, rxrpc_conn_reap_service, 0, NULL);
if (rxrpc_conn_is_client(conn))
BUG();
list_add_tail(&conn->proc_link, &rxnet->conn_proc_list);
write_unlock(&rxnet->conn_lock);
- trace_rxrpc_conn(conn, rxrpc_conn_new_service,
+ trace_rxrpc_conn(conn->debug_id, rxrpc_conn_new_service,
atomic_read(&conn->usage),
__builtin_return_address(0));
}
{
struct sock_exterr_skb *serr;
struct sockaddr_rxrpc srx;
- struct rxrpc_local *local = sk->sk_user_data;
+ struct rxrpc_local *local;
struct rxrpc_peer *peer;
struct sk_buff *skb;
+ rcu_read_lock();
+ local = rcu_dereference_sk_user_data(sk);
+ if (unlikely(!local)) {
+ rcu_read_unlock();
+ return;
+ }
_enter("%p{%d}", sk, local->debug_id);
/* Clear the outstanding error value on the socket so that it doesn't
skb = sock_dequeue_err_skb(sk);
if (!skb) {
+ rcu_read_unlock();
_leave("UDP socket errqueue empty");
return;
}
serr = SKB_EXT_ERR(skb);
if (!skb->len && serr->ee.ee_origin == SO_EE_ORIGIN_TIMESTAMPING) {
_leave("UDP empty message");
+ rcu_read_unlock();
rxrpc_free_skb(skb, rxrpc_skb_freed);
return;
}
- rcu_read_lock();
peer = rxrpc_lookup_peer_icmp_rcu(local, skb, &srx);
if (peer && !rxrpc_get_peer_maybe(peer))
peer = NULL;
peer = kzalloc(sizeof(struct rxrpc_peer), gfp);
if (peer) {
atomic_set(&peer->usage, 1);
- peer->local = local;
+ peer->local = rxrpc_get_local(local);
INIT_HLIST_HEAD(&peer->error_targets);
peer->service_conns = RB_ROOT;
seqlock_init(&peer->service_conn_lock);
unsigned long hash_key;
hash_key = rxrpc_peer_hash_key(local, &peer->srx);
- peer->local = local;
rxrpc_init_peer(rx, peer, hash_key);
spin_lock(&rxnet->peer_hash_lock);
int n;
n = atomic_inc_return(&peer->usage);
- trace_rxrpc_peer(peer, rxrpc_peer_got, n, here);
+ trace_rxrpc_peer(peer->debug_id, rxrpc_peer_got, n, here);
return peer;
}
if (peer) {
int n = atomic_fetch_add_unless(&peer->usage, 1, 0);
if (n > 0)
- trace_rxrpc_peer(peer, rxrpc_peer_got, n + 1, here);
+ trace_rxrpc_peer(peer->debug_id, rxrpc_peer_got, n + 1, here);
else
peer = NULL;
}
list_del_init(&peer->keepalive_link);
spin_unlock_bh(&rxnet->peer_hash_lock);
+ rxrpc_put_local(peer->local);
kfree_rcu(peer, rcu);
}
void rxrpc_put_peer(struct rxrpc_peer *peer)
{
const void *here = __builtin_return_address(0);
+ unsigned int debug_id;
int n;
if (peer) {
+ debug_id = peer->debug_id;
n = atomic_dec_return(&peer->usage);
- trace_rxrpc_peer(peer, rxrpc_peer_put, n, here);
+ trace_rxrpc_peer(debug_id, rxrpc_peer_put, n, here);
if (n == 0)
__rxrpc_put_peer(peer);
}
void rxrpc_put_peer_locked(struct rxrpc_peer *peer)
{
const void *here = __builtin_return_address(0);
+ unsigned int debug_id = peer->debug_id;
int n;
n = atomic_dec_return(&peer->usage);
- trace_rxrpc_peer(peer, rxrpc_peer_put, n, here);
+ trace_rxrpc_peer(debug_id, rxrpc_peer_put, n, here);
if (n == 0) {
hash_del_rcu(&peer->hash_link);
list_del_init(&peer->keepalive_link);
+ rxrpc_put_local(peer->local);
kfree_rcu(peer, rcu);
}
}
seq += subpacket;
}
- return call->conn->security->verify_packet(call, skb, offset, len,
- seq, cksum);
+ return call->security->verify_packet(call, skb, offset, len,
+ seq, cksum);
}
/*
*_offset = offset;
*_len = len;
- call->conn->security->locate_data(call, skb, _offset, _len);
+ call->security->locate_data(call, skb, _offset, _len);
return 0;
}
call->tx_winsize)
sp->hdr.flags |= RXRPC_MORE_PACKETS;
- ret = conn->security->secure_packet(
+ ret = call->security->secure_packet(
call, skb, skb->mark, skb->head);
if (ret < 0)
goto out;
case RXRPC_CALL_SERVER_PREALLOC:
case RXRPC_CALL_SERVER_SECURING:
case RXRPC_CALL_SERVER_ACCEPTING:
+ rxrpc_put_call(call, rxrpc_call_put);
ret = -EBUSY;
goto error_release_sock;
default:
}
static const struct nla_policy tcf_action_policy[TCA_ACT_MAX + 1] = {
- [TCA_ACT_KIND] = { .type = NLA_NUL_STRING,
- .len = IFNAMSIZ - 1 },
+ [TCA_ACT_KIND] = { .type = NLA_STRING },
[TCA_ACT_INDEX] = { .type = NLA_U32 },
[TCA_ACT_COOKIE] = { .type = NLA_BINARY,
.len = TC_COOKIE_MAX_SIZE },
NL_SET_ERR_MSG(extack, "TC action kind must be specified");
goto err_out;
}
- nla_strlcpy(act_name, kind, IFNAMSIZ);
-
+ if (nla_strlcpy(act_name, kind, IFNAMSIZ) >= IFNAMSIZ) {
+ NL_SET_ERR_MSG(extack, "TC action name too long");
+ goto err_out;
+ }
if (tb[TCA_ACT_COOKIE]) {
cookie = nla_memdup_cookie(tb);
if (!cookie) {
struct netlink_ext_ack *extack)
{
size_t attr_size = 0;
- int ret = 0;
+ int loop, ret;
struct tc_action *actions[TCA_ACT_MAX_PRIO] = {};
- ret = tcf_action_init(net, NULL, nla, NULL, NULL, ovr, 0, actions,
- &attr_size, true, extack);
+ for (loop = 0; loop < 10; loop++) {
+ ret = tcf_action_init(net, NULL, nla, NULL, NULL, ovr, 0,
+ actions, &attr_size, true, extack);
+ if (ret != -EAGAIN)
+ break;
+ }
+
if (ret < 0)
return ret;
ret = tcf_add_notify(net, n, actions, portid, attr_size, extack);
*/
if (n->nlmsg_flags & NLM_F_REPLACE)
ovr = 1;
-replay:
ret = tcf_action_add(net, tca[TCA_ACT_TAB], n, portid, ovr,
extack);
- if (ret == -EAGAIN)
- goto replay;
break;
case RTM_DELACTION:
ret = tca_action_gd(net, tca[TCA_ACT_TAB], n,
return err;
pr_info("Mirror/redirect action on\n");
- return tcf_register_action(&act_mirred_ops, &mirred_net_ops);
+ err = tcf_register_action(&act_mirred_ops, &mirred_net_ops);
+ if (err)
+ unregister_netdevice_notifier(&mirred_device_notifier);
+
+ return err;
}
static void __exit mirred_cleanup_module(void)
struct tcf_mpls *m = to_mpls(a);
struct tcf_mpls_params *p;
__be32 new_lse;
- int ret;
+ int ret, mac_len;
tcf_lastuse_update(&m->tcf_tm);
bstats_cpu_update(this_cpu_ptr(m->common.cpu_bstats), skb);
/* Ensure 'data' points at mac_header prior calling mpls manipulating
* functions.
*/
- if (skb_at_tc_ingress(skb))
+ if (skb_at_tc_ingress(skb)) {
skb_push_rcsum(skb, skb->mac_len);
+ mac_len = skb->mac_len;
+ } else {
+ mac_len = skb_network_header(skb) - skb_mac_header(skb);
+ }
ret = READ_ONCE(m->tcf_action);
switch (p->tcfm_action) {
case TCA_MPLS_ACT_POP:
- if (skb_mpls_pop(skb, p->tcfm_proto))
+ if (skb_mpls_pop(skb, p->tcfm_proto, mac_len))
goto drop;
break;
case TCA_MPLS_ACT_PUSH:
new_lse = tcf_mpls_get_lse(NULL, p, !eth_p_mpls(skb->protocol));
- if (skb_mpls_push(skb, new_lse, p->tcfm_proto))
+ if (skb_mpls_push(skb, new_lse, p->tcfm_proto, mac_len))
goto drop;
break;
case TCA_MPLS_ACT_MODIFY:
return TC_H_MAJ(first);
}
+static bool tcf_proto_check_kind(struct nlattr *kind, char *name)
+{ /* Copy the optional kind attribute into name (callers pass a buffer of
+ * IFNAMSIZ bytes). Returns true when the kind does NOT fit, which the
+ * callers report as "name too long"; returns false when it fits or when
+ * no kind attribute was supplied.
+ */
+ if (kind)
+ return nla_strlcpy(name, kind, IFNAMSIZ) >= IFNAMSIZ; /* a result >= IFNAMSIZ is treated as truncation */
+ memset(name, 0, IFNAMSIZ); /* no attribute: hand back an all-zero (empty) name */
+ return false;
+}
+
static bool tcf_proto_is_unlocked(const char *kind)
{
const struct tcf_proto_ops *ops;
bool ret;
+ if (strlen(kind) == 0)
+ return false;
+
ops = tcf_proto_lookup_ops(kind, false, NULL);
/* On error return false to take rtnl lock. Proto lookup/create
* functions will perform lookup again and properly handle errors.
{
struct net *net = sock_net(skb->sk);
struct nlattr *tca[TCA_MAX + 1];
+ char name[IFNAMSIZ];
struct tcmsg *t;
u32 protocol;
u32 prio;
if (err)
return err;
+ if (tcf_proto_check_kind(tca[TCA_KIND], name)) {
+ NL_SET_ERR_MSG(extack, "Specified TC filter name too long");
+ err = -EINVAL;
+ goto errout;
+ }
+
/* Take rtnl mutex if rtnl_held was set to true on previous iteration,
* block is shared (no qdisc found), qdisc is not unlocked, classifier
* type is not specified, classifier is not unlocked.
*/
if (rtnl_held ||
(q && !(q->ops->cl_ops->flags & QDISC_CLASS_OPS_DOIT_UNLOCKED)) ||
- !tca[TCA_KIND] || !tcf_proto_is_unlocked(nla_data(tca[TCA_KIND]))) {
+ !tcf_proto_is_unlocked(name)) {
rtnl_held = true;
rtnl_lock();
}
{
struct net *net = sock_net(skb->sk);
struct nlattr *tca[TCA_MAX + 1];
+ char name[IFNAMSIZ];
struct tcmsg *t;
u32 protocol;
u32 prio;
if (err)
return err;
+ if (tcf_proto_check_kind(tca[TCA_KIND], name)) {
+ NL_SET_ERR_MSG(extack, "Specified TC filter name too long");
+ err = -EINVAL;
+ goto errout;
+ }
/* Take rtnl mutex if flushing whole chain, block is shared (no qdisc
* found), qdisc is not unlocked, classifier type is not specified,
* classifier is not unlocked.
*/
if (!prio ||
(q && !(q->ops->cl_ops->flags & QDISC_CLASS_OPS_DOIT_UNLOCKED)) ||
- !tca[TCA_KIND] || !tcf_proto_is_unlocked(nla_data(tca[TCA_KIND]))) {
+ !tcf_proto_is_unlocked(name)) {
rtnl_held = true;
rtnl_lock();
}
{
struct net *net = sock_net(skb->sk);
struct nlattr *tca[TCA_MAX + 1];
+ char name[IFNAMSIZ];
struct tcmsg *t;
u32 protocol;
u32 prio;
if (err)
return err;
+ if (tcf_proto_check_kind(tca[TCA_KIND], name)) {
+ NL_SET_ERR_MSG(extack, "Specified TC filter name too long");
+ err = -EINVAL;
+ goto errout;
+ }
/* Take rtnl mutex if block is shared (no qdisc found), qdisc is not
* unlocked, classifier type is not specified, classifier is not
* unlocked.
*/
if ((q && !(q->ops->cl_ops->flags & QDISC_CLASS_OPS_DOIT_UNLOCKED)) ||
- !tca[TCA_KIND] || !tcf_proto_is_unlocked(nla_data(tca[TCA_KIND]))) {
+ !tcf_proto_is_unlocked(name)) {
rtnl_held = true;
rtnl_lock();
}
*err = -1;
return;
}
- dst->value = sk->sk_wmem_queued;
+ dst->value = READ_ONCE(sk->sk_wmem_queued);
}
META_COLLECTOR(int_sk_fwd_alloc)
*err = -1;
return;
}
- dst->value = sk->sk_rcvlowat;
+ dst->value = READ_ONCE(sk->sk_rcvlowat);
}
META_COLLECTOR(int_sk_rcvtimeo)
}
const struct nla_policy rtm_tca_policy[TCA_MAX + 1] = {
- [TCA_KIND] = { .type = NLA_NUL_STRING,
- .len = IFNAMSIZ - 1 },
+ [TCA_KIND] = { .type = NLA_STRING },
[TCA_RATE] = { .type = NLA_BINARY,
.len = sizeof(struct tc_estimator) },
[TCA_STAB] = { .type = NLA_NESTED },
parent = *p;
skb = rb_to_skb(parent);
- if (ktime_after(txtime, skb->tstamp)) {
+ if (ktime_compare(txtime, skb->tstamp) >= 0) {
p = &parent->rb_right;
leftmost = false;
} else {
if (skb->ip_summed == CHECKSUM_PARTIAL &&
skb_checksum_help(skb)) {
qdisc_drop(skb, sch, to_free);
+ skb = NULL;
goto finish_segs;
}
finish_segs:
if (segs) {
unsigned int len, last_len;
- int nb = 0;
+ int nb;
- len = skb->len;
+ len = skb ? skb->len : 0;
+ nb = skb ? 1 : 0;
while (segs) {
skb2 = segs->next;
}
segs = skb2;
}
- qdisc_tree_reduce_backlog(sch, -nb, prev_len - len);
+ /* Parent qdiscs accounted for 1 skb of size @prev_len */
+ qdisc_tree_reduce_backlog(sch, -(nb - 1), -(len - prev_len));
+ } else if (!skb) {
+ return NET_XMIT_DROP;
}
return NET_XMIT_SUCCESS;
}
NL_SET_ERR_MSG(extack, "Specifying a 'clockid' is mandatory");
goto out;
}
+
+ /* Everything went ok, return success. */
+ err = 0;
+
out:
return err;
}
mem[SK_MEMINFO_FWD_ALLOC] = sk->sk_forward_alloc;
mem[SK_MEMINFO_WMEM_QUEUED] = sk->sk_wmem_queued;
mem[SK_MEMINFO_OPTMEM] = atomic_read(&sk->sk_omem_alloc);
- mem[SK_MEMINFO_BACKLOG] = sk->sk_backlog.len;
+ mem[SK_MEMINFO_BACKLOG] = READ_ONCE(sk->sk_backlog.len);
mem[SK_MEMINFO_DROPS] = atomic_read(&sk->sk_drops);
if (nla_put(skb, INET_DIAG_SKMEMINFO, sizeof(mem), &mem) < 0)
bh_lock_sock(sk);
}
- if (sock_owned_by_user(sk)) {
+ if (sock_owned_by_user(sk) || !sctp_newsk_ready(sk)) {
if (sctp_add_backlog(sk, skb)) {
bh_unlock_sock(sk);
sctp_chunk_free(chunk);
local_bh_disable();
bh_lock_sock(sk);
- if (sock_owned_by_user(sk)) {
- if (sk_add_backlog(sk, skb, sk->sk_rcvbuf))
+ if (sock_owned_by_user(sk) || !sctp_newsk_ready(sk)) {
+ if (sk_add_backlog(sk, skb, READ_ONCE(sk->sk_rcvbuf)))
sctp_chunk_free(chunk);
else
backloged = 1;
if (backloged)
return 0;
} else {
- sctp_inq_push(inqueue, chunk);
+ if (!sctp_newsk_ready(sk)) {
+ if (!sk_add_backlog(sk, skb, READ_ONCE(sk->sk_rcvbuf)))
+ return 0;
+ sctp_chunk_free(chunk);
+ } else {
+ sctp_inq_push(inqueue, chunk);
+ }
}
done:
struct sctp_ep_common *rcvr = chunk->rcvr;
int ret;
- ret = sk_add_backlog(sk, skb, sk->sk_rcvbuf);
+ ret = sk_add_backlog(sk, skb, READ_ONCE(sk->sk_rcvbuf));
if (!ret) {
/* Hold the assoc/ep while hanging on the backlog queue.
* This way, we know structures we need will not disappear
.backlog_rcv = sctp_backlog_rcv,
.hash = sctp_hash,
.unhash = sctp_unhash,
- .get_port = sctp_get_port,
+ .no_autobind = true,
.obj_size = sizeof(struct sctp_sock),
.useroffset = offsetof(struct sctp_sock, subscribe),
.usersize = offsetof(struct sctp_sock, initmsg) -
.backlog_rcv = sctp_backlog_rcv,
.hash = sctp_hash,
.unhash = sctp_unhash,
- .get_port = sctp_get_port,
+ .no_autobind = true,
.obj_size = sizeof(struct sctp6_sock),
.useroffset = offsetof(struct sctp6_sock, sctp.subscribe),
.usersize = offsetof(struct sctp6_sock, sctp.initmsg) -
lgr = kzalloc(sizeof(*lgr), GFP_KERNEL);
if (!lgr) {
rc = SMC_CLC_DECL_MEM;
- goto out;
+ goto ism_put_vlan;
}
lgr->is_smcd = ini->is_smcd;
lgr->sync_err = 0;
smc_llc_link_clear(lnk);
free_lgr:
kfree(lgr);
+ism_put_vlan:
+ if (ini->is_smcd && ini->vlan_id)
+ smc_ism_put_vlan(ini->ism_dev, ini->vlan_id);
out:
if (rc < 0) {
if (rc == -ENOMEM)
rc = sk_wait_event(sk, timeo,
sk->sk_err ||
sk->sk_shutdown & RCV_SHUTDOWN ||
- fcrit(conn) ||
- smc_cdc_rxed_any_close_or_senddone(conn),
+ fcrit(conn),
&wait);
remove_wait_queue(sk_sleep(sk), &wait);
sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
return -EAGAIN;
}
+static bool smc_rx_recvmsg_data_available(struct smc_sock *smc)
+{ /* Returns true when smc_rx_data_available() reports data for this
+ * socket's connection. Otherwise returns false, after first calling
+ * smc_rx_update_cons(smc, 0) if urg_state is SMC_URG_VALID.
+ */
+ struct smc_connection *conn = &smc->conn;
+
+ if (smc_rx_data_available(conn))
+ return true;
+ else if (conn->urg_state == SMC_URG_VALID)
+ /* we received a single urgent Byte - skip */
+ smc_rx_update_cons(smc, 0);
+ return false; /* not part of the else-if: reached whether or not the urgent byte was skipped */
+}
+
/* smc_rx_recvmsg - receive data from RMBE
* @msg: copy data to receive buffer
* @pipe: copy data to pipe if set - indicates splice() call
if (read_done >= target || (pipe && read_done))
break;
- if (atomic_read(&conn->bytes_to_rcv))
+ if (smc_rx_recvmsg_data_available(smc))
goto copy;
- else if (conn->urg_state == SMC_URG_VALID)
- /* we received a single urgent Byte - skip */
- smc_rx_update_cons(smc, 0);
if (sk->sk_shutdown & RCV_SHUTDOWN ||
- smc_cdc_rxed_any_close_or_senddone(conn) ||
- conn->local_tx_ctrl.conn_state_flags.peer_conn_abort)
+ conn->local_tx_ctrl.conn_state_flags.peer_conn_abort) {
+ /* smc_cdc_msg_recv_action() could have run after
+ * above smc_rx_recvmsg_data_available()
+ */
+ if (smc_rx_recvmsg_data_available(smc))
+ goto copy;
break;
+ }
if (read_done) {
if (sk->sk_err ||
struct tipc_msg *hdr = buf_msg(skb);
if (unlikely(msg_in_group(hdr)))
- return sk->sk_rcvbuf;
+ return READ_ONCE(sk->sk_rcvbuf);
if (unlikely(!msg_connected(hdr)))
- return sk->sk_rcvbuf << msg_importance(hdr);
+ return READ_ONCE(sk->sk_rcvbuf) << msg_importance(hdr);
if (likely(tsk->peer_caps & TIPC_BLOCK_FLOWCTL))
- return sk->sk_rcvbuf;
+ return READ_ONCE(sk->sk_rcvbuf);
return FLOWCTL_MSG_LIM;
}
i += scnprintf(buf + i, sz - i, " %d", sk->sk_sndbuf);
i += scnprintf(buf + i, sz - i, " | %d", sk_rmem_alloc_get(sk));
i += scnprintf(buf + i, sz - i, " %d", sk->sk_rcvbuf);
- i += scnprintf(buf + i, sz - i, " | %d\n", sk->sk_backlog.len);
+ i += scnprintf(buf + i, sz - i, " | %d\n", READ_ONCE(sk->sk_backlog.len));
if (dqueues & TIPC_DUMP_SK_SNDQ) {
i += scnprintf(buf + i, sz - i, "sk_write_queue: ");
return virtio_transport_get_ops()->send_pkt(pkt);
}
-static void virtio_transport_inc_rx_pkt(struct virtio_vsock_sock *vvs,
+static bool virtio_transport_inc_rx_pkt(struct virtio_vsock_sock *vvs,
struct virtio_vsock_pkt *pkt)
{
+ if (vvs->rx_bytes + pkt->len > vvs->buf_alloc)
+ return false;
+
vvs->rx_bytes += pkt->len;
+ return true;
}
static void virtio_transport_dec_rx_pkt(struct virtio_vsock_sock *vvs,
vvs->buf_size_max = val;
vvs->buf_size = val;
vvs->buf_alloc = val;
+
+ virtio_transport_send_credit_update(vsk, VIRTIO_VSOCK_TYPE_STREAM,
+ NULL);
}
EXPORT_SYMBOL_GPL(virtio_transport_set_buffer_size);
struct virtio_vsock_pkt *pkt)
{
struct virtio_vsock_sock *vvs = vsk->trans;
- bool free_pkt = false;
+ bool can_enqueue, free_pkt = false;
pkt->len = le32_to_cpu(pkt->hdr.len);
pkt->off = 0;
spin_lock_bh(&vvs->rx_lock);
- virtio_transport_inc_rx_pkt(vvs, pkt);
+ can_enqueue = virtio_transport_inc_rx_pkt(vvs, pkt);
+ if (!can_enqueue) {
+ free_pkt = true;
+ goto out;
+ }
/* Try to copy small packets into the buffer of last packet queued,
* to avoid wasting memory queueing the entire buffer with a small
hdr = nl80211hdr_put(msg, info->snd_portid, info->snd_seq, 0,
NL80211_CMD_GET_FTM_RESPONDER_STATS);
if (!hdr)
- return -ENOBUFS;
+ goto nla_put_failure;
if (nla_put_u32(msg, NL80211_ATTR_IFINDEX, dev->ifindex))
goto nla_put_failure;
return pre_cac_allowed;
}
+EXPORT_SYMBOL(regulatory_pre_cac_allowed);
void regulatory_propagate_dfs_state(struct wiphy *wiphy,
struct cfg80211_chan_def *chandef,
*/
#define REG_PRE_CAC_EXPIRY_GRACE_MS 2000
-/**
- * regulatory_pre_cac_allowed - if pre-CAC allowed in the current dfs domain
- * @wiphy: wiphy for which pre-CAC capability is checked.
-
- * Pre-CAC is allowed only in ETSI domain.
- */
-bool regulatory_pre_cac_allowed(struct wiphy *wiphy);
-
/**
* regulatory_propagate_dfs_state - Propagate DFS channel state to other wiphys
* @wiphy - wiphy on which radar is detected and the event will be propagated
static void
cfg80211_update_notlisted_nontrans(struct wiphy *wiphy,
struct cfg80211_bss *nontrans_bss,
- struct ieee80211_mgmt *mgmt, size_t len,
- gfp_t gfp)
+ struct ieee80211_mgmt *mgmt, size_t len)
{
u8 *ie, *new_ie, *pos;
const u8 *nontrans_ssid, *trans_ssid, *mbssid;
const struct cfg80211_bss_ies *old;
u8 cpy_len;
+ lockdep_assert_held(&wiphy_to_rdev(wiphy)->bss_lock);
+
ie = mgmt->u.probe_resp.variable;
new_ie_len = ielen;
if (!mbssid || mbssid < trans_ssid)
return;
new_ie_len -= mbssid[1];
- rcu_read_lock();
+
nontrans_ssid = ieee80211_bss_get_ie(nontrans_bss, WLAN_EID_SSID);
- if (!nontrans_ssid) {
- rcu_read_unlock();
+ if (!nontrans_ssid)
return;
- }
+
new_ie_len += nontrans_ssid[1];
- rcu_read_unlock();
/* generate new ie for nontrans BSS
* 1. replace SSID with nontrans BSS' SSID
* 2. skip MBSSID IE
*/
- new_ie = kzalloc(new_ie_len, gfp);
+ new_ie = kzalloc(new_ie_len, GFP_ATOMIC);
if (!new_ie)
return;
- new_ies = kzalloc(sizeof(*new_ies) + new_ie_len, gfp);
+
+ new_ies = kzalloc(sizeof(*new_ies) + new_ie_len, GFP_ATOMIC);
if (!new_ies)
goto out_free;
cfg80211_parse_mbssid_frame_data(wiphy, data, mgmt, len,
&non_tx_data, gfp);
+ spin_lock_bh(&wiphy_to_rdev(wiphy)->bss_lock);
+
/* check if the res has other nontransmitting bss which is not
* in MBSSID IE
*/
ies2 = rcu_access_pointer(tmp_bss->ies);
if (ies2->tsf < ies1->tsf)
cfg80211_update_notlisted_nontrans(wiphy, tmp_bss,
- mgmt, len, gfp);
+ mgmt, len);
}
+ spin_unlock_bh(&wiphy_to_rdev(wiphy)->bss_lock);
return res;
}
struct iw_point *data, char *ssid)
{
struct wireless_dev *wdev = dev->ieee80211_ptr;
+ int ret = 0;
/* call only for station! */
if (WARN_ON(wdev->iftype != NL80211_IFTYPE_STATION))
if (ie) {
data->flags = 1;
data->length = ie[1];
- memcpy(ssid, ie + 2, data->length);
+ if (data->length > IW_ESSID_MAX_SIZE)
+ ret = -EINVAL;
+ else
+ memcpy(ssid, ie + 2, data->length);
}
rcu_read_unlock();
} else if (wdev->wext.connect.ssid && wdev->wext.connect.ssid_len) {
}
wdev_unlock(wdev);
- return 0;
+ return ret;
}
int cfg80211_mgd_wext_siwap(struct net_device *dev,
if (!sock_owned_by_user(sk)) {
queued = x25_process_rx_frame(sk, skb);
} else {
- queued = !sk_add_backlog(sk, skb, sk->sk_rcvbuf);
+ queued = !sk_add_backlog(sk, skb, READ_ONCE(sk->sk_rcvbuf));
}
bh_unlock_sock(sk);
sock_put(sk);
}
EXPORT_SYMBOL(xsk_umem_consume_tx);
-static int xsk_zc_xmit(struct sock *sk)
+static int xsk_zc_xmit(struct xdp_sock *xs)
{
- struct xdp_sock *xs = xdp_sk(sk);
struct net_device *dev = xs->dev;
return dev->netdev_ops->ndo_xsk_wakeup(dev, xs->queue_id,
sock_wfree(skb);
}
-static int xsk_generic_xmit(struct sock *sk, struct msghdr *m,
- size_t total_len)
+static int xsk_generic_xmit(struct sock *sk)
{
- u32 max_batch = TX_BATCH_SIZE;
struct xdp_sock *xs = xdp_sk(sk);
+ u32 max_batch = TX_BATCH_SIZE;
bool sent_frame = false;
struct xdp_desc desc;
struct sk_buff *skb;
return err;
}
+/* Common transmit kick shared by xsk_sendmsg() and xsk_poll().
+ *
+ * Returns -ENETDOWN when the socket's device is not IFF_UP, -ENOBUFS
+ * when xs->tx is not set, and otherwise the result of the zero-copy
+ * or generic transmit routine, selected by xs->zc.
+ */
+static int __xsk_sendmsg(struct sock *sk)
+{
+	struct xdp_sock *xs = xdp_sk(sk);
+
+	/* refuse to transmit while the device is down */
+	if (unlikely(!(xs->dev->flags & IFF_UP)))
+		return -ENETDOWN;
+
+	/* xs->tx must have been set up before anything can be sent */
+	if (unlikely(!xs->tx))
+		return -ENOBUFS;
+
+	if (xs->zc)
+		return xsk_zc_xmit(xs);
+
+	return xsk_generic_xmit(sk);
+}
+
static int xsk_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len)
{
bool need_wait = !(m->msg_flags & MSG_DONTWAIT);
if (unlikely(!xsk_is_bound(xs)))
return -ENXIO;
- if (unlikely(!(xs->dev->flags & IFF_UP)))
- return -ENETDOWN;
- if (unlikely(!xs->tx))
- return -ENOBUFS;
- if (need_wait)
+ if (unlikely(need_wait))
return -EOPNOTSUPP;
- return (xs->zc) ? xsk_zc_xmit(sk) : xsk_generic_xmit(sk, m, total_len);
+ return __xsk_sendmsg(sk);
}
static unsigned int xsk_poll(struct file *file, struct socket *sock,
struct poll_table_struct *wait)
{
unsigned int mask = datagram_poll(file, sock, wait);
- struct xdp_sock *xs = xdp_sk(sock->sk);
+ struct sock *sk = sock->sk;
+ struct xdp_sock *xs = xdp_sk(sk);
struct net_device *dev;
struct xdp_umem *umem;
dev = xs->dev;
umem = xs->umem;
- if (umem->need_wakeup)
- dev->netdev_ops->ndo_xsk_wakeup(dev, xs->queue_id,
- umem->need_wakeup);
+ if (umem->need_wakeup) {
+ if (dev->netdev_ops->ndo_xsk_wakeup)
+ dev->netdev_ops->ndo_xsk_wakeup(dev, xs->queue_id,
+ umem->need_wakeup);
+ else
+ /* Poll needs to drive Tx also in copy mode */
+ __xsk_sendmsg(sk);
+ }
if (xs->rx && !xskq_empty_desc(xs->rx))
mask |= POLLIN | POLLRDNORM;
#ifndef __ASM_GOTO_WORKAROUND_H
#define __ASM_GOTO_WORKAROUND_H
-/* this will bring in asm_volatile_goto macro definition
+/*
+ * This will bring in asm_volatile_goto and asm_inline macro definitions
* if enabled by compiler and config options.
*/
#include <linux/types.h>
#define asm_volatile_goto(x...) asm volatile("invalid use of asm_volatile_goto")
#endif
+/*
+ * asm_inline is defined as asm __inline in "include/linux/compiler_types.h"
+ * if supported by the kernel's CC (i.e. CONFIG_CC_HAS_ASM_INLINE), which is
+ * not supported by Clang.
+ */
+#ifdef asm_inline
+#undef asm_inline
+#define asm_inline asm
+#endif
+
#define volatile(x...) volatile("")
#endif
#include <sys/resource.h>
#include <sys/types.h>
#include <sys/stat.h>
+#include <linux/perf_event.h>
#include "libbpf.h"
#include "bpf_load.h"
+++ /dev/null
-// SPDX-License-Identifier: GPL-2.0
-/// Use devm_platform_ioremap_resource helper which wraps
-/// platform_get_resource() and devm_ioremap_resource() together.
-///
-// Confidence: High
-// Copyright: (C) 2019 Himanshu Jha GPLv2.
-// Copyright: (C) 2019 Julia Lawall, Inria/LIP6. GPLv2.
-// Keywords: platform_get_resource, devm_ioremap_resource,
-// Keywords: devm_platform_ioremap_resource
-
-virtual patch
-virtual report
-
-@r depends on patch && !report@
-expression e1, e2, arg1, arg2, arg3;
-identifier id;
-@@
-
-(
-- id = platform_get_resource(arg1, IORESOURCE_MEM, arg2);
-|
-- struct resource *id = platform_get_resource(arg1, IORESOURCE_MEM, arg2);
-)
- ... when != id
-- e1 = devm_ioremap_resource(arg3, id);
-+ e1 = devm_platform_ioremap_resource(arg1, arg2);
- ... when != id
-? id = e2
-
-@r1 depends on patch && !report@
-identifier r.id;
-type T;
-@@
-
-- T *id;
- ...when != id
-
-@r2 depends on report && !patch@
-identifier id;
-expression e1, e2, arg1, arg2, arg3;
-position j0;
-@@
-
-(
- id = platform_get_resource(arg1, IORESOURCE_MEM, arg2);
-|
- struct resource *id = platform_get_resource(arg1, IORESOURCE_MEM, arg2);
-)
- ... when != id
- e1@j0 = devm_ioremap_resource(arg3, id);
- ... when != id
-? id = e2
-
-@script:python depends on report && !patch@
-e1 << r2.e1;
-j0 << r2.j0;
-@@
-
-msg = "WARNING: Use devm_platform_ioremap_resource for %s" % (e1)
-coccilib.report.print_report(j0[0], msg)
from linux import utils
+printk_log_type = utils.CachedType("struct printk_log")
+
class LxDmesg(gdb.Command):
"""Print Linux kernel log buffer."""
b = utils.read_memoryview(inf, log_buf_addr, log_next_idx)
log_buf = a.tobytes() + b.tobytes()
+ length_offset = printk_log_type.get_type()['len'].bitpos // 8
+ text_len_offset = printk_log_type.get_type()['text_len'].bitpos // 8
+ time_stamp_offset = printk_log_type.get_type()['ts_nsec'].bitpos // 8
+ text_offset = printk_log_type.get_type().sizeof
+
pos = 0
while pos < log_buf.__len__():
- length = utils.read_u16(log_buf[pos + 8:pos + 10])
+ length = utils.read_u16(log_buf, pos + length_offset)
if length == 0:
if log_buf_2nd_half == -1:
gdb.write("Corrupted log buffer!\n")
pos = log_buf_2nd_half
continue
- text_len = utils.read_u16(log_buf[pos + 10:pos + 12])
- text = log_buf[pos + 16:pos + 16 + text_len].decode(
+ text_len = utils.read_u16(log_buf, pos + text_len_offset)
+ text_start = pos + text_offset
+ text = log_buf[text_start:text_start + text_len].decode(
encoding='utf8', errors='replace')
- time_stamp = utils.read_u64(log_buf[pos:pos + 8])
+ time_stamp = utils.read_u64(log_buf, pos + time_stamp_offset)
for line in text.splitlines():
msg = u"[{time:12.6f}] {line}\n".format(
import os
import re
-from linux import modules
+from linux import modules, utils
if hasattr(gdb, 'Breakpoint'):
module_file = self._get_module_file(module_name)
if module_file:
+ if utils.is_target_arch('s390'):
+ # Module text is preceded by PLT stubs on s390.
+ module_arch = module['arch']
+ plt_offset = int(module_arch['plt_offset'])
+ plt_size = int(module_arch['plt_size'])
+ module_addr = hex(int(module_addr, 0) + plt_offset + plt_size)
gdb.write("loading @{addr}: {filename}\n".format(
addr=module_addr, filename=module_file))
cmdline = "add-symbol-file {filename} {addr}{sections}".format(
return memoryview(inf.read_memory(start, length))
-def read_u16(buffer):
+def read_u16(buffer, offset):
+ buffer_val = buffer[offset:offset + 2]
value = [0, 0]
- if type(buffer[0]) is str:
- value[0] = ord(buffer[0])
- value[1] = ord(buffer[1])
+ if type(buffer_val[0]) is str:
+ value[0] = ord(buffer_val[0])
+ value[1] = ord(buffer_val[1])
else:
- value[0] = buffer[0]
- value[1] = buffer[1]
+ value[0] = buffer_val[0]
+ value[1] = buffer_val[1]
if get_target_endianness() == LITTLE_ENDIAN:
return value[0] + (value[1] << 8)
return value[1] + (value[0] << 8)
-def read_u32(buffer):
+def read_u32(buffer, offset):
if get_target_endianness() == LITTLE_ENDIAN:
- return read_u16(buffer[0:2]) + (read_u16(buffer[2:4]) << 16)
+ return read_u16(buffer, offset) + (read_u16(buffer, offset + 2) << 16)
else:
- return read_u16(buffer[2:4]) + (read_u16(buffer[0:2]) << 16)
+ return read_u16(buffer, offset + 2) + (read_u16(buffer, offset) << 16)
-def read_u64(buffer):
+def read_u64(buffer, offset):
if get_target_endianness() == LITTLE_ENDIAN:
- return read_u32(buffer[0:4]) + (read_u32(buffer[4:8]) << 32)
+ return read_u32(buffer, offset) + (read_u32(buffer, offset + 4) << 32)
else:
- return read_u32(buffer[4:8]) + (read_u32(buffer[0:4]) << 32)
+ return read_u32(buffer, offset + 4) + (read_u32(buffer, offset) << 32)
target_arch = None
# Check for mercurial and a mercurial repo.
if test -d .hg && hgid=`hg id 2>/dev/null`; then
# Do we have a tagged version? If so, latesttagdistance == 1
- if [ "`hg log -r . --template '{latesttagdistance}'`" == "1" ]; then
+ if [ "`hg log -r . --template '{latesttagdistance}'`" = "1" ]; then
id=`hg log -r . --template '{latesttag}'`
printf '%s%s' -hg "$id"
else
ret = snd_hdac_ext_bus_link_power_up(link);
+ /*
+ * clear the register to invalidate all the output streams
+ */
+ snd_hdac_updatew(link->ml_addr, AZX_REG_ML_LOSIDV,
+ ML_LOSIDV_STREAM_MASK, 0);
/*
* wait for 521usec for codec to report status
* HDA spec section 4.3 - Codec Discovery
nvhdmi_chmap_cea_alloc_validate_get_type;
spec->chmap.ops.chmap_validate = nvhdmi_chmap_validate;
+ codec->link_down_at_suspend = 1;
+
generic_acomp_init(codec, &nvhdmi_audio_ops, nvhdmi_port2pin);
return 0;
}
}
+/* Fixup callback acting on widget 0x1a.
+ *
+ * Only the HDA_FIXUP_ACT_PRE_PROBE action is handled: the input amp of
+ * widget 0x1a (index 0) is written with value 1 under HDA_AMP_VOLMASK,
+ * then the widget caps are overridden with AC_WCAP_IN_AMP cleared.
+ * Every other action is ignored.
+ */
+static void alc256_fixup_dell_xps_13_headphone_noise2(struct hda_codec *codec,
+						      const struct hda_fixup *fix,
+						      int action)
+{
+	unsigned int wcaps;
+
+	if (action == HDA_FIXUP_ACT_PRE_PROBE) {
+		snd_hda_codec_amp_stereo(codec, 0x1a, HDA_INPUT, 0,
+					 HDA_AMP_VOLMASK, 1);
+
+		/* read the caps only after the amp write, as before */
+		wcaps = get_wcaps(codec, 0x1a);
+		snd_hda_override_wcaps(codec, 0x1a, wcaps & ~AC_WCAP_IN_AMP);
+	}
+}
+
static void alc269_fixup_limit_int_mic_boost(struct hda_codec *codec,
const struct hda_fixup *fix,
int action)
ALC298_FIXUP_DELL_AIO_MIC_NO_PRESENCE,
ALC275_FIXUP_DELL_XPS,
ALC256_FIXUP_DELL_XPS_13_HEADPHONE_NOISE,
+ ALC256_FIXUP_DELL_XPS_13_HEADPHONE_NOISE2,
ALC293_FIXUP_LENOVO_SPK_NOISE,
ALC233_FIXUP_LENOVO_LINE2_MIC_HOTKEY,
ALC255_FIXUP_DELL_SPK_NOISE,
ALC225_FIXUP_WYSE_AUTO_MUTE,
ALC225_FIXUP_WYSE_DISABLE_MIC_VREF,
ALC286_FIXUP_ACER_AIO_HEADSET_MIC,
+ ALC256_FIXUP_ASUS_HEADSET_MIC,
ALC256_FIXUP_ASUS_MIC_NO_PRESENCE,
ALC299_FIXUP_PREDATOR_SPK,
ALC294_FIXUP_ASUS_INTSPK_HEADSET_MIC,
.chained = true,
.chain_id = ALC255_FIXUP_DELL1_MIC_NO_PRESENCE
},
+ [ALC256_FIXUP_DELL_XPS_13_HEADPHONE_NOISE2] = {
+ .type = HDA_FIXUP_FUNC,
+ .v.func = alc256_fixup_dell_xps_13_headphone_noise2,
+ .chained = true,
+ .chain_id = ALC256_FIXUP_DELL_XPS_13_HEADPHONE_NOISE
+ },
[ALC293_FIXUP_LENOVO_SPK_NOISE] = {
.type = HDA_FIXUP_FUNC,
.v.func = alc_fixup_disable_aamix,
.chained = true,
.chain_id = ALC286_FIXUP_ACER_AIO_MIC_NO_PRESENCE
},
+ [ALC256_FIXUP_ASUS_HEADSET_MIC] = {
+ .type = HDA_FIXUP_PINS,
+ .v.pins = (const struct hda_pintbl[]) {
+ { 0x19, 0x03a11020 }, /* headset mic with jack detect */
+ { }
+ },
+ .chained = true,
+ .chain_id = ALC256_FIXUP_ASUS_HEADSET_MODE
+ },
[ALC256_FIXUP_ASUS_MIC_NO_PRESENCE] = {
.type = HDA_FIXUP_PINS,
.v.pins = (const struct hda_pintbl[]) {
SND_PCI_QUIRK(0x1028, 0x06de, "Dell", ALC293_FIXUP_DISABLE_AAMIX_MULTIJACK),
SND_PCI_QUIRK(0x1028, 0x06df, "Dell", ALC293_FIXUP_DISABLE_AAMIX_MULTIJACK),
SND_PCI_QUIRK(0x1028, 0x06e0, "Dell", ALC293_FIXUP_DISABLE_AAMIX_MULTIJACK),
- SND_PCI_QUIRK(0x1028, 0x0704, "Dell XPS 13 9350", ALC256_FIXUP_DELL_XPS_13_HEADPHONE_NOISE),
+ SND_PCI_QUIRK(0x1028, 0x0704, "Dell XPS 13 9350", ALC256_FIXUP_DELL_XPS_13_HEADPHONE_NOISE2),
SND_PCI_QUIRK(0x1028, 0x0706, "Dell Inspiron 7559", ALC256_FIXUP_DELL_INSPIRON_7559_SUBWOOFER),
SND_PCI_QUIRK(0x1028, 0x0725, "Dell Inspiron 3162", ALC255_FIXUP_DELL_SPK_NOISE),
SND_PCI_QUIRK(0x1028, 0x0738, "Dell Precision 5820", ALC269_FIXUP_NO_SHUTUP),
- SND_PCI_QUIRK(0x1028, 0x075b, "Dell XPS 13 9360", ALC256_FIXUP_DELL_XPS_13_HEADPHONE_NOISE),
+ SND_PCI_QUIRK(0x1028, 0x075b, "Dell XPS 13 9360", ALC256_FIXUP_DELL_XPS_13_HEADPHONE_NOISE2),
SND_PCI_QUIRK(0x1028, 0x075c, "Dell XPS 27 7760", ALC298_FIXUP_SPK_VOLUME),
SND_PCI_QUIRK(0x1028, 0x075d, "Dell AIO", ALC298_FIXUP_SPK_VOLUME),
SND_PCI_QUIRK(0x1028, 0x07b0, "Dell Precision 7520", ALC295_FIXUP_DISABLE_DAC3),
SND_PCI_QUIRK(0x1028, 0x0798, "Dell Inspiron 17 7000 Gaming", ALC256_FIXUP_DELL_INSPIRON_7559_SUBWOOFER),
SND_PCI_QUIRK(0x1028, 0x080c, "Dell WYSE", ALC225_FIXUP_DELL_WYSE_MIC_NO_PRESENCE),
- SND_PCI_QUIRK(0x1028, 0x082a, "Dell XPS 13 9360", ALC256_FIXUP_DELL_XPS_13_HEADPHONE_NOISE),
+ SND_PCI_QUIRK(0x1028, 0x082a, "Dell XPS 13 9360", ALC256_FIXUP_DELL_XPS_13_HEADPHONE_NOISE2),
SND_PCI_QUIRK(0x1028, 0x084b, "Dell", ALC274_FIXUP_DELL_AIO_LINEOUT_VERB),
SND_PCI_QUIRK(0x1028, 0x084e, "Dell", ALC274_FIXUP_DELL_AIO_LINEOUT_VERB),
SND_PCI_QUIRK(0x1028, 0x0871, "Dell Precision 3630", ALC255_FIXUP_DELL_HEADSET_MIC),
SND_PCI_QUIRK(0x1043, 0x1517, "Asus Zenbook UX31A", ALC269VB_FIXUP_ASUS_ZENBOOK_UX31A),
SND_PCI_QUIRK(0x1043, 0x16e3, "ASUS UX50", ALC269_FIXUP_STEREO_DMIC),
SND_PCI_QUIRK(0x1043, 0x17d1, "ASUS UX431FL", ALC294_FIXUP_ASUS_INTSPK_HEADSET_MIC),
+ SND_PCI_QUIRK(0x1043, 0x18b1, "Asus MJ401TA", ALC256_FIXUP_ASUS_HEADSET_MIC),
SND_PCI_QUIRK(0x1043, 0x1a13, "Asus G73Jw", ALC269_FIXUP_ASUS_G73JW),
SND_PCI_QUIRK(0x1043, 0x1a30, "ASUS X705UD", ALC256_FIXUP_ASUS_MIC),
SND_PCI_QUIRK(0x1043, 0x1b13, "Asus U41SV", ALC269_FIXUP_INV_DMIC),
ep = 0x84;
ifnum = 0;
goto add_sync_ep_from_ifnum;
+ case USB_ID(0x0582, 0x01d8): /* BOSS Katana */
+ /* BOSS Katana amplifiers do not need quirks */
+ return 0;
}
if (attr == USB_ENDPOINT_SYNC_ASYNC &&
CFLAGS += -Wall -O2
CFLAGS += -D__EXPORTED_HEADERS__ -I$(srctree)/include/uapi -I$(srctree)/include
-ifeq ($(srctree),)
+# This will work when bpf is built in tools env. where srctree
+# isn't set and when invoked from selftests build, where srctree
+# is set to ".". building_out_of_srctree is undefined for in srctree
+# builds
+ifndef building_out_of_srctree
srctree := $(patsubst %/,%,$(dir $(CURDIR)))
srctree := $(patsubst %/,%,$(dir $(srctree)))
endif
MAKEFLAGS += --no-print-directory
-ifeq ($(srctree),)
+# This will work when bpf is built in tools env. where srctree
+# isn't set and when invoked from selftests build, where srctree
+# is set to ".". building_out_of_srctree is undefined for in srctree
+# builds
+ifndef building_out_of_srctree
srctree := $(patsubst %/,%,$(dir $(CURDIR)))
srctree := $(patsubst %/,%,$(dir $(srctree)))
srctree := $(patsubst %/,%,$(dir $(srctree)))
override CFLAGS += -fvisibility=hidden
override CFLAGS += -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64
+# flags specific for shared library
+SHLIB_FLAGS := -DSHARED
+
ifeq ($(VERBOSE),1)
Q =
else
export srctree OUTPUT CC LD CFLAGS V
include $(srctree)/tools/build/Makefile.include
-BPF_IN := $(OUTPUT)libbpf-in.o
+SHARED_OBJDIR := $(OUTPUT)sharedobjs/
+STATIC_OBJDIR := $(OUTPUT)staticobjs/
+BPF_IN_SHARED := $(SHARED_OBJDIR)libbpf-in.o
+BPF_IN_STATIC := $(STATIC_OBJDIR)libbpf-in.o
VERSION_SCRIPT := libbpf.map
LIB_TARGET := $(addprefix $(OUTPUT),$(LIB_TARGET))
LIB_FILE := $(addprefix $(OUTPUT),$(LIB_FILE))
PC_FILE := $(addprefix $(OUTPUT),$(PC_FILE))
-GLOBAL_SYM_COUNT = $(shell readelf -s --wide $(BPF_IN) | \
+GLOBAL_SYM_COUNT = $(shell readelf -s --wide $(BPF_IN_SHARED) | \
cut -d "@" -f1 | sed 's/_v[0-9]_[0-9]_[0-9].*//' | \
awk '/GLOBAL/ && /DEFAULT/ && !/UND/ {print $$8}' | \
sort -u | wc -l)
all_cmd: $(CMD_TARGETS) check
-$(BPF_IN): force elfdep bpfdep
+$(BPF_IN_SHARED): force elfdep bpfdep
@(test -f ../../include/uapi/linux/bpf.h -a -f ../../../include/uapi/linux/bpf.h && ( \
(diff -B ../../include/uapi/linux/bpf.h ../../../include/uapi/linux/bpf.h >/dev/null) || \
echo "Warning: Kernel ABI header at 'tools/include/uapi/linux/bpf.h' differs from latest version at 'include/uapi/linux/bpf.h'" >&2 )) || true
@(test -f ../../include/uapi/linux/if_xdp.h -a -f ../../../include/uapi/linux/if_xdp.h && ( \
(diff -B ../../include/uapi/linux/if_xdp.h ../../../include/uapi/linux/if_xdp.h >/dev/null) || \
echo "Warning: Kernel ABI header at 'tools/include/uapi/linux/if_xdp.h' differs from latest version at 'include/uapi/linux/if_xdp.h'" >&2 )) || true
- $(Q)$(MAKE) $(build)=libbpf
+ $(Q)$(MAKE) $(build)=libbpf OUTPUT=$(SHARED_OBJDIR) CFLAGS="$(CFLAGS) $(SHLIB_FLAGS)"
+
+$(BPF_IN_STATIC): force elfdep bpfdep
+ $(Q)$(MAKE) $(build)=libbpf OUTPUT=$(STATIC_OBJDIR)
$(OUTPUT)libbpf.so: $(OUTPUT)libbpf.so.$(LIBBPF_VERSION)
-$(OUTPUT)libbpf.so.$(LIBBPF_VERSION): $(BPF_IN)
+$(OUTPUT)libbpf.so.$(LIBBPF_VERSION): $(BPF_IN_SHARED)
$(QUIET_LINK)$(CC) --shared -Wl,-soname,libbpf.so.$(LIBBPF_MAJOR_VERSION) \
-Wl,--version-script=$(VERSION_SCRIPT) $^ -lelf -o $@
@ln -sf $(@F) $(OUTPUT)libbpf.so
@ln -sf $(@F) $(OUTPUT)libbpf.so.$(LIBBPF_MAJOR_VERSION)
-$(OUTPUT)libbpf.a: $(BPF_IN)
+$(OUTPUT)libbpf.a: $(BPF_IN_STATIC)
$(QUIET_LINK)$(RM) $@; $(AR) rcs $@ $^
$(OUTPUT)test_libbpf: test_libbpf.cpp $(OUTPUT)libbpf.a
check_abi: $(OUTPUT)libbpf.so
@if [ "$(GLOBAL_SYM_COUNT)" != "$(VERSIONED_SYM_COUNT)" ]; then \
- echo "Warning: Num of global symbols in $(BPF_IN)" \
+ echo "Warning: Num of global symbols in $(BPF_IN_SHARED)" \
"($(GLOBAL_SYM_COUNT)) does NOT match with num of" \
"versioned symbols in $^ ($(VERSIONED_SYM_COUNT))." \
"Please make sure all LIBBPF_API symbols are" \
$(Q)$(MAKE) -C $(srctree)/tools/build/feature/ clean >/dev/null
clean:
- $(call QUIET_CLEAN, libbpf) $(RM) $(TARGETS) $(CXX_TEST_TARGET) \
+ $(call QUIET_CLEAN, libbpf) $(RM) -rf $(TARGETS) $(CXX_TEST_TARGET) \
*.o *~ *.a *.so *.so.$(LIBBPF_MAJOR_VERSION) .*.d .*.cmd \
- *.pc LIBBPF-CFLAGS
+ *.pc LIBBPF-CFLAGS $(SHARED_OBJDIR) $(STATIC_OBJDIR)
$(call QUIET_CLEAN, core-gen) $(RM) $(OUTPUT)FEATURE-DUMP.libbpf
(offsetof(TYPE, FIELD) + sizeof(((TYPE *)0)->FIELD))
#endif
+/* Symbol versioning is different between static and shared library.
+ * Properly versioned symbols are needed for shared library, but
+ * only the symbol of the new version is needed for static library.
+ *
+ * When SHARED is defined, COMPAT_VERSION() emits a .symver directive
+ * binding internal_name to api_name@version, and DEFAULT_VERSION()
+ * emits one binding it to api_name@@version. Without SHARED,
+ * COMPAT_VERSION() expands to nothing and DEFAULT_VERSION() declares
+ * api_name as an alias of internal_name.
+ *
+ * Every non-empty expansion carries its own trailing semicolon, so
+ * the macros are invoked without one.
+ */
+#ifdef SHARED
+# define COMPAT_VERSION(internal_name, api_name, version) \
+ asm(".symver " #internal_name "," #api_name "@" #version);
+# define DEFAULT_VERSION(internal_name, api_name, version) \
+ asm(".symver " #internal_name "," #api_name "@@" #version);
+#else
+# define COMPAT_VERSION(internal_name, api_name, version)
+# define DEFAULT_VERSION(internal_name, api_name, version) \
+ extern typeof(internal_name) api_name \
+ __attribute__((alias(#internal_name)));
+#endif
+
extern void libbpf_print(enum libbpf_print_level level,
const char *format, ...)
__attribute__((format(printf, 2, 3)));
return xsk_umem__create_v0_0_4(umem_ptr, umem_area, size, fill, comp,
&config);
}
-asm(".symver xsk_umem__create_v0_0_2, xsk_umem__create@LIBBPF_0.0.2");
-asm(".symver xsk_umem__create_v0_0_4, xsk_umem__create@@LIBBPF_0.0.4");
+COMPAT_VERSION(xsk_umem__create_v0_0_2, xsk_umem__create, LIBBPF_0.0.2)
+DEFAULT_VERSION(xsk_umem__create_v0_0_4, xsk_umem__create, LIBBPF_0.0.4)
static int xsk_load_xdp_prog(struct xsk_socket *xsk)
{
if (CHECK_FAIL(pthread_create(&tid, NULL, server_thread,
(void *)&server_fd)))
- goto close_bpf_object;
+ goto close_server_fd;
pthread_mutex_lock(&server_started_mtx);
pthread_cond_wait(&server_started, &server_started_mtx);
if (CHECK_FAIL(pthread_create(&tid, NULL, server_thread,
(void *)&server_fd)))
- goto close_cgroup_fd;
+ goto close_server_fd;
pthread_mutex_lock(&server_started_mtx);
pthread_cond_wait(&server_started, &server_started_mtx);
pthread_mutex_unlock(&server_started_mtx);
CHECK_FAIL(run_test(cgroup_fd, server_fd));
+close_server_fd:
close(server_fd);
close_cgroup_fd:
close(cgroup_fd);
# Setup
tc qdisc add dev lo ingress
+echo 0 > /proc/sys/net/ipv4/conf/default/rp_filter
+echo 0 > /proc/sys/net/ipv4/conf/all/rp_filter
+echo 0 > /proc/sys/net/ipv4/conf/lo/rp_filter
echo "Testing IPv4..."
# Drops all IP/UDP packets coming from port 9
command -v nc >/dev/null 2>&1 || \
{ echo >&2 "nc is not available: skipping TSO tests"; return; }
- # listen on IPv*_DST, capture TCP into $TMPFILE
+ # listen on port 9000, capture TCP into $TMPFILE
if [ "${PROTO}" == "IPv4" ] ; then
IP_DST=${IPv4_DST}
ip netns exec ${NS3} bash -c \
- "nc -4 -l -s ${IPv4_DST} -p 9000 > ${TMPFILE} &"
+ "nc -4 -l -p 9000 > ${TMPFILE} &"
elif [ "${PROTO}" == "IPv6" ] ; then
IP_DST=${IPv6_DST}
ip netns exec ${NS3} bash -c \
- "nc -6 -l -s ${IPv6_DST} -p 9000 > ${TMPFILE} &"
+ "nc -6 -l -p 9000 > ${TMPFILE} &"
RET=$?
else
echo " test_gso: unknown PROTO: ${PROTO}"
/s390x/sync_regs_test
+/s390x/memop
/x86_64/cr4_cpuid_sync_test
/x86_64/evmcs_test
/x86_64/hyperv_cpuid
/x86_64/state_test
/x86_64/sync_regs_test
/x86_64/vmx_close_while_nested_test
+/x86_64/vmx_dirty_log_test
/x86_64/vmx_set_nested_state_test
/x86_64/vmx_tsc_adjust_test
+/x86_64/xss_msr_test
/clear_dirty_log_test
/dirty_log_test
/kvm_create_max_vcpus
TEST_GEN_PROGS_x86_64 += x86_64/vmx_dirty_log_test
TEST_GEN_PROGS_x86_64 += x86_64/vmx_set_nested_state_test
TEST_GEN_PROGS_x86_64 += x86_64/vmx_tsc_adjust_test
+TEST_GEN_PROGS_x86_64 += x86_64/xss_msr_test
TEST_GEN_PROGS_x86_64 += clear_dirty_log_test
TEST_GEN_PROGS_x86_64 += dirty_log_test
TEST_GEN_PROGS_x86_64 += kvm_create_max_vcpus
void vcpu_load_state(struct kvm_vm *vm, uint32_t vcpuid,
struct kvm_x86_state *state);
+struct kvm_msr_list *kvm_get_msr_index_list(void);
+
struct kvm_cpuid2 *kvm_get_supported_cpuid(void);
void vcpu_set_cpuid(struct kvm_vm *vm, uint32_t vcpuid,
struct kvm_cpuid2 *cpuid);
}
uint64_t vcpu_get_msr(struct kvm_vm *vm, uint32_t vcpuid, uint64_t msr_index);
+int _vcpu_set_msr(struct kvm_vm *vm, uint32_t vcpuid, uint64_t msr_index,
+ uint64_t msr_value);
void vcpu_set_msr(struct kvm_vm *vm, uint32_t vcpuid, uint64_t msr_index,
uint64_t msr_value);
-uint32_t kvm_get_cpuid_max(void);
+uint32_t kvm_get_cpuid_max_basic(void);
+uint32_t kvm_get_cpuid_max_extended(void);
void kvm_get_cpu_address_width(unsigned int *pa_bits, unsigned int *va_bits);
/*
void prepare_vmcs(struct vmx_pages *vmx, void *guest_rip, void *guest_rsp);
bool load_vmcs(struct vmx_pages *vmx);
+void nested_vmx_check_supported(void);
+
void nested_pg_map(struct vmx_pages *vmx, struct kvm_vm *vm,
uint64_t nested_paddr, uint64_t paddr, uint32_t eptp_memslot);
void nested_map(struct vmx_pages *vmx, struct kvm_vm *vm,
vm = vm_create(VM_MODE_DEFAULT, DEFAULT_GUEST_PHY_PAGES, O_RDWR);
- for (i = 0; i < num_vcpus; i++) {
- int vcpu_id = first_vcpu_id + i;
-
+ for (i = first_vcpu_id; i < first_vcpu_id + num_vcpus; i++)
/* This asserts that the vCPU was created. */
- vm_vcpu_add(vm, vcpu_id);
- }
+ vm_vcpu_add(vm, i);
kvm_vm_free(vm);
}
return buffer.entry.data;
}
-/* VCPU Set MSR
+/* _VCPU Set MSR
*
* Input Args:
* vm - Virtual Machine
*
* Output Args: None
*
- * Return: On success, nothing. On failure a TEST_ASSERT is produced.
+ * Return: The result of KVM_SET_MSRS.
*
- * Set value of MSR for VCPU.
+ * Sets the value of an MSR for the given VCPU.
*/
-void vcpu_set_msr(struct kvm_vm *vm, uint32_t vcpuid, uint64_t msr_index,
- uint64_t msr_value)
+int _vcpu_set_msr(struct kvm_vm *vm, uint32_t vcpuid, uint64_t msr_index,
+ uint64_t msr_value)
{
struct vcpu *vcpu = vcpu_find(vm, vcpuid);
struct {
buffer.entry.index = msr_index;
buffer.entry.data = msr_value;
r = ioctl(vcpu->fd, KVM_SET_MSRS, &buffer.header);
+ return r;
+}
+
+/* VCPU Set MSR
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ * vcpuid - VCPU ID
+ * msr_index - Index of MSR
+ * msr_value - New value of MSR
+ *
+ * Output Args: None
+ *
+ * Return: On success, nothing. On failure a TEST_ASSERT is produced.
+ *
+ * Set value of MSR for VCPU.
+ */
+void vcpu_set_msr(struct kvm_vm *vm, uint32_t vcpuid, uint64_t msr_index,
+ uint64_t msr_value)
+{
+ int r;
+
+ r = _vcpu_set_msr(vm, vcpuid, msr_index, msr_value);
TEST_ASSERT(r == 1, "KVM_SET_MSRS IOCTL failed,\n"
" rc: %i errno: %i", r, errno);
}
struct kvm_msrs msrs;
};
-static int kvm_get_num_msrs(struct kvm_vm *vm)
+static int kvm_get_num_msrs_fd(int kvm_fd)
{
struct kvm_msr_list nmsrs;
int r;
nmsrs.nmsrs = 0;
- r = ioctl(vm->kvm_fd, KVM_GET_MSR_INDEX_LIST, &nmsrs);
+ r = ioctl(kvm_fd, KVM_GET_MSR_INDEX_LIST, &nmsrs);
TEST_ASSERT(r == -1 && errno == E2BIG, "Unexpected result from KVM_GET_MSR_INDEX_LIST probe, r: %i",
r);
return nmsrs.nmsrs;
}
+/* Return the MSR index count for the KVM fd that belongs to @vm. */
+static int kvm_get_num_msrs(struct kvm_vm *vm)
+{
+	int kvm_fd = vm->kvm_fd;
+
+	return kvm_get_num_msrs_fd(kvm_fd);
+}
+
+/* KVM Get MSR Index List
+ *
+ * Input Args: None
+ *
+ * Output Args: None
+ *
+ * Return: A malloc()'d kvm_msr_list filled in by KVM_GET_MSR_INDEX_LIST.
+ * The caller is responsible for free()ing it.
+ *
+ * Exits with KSFT_SKIP when KVM_DEV_PATH cannot be opened. An allocation
+ * failure or a failing ioctl produces a TEST_ASSERT.
+ */
+struct kvm_msr_list *kvm_get_msr_index_list(void)
+{
+	struct kvm_msr_list *list;
+	int nmsrs, r, kvm_fd;
+
+	kvm_fd = open(KVM_DEV_PATH, O_RDONLY);
+	if (kvm_fd < 0)
+		exit(KSFT_SKIP);
+
+	/* Probe for the entry count first, then size the buffer to fit. */
+	nmsrs = kvm_get_num_msrs_fd(kvm_fd);
+	list = malloc(sizeof(*list) + nmsrs * sizeof(list->indices[0]));
+	/* Do not dereference a failed allocation below. */
+	TEST_ASSERT(list != NULL,
+		    "Insufficient memory for %i MSR indices", nmsrs);
+	list->nmsrs = nmsrs;
+	r = ioctl(kvm_fd, KVM_GET_MSR_INDEX_LIST, list);
+	close(kvm_fd);
+
+	TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_MSR_INDEX_LIST, r: %i",
+		r);
+
+	return list;
+}
+
struct kvm_x86_state *vcpu_save_state(struct kvm_vm *vm, uint32_t vcpuid)
{
struct vcpu *vcpu = vcpu_find(vm, vcpuid);
return (ebx == chunk[0] && edx == chunk[1] && ecx == chunk[2]);
}
-uint32_t kvm_get_cpuid_max(void)
+/* Return EAX of the KVM-supported CPUID entry for function 0. */
+uint32_t kvm_get_cpuid_max_basic(void)
+{
+	struct kvm_cpuid_entry2 *leaf0 = kvm_get_supported_cpuid_entry(0);
+
+	return leaf0->eax;
+}
+
+uint32_t kvm_get_cpuid_max_extended(void)
{
return kvm_get_supported_cpuid_entry(0x80000000)->eax;
}
bool pae;
/* SDM 4.1.4 */
- if (kvm_get_cpuid_max() < 0x80000008) {
+ if (kvm_get_cpuid_max_extended() < 0x80000008) {
pae = kvm_get_supported_cpuid_entry(1)->edx & (1 << 6);
*pa_bits = pae ? 36 : 32;
*va_bits = 32;
init_vmcs_guest_state(guest_rip, guest_rsp);
}
+/* Skip the calling test unless the CPUID_VMX bit is set in ECX of the
+ * KVM-supported CPUID entry for function 1. When the bit is clear, a
+ * message is printed to stderr and the process exits with KSFT_SKIP.
+ */
+void nested_vmx_check_supported(void)
+{
+	struct kvm_cpuid_entry2 *leaf1 = kvm_get_supported_cpuid_entry(1);
+
+	if (leaf1->ecx & CPUID_VMX)
+		return;
+
+	fprintf(stderr, "nested VMX not enabled, skipping test\n");
+	exit(KSFT_SKIP);
+}
+
void nested_pg_map(struct vmx_pages *vmx, struct kvm_vm *vm,
uint64_t nested_paddr, uint64_t paddr, uint32_t eptp_memslot)
{
static void guest_code(void)
{
- register u64 stage asm("11") = 0;
-
- for (;;) {
- GUEST_SYNC(0);
- asm volatile ("ahi %0,1" : : "r"(stage));
- }
+ /*
+ * We embed diag 501 here instead of doing a ucall so that the compiler
+ * cannot have modified r11 by the time of the ucall.
+ */
+ asm volatile (
+ "0: diag 0,0,0x501\n"
+ " ahi 11,1\n"
+ " j 0b\n"
+ );
}
#define REG_COMPARE(reg) \
#define VCPU_ID 5
+#define UCALL_PIO_PORT ((uint16_t)0x1000)
+
+/*
+ * ucall is embedded here to protect against compiler reshuffling registers
+ * before calling a function. In this test we only need to get KVM_EXIT_IO
+ * vmexit and preserve RBX, no additional information is needed.
+ */
void guest_code(void)
{
- /*
- * use a callee-save register, otherwise the compiler
- * saves it around the call to GUEST_SYNC.
- */
- register u32 stage asm("rbx");
- for (;;) {
- GUEST_SYNC(0);
- stage++;
- asm volatile ("" : : "r" (stage));
- }
+ asm volatile("1: in %[port], %%al\n"
+ "add $0x1, %%rbx\n"
+ "jmp 1b"
+ : : [port] "d" (UCALL_PIO_PORT) : "rax", "rbx");
}
static void compare_regs(struct kvm_regs *left, struct kvm_regs *right)
int main(int argc, char *argv[])
{
vm_vaddr_t vmx_pages_gva;
- struct kvm_cpuid_entry2 *entry = kvm_get_supported_cpuid_entry(1);
- if (!(entry->ecx & CPUID_VMX)) {
- fprintf(stderr, "nested VMX not enabled, skipping test\n");
- exit(KSFT_SKIP);
- }
+ nested_vmx_check_supported();
vm = vm_create_default(VCPU_ID, 0, (void *) l1_guest_code);
vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
struct ucall uc;
bool done = false;
+ nested_vmx_check_supported();
+
/* Create VM */
vm = vm_create_default(VCPU_ID, 0, l1_guest_code);
vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
{
struct kvm_vm *vm;
struct kvm_nested_state state;
- struct kvm_cpuid_entry2 *entry = kvm_get_supported_cpuid_entry(1);
have_evmcs = kvm_check_cap(KVM_CAP_HYPERV_ENLIGHTENED_VMCS);
* AMD currently does not implement set_nested_state, so for now we
* just early out.
*/
- if (!(entry->ecx & CPUID_VMX)) {
- fprintf(stderr, "nested VMX not enabled, skipping test\n");
- exit(KSFT_SKIP);
- }
+ nested_vmx_check_supported();
vm = vm_create_default(VCPU_ID, 0, 0);
state.flags = KVM_STATE_NESTED_RUN_PENDING;
test_nested_state_expect_einval(vm, &state);
- /*
- * TODO: When SVM support is added for KVM_SET_NESTED_STATE
- * add tests here to support it like VMX.
- */
- if (entry->ecx & CPUID_VMX)
- test_vmx_nested_state(vm);
+ test_vmx_nested_state(vm);
kvm_vm_free(vm);
return 0;
int main(int argc, char *argv[])
{
vm_vaddr_t vmx_pages_gva;
- struct kvm_cpuid_entry2 *entry = kvm_get_supported_cpuid_entry(1);
- if (!(entry->ecx & CPUID_VMX)) {
- fprintf(stderr, "nested VMX not enabled, skipping test\n");
- exit(KSFT_SKIP);
- }
+ nested_vmx_check_supported();
vm = vm_create_default(VCPU_ID, 0, (void *) l1_guest_code);
vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2019, Google LLC.
+ *
+ * Tests for the IA32_XSS MSR.
+ */
+
+#define _GNU_SOURCE /* for program_invocation_short_name */
+#include <sys/ioctl.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "vmx.h"
+
+#define VCPU_ID 1
+#define MSR_BITS 64
+
+#define X86_FEATURE_XSAVES (1<<3)
+
+/*
+ * Report whether @msr_index appears in the MSR index list obtained from
+ * kvm_get_msr_index_list().
+ *
+ * The list is scanned front to back and the scan stops at the first match.
+ * The list is released with free() before returning, whether or not a match
+ * was found.
+ *
+ * Return: true if @msr_index is present in the list, false otherwise.
+ */
+bool is_supported_msr(u32 msr_index)
+{
+	struct kvm_msr_list *msrs = kvm_get_msr_index_list();
+	bool present = false;
+	int pos = 0;
+
+	while (!present && pos < msrs->nmsrs) {
+		present = (msrs->indices[pos] == msr_index);
+		pos++;
+	}
+
+	free(msrs);
+	return present;
+}
+
+/*
+ * Test entry point for the IA32_XSS MSR checks.
+ *
+ * Steps:
+ *  1. Create a default VM with a single vCPU (VCPU_ID).
+ *  2. Skip the test (KSFT_SKIP) unless CPUID leaf 0xd, index 1 is available
+ *     and has the X86_FEATURE_XSAVES bit set in EAX.
+ *  3. Assert that IA32_XSS reads back as zero and that writing that value
+ *     back goes through vcpu_set_msr().
+ *  4. Try to write every single-bit value to IA32_XSS and assert that a
+ *     write is only accepted if IA32_XSS is reported by is_supported_msr().
+ *
+ * Return: 0 when all assertions hold; exits with KSFT_SKIP when IA32_XSS is
+ * not supported by the vCPU.
+ */
+int main(int argc, char *argv[])
+{
+	struct kvm_cpuid_entry2 *entry;
+	bool xss_supported = false;
+	bool xss_in_msr_list;
+	struct kvm_vm *vm;
+	uint64_t xss_val;
+	int i, r;
+
+	/* Create VM */
+	vm = vm_create_default(VCPU_ID, 0, 0);
+
+	/* Leaf 0xd is only queried when the maximum basic leaf covers it. */
+	if (kvm_get_cpuid_max_basic() >= 0xd) {
+		entry = kvm_get_supported_cpuid_index(0xd, 1);
+		xss_supported = entry && !!(entry->eax & X86_FEATURE_XSAVES);
+	}
+	if (!xss_supported) {
+		printf("IA32_XSS is not supported by the vCPU.\n");
+		exit(KSFT_SKIP);
+	}
+
+	xss_val = vcpu_get_msr(vm, VCPU_ID, MSR_IA32_XSS);
+	TEST_ASSERT(xss_val == 0,
+		    "MSR_IA32_XSS should be initialized to zero\n");
+
+	vcpu_set_msr(vm, VCPU_ID, MSR_IA32_XSS, xss_val);
+
+	/*
+	 * The queried MSR index is the same on every iteration below, so do
+	 * the list lookup (and its allocation) once instead of once per
+	 * accepted write.
+	 */
+	xss_in_msr_list = is_supported_msr(MSR_IA32_XSS);
+
+	/*
+	 * At present, KVM only supports a guest IA32_XSS value of 0. Verify
+	 * that trying to set the guest IA32_XSS to an unsupported value fails.
+	 * Also, in the future when a non-zero value succeeds check that
+	 * IA32_XSS is in the KVM_GET_MSR_INDEX_LIST.
+	 *
+	 * A non-zero return from _vcpu_set_msr() is taken to mean the write
+	 * was accepted (see the assertion message) -- TODO confirm against
+	 * the helper's definition.
+	 */
+	for (i = 0; i < MSR_BITS; ++i) {
+		r = _vcpu_set_msr(vm, VCPU_ID, MSR_IA32_XSS, 1ull << i);
+		TEST_ASSERT(r == 0 || xss_in_msr_list,
+			    "IA32_XSS was able to be set, but was not found in KVM_GET_MSR_INDEX_LIST.\n");
+	}
+
+	kvm_vm_free(vm);
+	return 0;
+}
char *file = "/dev/zero";
char *p;
- while ((opt = getopt(argc, argv, "m:r:n:f:tTLUSH")) != -1) {
+ while ((opt = getopt(argc, argv, "m:r:n:f:tTLUwSH")) != -1) {
switch (opt) {
case 'm':
size = atoi(optarg) * MB;
#define dma_unmap_single(...) do { } while (0)
#define dma_unmap_page(...) do { } while (0)
+#define dma_max_mapping_size(...) SIZE_MAX
+
#endif
--- /dev/null
+#ifndef XEN_XEN_STUB_H
+#define XEN_XEN_STUB_H
+
+#define xen_domain() 0 /* stub: always evaluates to 0 */
+
+#endif /* XEN_XEN_STUB_H */
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <linux/perf_event.h>
+#include <linux/perf/arm_pmu.h>
#include <linux/uaccess.h>
#include <asm/kvm_emulate.h>
#include <kvm/arm_pmu.h>
if (kvm_pmu_pmc_is_chained(pmc) &&
kvm_pmu_idx_is_high_counter(select_idx))
counter = upper_32_bits(counter);
-
- else if (!kvm_pmu_idx_is_64bit(vcpu, select_idx))
+ else if (select_idx != ARMV8_PMU_CYCLE_IDX)
counter = lower_32_bits(counter);
return counter;
*/
static void kvm_pmu_stop_counter(struct kvm_vcpu *vcpu, struct kvm_pmc *pmc)
{
- u64 counter, reg;
+ u64 counter, reg, val;
pmc = kvm_pmu_get_canonical_pmc(pmc);
if (!pmc->perf_event)
counter = kvm_pmu_get_pair_counter_value(vcpu, pmc);
- if (kvm_pmu_pmc_is_chained(pmc)) {
- reg = PMEVCNTR0_EL0 + pmc->idx;
- __vcpu_sys_reg(vcpu, reg) = lower_32_bits(counter);
- __vcpu_sys_reg(vcpu, reg + 1) = upper_32_bits(counter);
+ if (pmc->idx == ARMV8_PMU_CYCLE_IDX) {
+ reg = PMCCNTR_EL0;
+ val = counter;
} else {
- reg = (pmc->idx == ARMV8_PMU_CYCLE_IDX)
- ? PMCCNTR_EL0 : PMEVCNTR0_EL0 + pmc->idx;
- __vcpu_sys_reg(vcpu, reg) = lower_32_bits(counter);
+ reg = PMEVCNTR0_EL0 + pmc->idx;
+ val = lower_32_bits(counter);
}
+ __vcpu_sys_reg(vcpu, reg) = val;
+
+ if (kvm_pmu_pmc_is_chained(pmc))
+ __vcpu_sys_reg(vcpu, reg + 1) = upper_32_bits(counter);
+
kvm_pmu_release_perf_event(pmc);
}
struct pt_regs *regs)
{
struct kvm_pmc *pmc = perf_event->overflow_handler_context;
+ struct arm_pmu *cpu_pmu = to_arm_pmu(perf_event->pmu);
struct kvm_vcpu *vcpu = kvm_pmc_to_vcpu(pmc);
int idx = pmc->idx;
+ u64 period;
+
+ cpu_pmu->pmu.stop(perf_event, PERF_EF_UPDATE);
+
+ /*
+ * Reset the sample period to the architectural limit,
+ * i.e. the point where the counter overflows.
+ */
+ period = -(local64_read(&perf_event->count));
+
+ if (!kvm_pmu_idx_is_64bit(vcpu, pmc->idx))
+ period &= GENMASK(31, 0);
+
+ local64_set(&perf_event->hw.period_left, 0);
+ perf_event->attr.sample_period = period;
+ perf_event->hw.sample_period = period;
__vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(idx);
kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu);
kvm_vcpu_kick(vcpu);
}
+
+ cpu_pmu->pmu.start(perf_event, PERF_EF_RELOAD);
}
/**
* high counter.
*/
attr.sample_period = (-counter) & GENMASK(63, 0);
+ if (kvm_pmu_counter_is_enabled(vcpu, pmc->idx + 1))
+ attr.config1 |= PERF_ATTR_CFG1_KVM_PMU_CHAINED;
+
event = perf_event_create_kernel_counter(&attr, -1, current,
kvm_pmu_perf_overflow,
pmc + 1);
-
- if (kvm_pmu_counter_is_enabled(vcpu, pmc->idx + 1))
- attr.config1 |= PERF_ATTR_CFG1_KVM_PMU_CHAINED;
} else {
/* The initial sample period (overflow count) of an event. */
if (kvm_pmu_idx_is_64bit(vcpu, pmc->idx))
int kvm_coalesced_mmio_init(struct kvm *kvm)
{
struct page *page;
- int ret;
- ret = -ENOMEM;
page = alloc_page(GFP_KERNEL | __GFP_ZERO);
if (!page)
- goto out_err;
+ return -ENOMEM;
- ret = 0;
kvm->coalesced_mmio_ring = page_address(page);
/*
spin_lock_init(&kvm->ring_lock);
INIT_LIST_HEAD(&kvm->coalesced_zones);
-out_err:
- return ret;
+ return 0;
}
void kvm_coalesced_mmio_free(struct kvm *kvm)
}
EXPORT_SYMBOL_GPL(kvm_put_kvm);
+/*
+ * Drop a reference to @kvm that was acquired for an object tied to a
+ * user-visible file descriptor (e.g. a vcpu or device) when installing that
+ * file descriptor failed, so the reference could not be handed over to its
+ * final owner.
+ *
+ * The caller is still actively using @kvm at that point, so the count is
+ * not expected to reach zero here; if it does, a warning is emitted.  This
+ * function only decrements the count and never tears anything down itself.
+ */
+void kvm_put_kvm_no_destroy(struct kvm *kvm)
+{
+	bool dropped_last_ref = refcount_dec_and_test(&kvm->users_count);
+
+	WARN_ON(dropped_last_ref);
+}
+EXPORT_SYMBOL_GPL(kvm_put_kvm_no_destroy);
static int kvm_vm_release(struct inode *inode, struct file *filp)
{
kvm_arch_vcpu_unblocking(vcpu);
block_ns = ktime_to_ns(cur) - ktime_to_ns(start);
- if (!vcpu_valid_wakeup(vcpu))
- shrink_halt_poll_ns(vcpu);
- else if (halt_poll_ns) {
- if (block_ns <= vcpu->halt_poll_ns)
- ;
- /* we had a long block, shrink polling */
- else if (vcpu->halt_poll_ns && block_ns > halt_poll_ns)
+ if (!kvm_arch_no_poll(vcpu)) {
+ if (!vcpu_valid_wakeup(vcpu)) {
shrink_halt_poll_ns(vcpu);
- /* we had a short halt and our poll time is too small */
- else if (vcpu->halt_poll_ns < halt_poll_ns &&
- block_ns < halt_poll_ns)
- grow_halt_poll_ns(vcpu);
- } else
- vcpu->halt_poll_ns = 0;
+ } else if (halt_poll_ns) {
+ if (block_ns <= vcpu->halt_poll_ns)
+ ;
+ /* we had a long block, shrink polling */
+ else if (vcpu->halt_poll_ns && block_ns > halt_poll_ns)
+ shrink_halt_poll_ns(vcpu);
+ /* we had a short halt and our poll time is too small */
+ else if (vcpu->halt_poll_ns < halt_poll_ns &&
+ block_ns < halt_poll_ns)
+ grow_halt_poll_ns(vcpu);
+ } else {
+ vcpu->halt_poll_ns = 0;
+ }
+ }
trace_kvm_vcpu_wakeup(block_ns, waited, vcpu_valid_wakeup(vcpu));
kvm_arch_vcpu_block_finish(vcpu);
goto unlock_vcpu_destroy;
}
- BUG_ON(kvm->vcpus[atomic_read(&kvm->online_vcpus)]);
+ vcpu->vcpu_idx = atomic_read(&kvm->online_vcpus);
+ BUG_ON(kvm->vcpus[vcpu->vcpu_idx]);
/* Now it's all set up, let userspace reach it */
kvm_get_kvm(kvm);
r = create_vcpu_fd(vcpu);
if (r < 0) {
- kvm_put_kvm(kvm);
+ kvm_put_kvm_no_destroy(kvm);
goto unlock_vcpu_destroy;
}
- kvm->vcpus[atomic_read(&kvm->online_vcpus)] = vcpu;
+ kvm->vcpus[vcpu->vcpu_idx] = vcpu;
/*
* Pairs with smp_rmb() in kvm_get_vcpu. Write kvm->vcpus
kvm_get_kvm(kvm);
ret = anon_inode_getfd(ops->name, &kvm_device_fops, dev, O_RDWR | O_CLOEXEC);
if (ret < 0) {
- kvm_put_kvm(kvm);
+ kvm_put_kvm_no_destroy(kvm);
mutex_lock(&kvm->lock);
list_del(&dev->vm_node);
mutex_unlock(&kvm->lock);